# novelty-seeking/experiments/novelty_loop/novelty_metrics.py

"""
Novelty Metrics Module - Compute novelty scores for generated outputs.

This module provides embedding-based novelty metrics adapted from the AUT flexibility
analysis framework for use in novelty-driven agent loops.

Key Metrics:
- Centroid Distance: Measures how far a new output is from the centroid of previous outputs
- Cumulative Novelty: Tracks novelty over the generation sequence
- Jump Detection: Identifies significant semantic shifts between consecutive outputs
"""

from dataclasses import dataclass, field
from typing import List, Optional
import numpy as np


@dataclass
class NoveltyScore:
    """Result of novelty computation for a single output.

    As populated by NoveltyMetrics.compute_novelty, every distance here is a
    cosine distance (1 - cosine similarity) and therefore lies in [0, 2]:
    0 = same direction, 1 = orthogonal, 2 = opposite direction. For the very
    first output, which has nothing to be compared against, all three distance
    fields are set to 1.0.
    """
    score: float  # Main novelty score; currently equal to distance_from_centroid (higher = more novel)
    distance_from_centroid: float  # Cosine distance to the mean of the previously added embeddings
    min_distance_to_existing: float  # Nearest neighbor distance
    is_jump: bool  # Whether this represents a significant semantic jump (similarity to previous output below threshold)
    jump_magnitude: Optional[float] = None  # Cosine similarity to previous output; None for the first output or when jump detection is disabled


@dataclass
class NoveltyTrajectory:
    """Running record of novelty statistics across one generation session.

    Attributes:
        scores: Novelty score of each output, in generation order.
        cumulative_novelty: Running total of ``scores`` after each output.
        jump_positions: Indices of the outputs that were flagged as jumps.
        centroid_history: Centroid snapshot recorded after each output.
    """
    scores: List[float] = field(default_factory=list)
    cumulative_novelty: List[float] = field(default_factory=list)
    jump_positions: List[int] = field(default_factory=list)
    centroid_history: List[np.ndarray] = field(default_factory=list)

    @property
    def mean_novelty(self) -> float:
        """Mean of the recorded novelty scores (0.0 when nothing is recorded)."""
        if not self.scores:
            return 0.0
        return float(np.mean(self.scores))

    @property
    def max_novelty(self) -> float:
        """Largest recorded novelty score (0.0 when nothing is recorded)."""
        if not self.scores:
            return 0.0
        return float(max(self.scores))

    @property
    def jump_ratio(self) -> float:
        """Fraction of output-to-output transitions that were flagged as jumps."""
        # n outputs give n - 1 transitions; with fewer than two outputs there are none.
        transitions = len(self.scores) - 1
        if transitions < 1:
            return 0.0
        return len(self.jump_positions) / transitions

    @property
    def final_cumulative_novelty(self) -> float:
        """Latest running total of novelty (0.0 when nothing is recorded)."""
        if not self.cumulative_novelty:
            return 0.0
        return self.cumulative_novelty[-1]


class NoveltyMetrics:
    """
    Computes novelty metrics for embeddings in a streaming fashion.

    Designed for use in an agent loop where outputs are generated one at a time
    and we need to assess novelty incrementally: call compute_novelty() to score
    a candidate against the history, then add_embedding() to commit it.

    All distances are cosine distances (1 - cosine similarity) and therefore lie
    in [0, 2]: 0 = same direction, 1 = orthogonal, 2 = opposite direction.
    """

    def __init__(
        self,
        similarity_threshold: float = 0.7,
        jump_detection_enabled: bool = True
    ):
        """
        Args:
            similarity_threshold: Threshold for semantic similarity (below = jump)
            jump_detection_enabled: Whether to track semantic jumps
        """
        self.similarity_threshold = similarity_threshold
        self.jump_detection_enabled = jump_detection_enabled

        # State
        self.embeddings: List[np.ndarray] = []
        self.trajectory = NoveltyTrajectory()
        self._centroid: Optional[np.ndarray] = None

    def reset(self):
        """Reset all state for a new generation session (configuration is kept)."""
        self.embeddings = []
        self.trajectory = NoveltyTrajectory()
        self._centroid = None

    @staticmethod
    def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
        """
        Compute cosine similarity between two vectors.

        Args:
            a: First vector
            b: Second vector (same length as ``a``)

        Returns:
            Cosine similarity in [-1.0, 1.0]; 0.0 if either vector has zero norm.
        """
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        # A zero vector has no direction; report "no similarity" instead of dividing by zero.
        if norm_a == 0 or norm_b == 0:
            return 0.0
        similarity = np.dot(a, b) / (norm_a * norm_b)
        # Floating-point rounding can land a few ulps outside [-1, 1], which would
        # make the derived cosine distance slightly negative for identical vectors.
        return float(np.clip(similarity, -1.0, 1.0))

    @staticmethod
    def cosine_distance(a: np.ndarray, b: np.ndarray) -> float:
        """Compute cosine distance (1 - similarity) between two vectors; result is in [0.0, 2.0]."""
        return 1.0 - NoveltyMetrics.cosine_similarity(a, b)

    def compute_centroid(self) -> Optional[np.ndarray]:
        """Compute centroid (element-wise mean) of all current embeddings, or None if there are none."""
        if not self.embeddings:
            return None
        return np.mean(self.embeddings, axis=0)

    def compute_novelty(self, embedding: np.ndarray) -> "NoveltyScore":
        """
        Compute novelty score for a new embedding.

        This does NOT add the embedding to the history - call add_embedding() for that.

        Args:
            embedding: The embedding vector to evaluate

        Returns:
            NoveltyScore with computed metrics. The score is the cosine distance
            from the centroid of the history, so it lies in [0, 2]. The first
            output (empty history) gets 1.0 for every distance and is never a jump.
        """
        embedding = np.array(embedding)

        # First output is maximally novel (nothing to compare to)
        if not self.embeddings:
            return NoveltyScore(
                score=1.0,
                distance_from_centroid=1.0,
                min_distance_to_existing=1.0,
                is_jump=False,
                jump_magnitude=None
            )

        # Distance from centroid (primary novelty metric)
        centroid = self.compute_centroid()
        distance_from_centroid = self.cosine_distance(embedding, centroid)

        # Minimum distance to any existing embedding (nearest neighbor)
        min_distance = min(
            self.cosine_distance(embedding, existing)
            for existing in self.embeddings
        )

        # Jump detection (similarity to previous output). The history is known to
        # be non-empty here because of the early return above.
        is_jump = False
        jump_magnitude = None
        if self.jump_detection_enabled:
            similarity_to_prev = self.cosine_similarity(embedding, self.embeddings[-1])
            jump_magnitude = similarity_to_prev
            is_jump = similarity_to_prev < self.similarity_threshold

        # Primary novelty score is the raw cosine distance from the centroid
        # (no rescaling is applied): higher = more novel, range [0, 2].
        novelty_score = distance_from_centroid

        return NoveltyScore(
            score=novelty_score,
            distance_from_centroid=distance_from_centroid,
            min_distance_to_existing=min_distance,
            is_jump=is_jump,
            jump_magnitude=jump_magnitude
        )

    def add_embedding(self, embedding: np.ndarray, novelty: Optional["NoveltyScore"] = None):
        """
        Add an embedding to the history and update trajectory.

        Args:
            embedding: The embedding to add
            novelty: Pre-computed novelty score (computed if not provided)
        """
        # np.array copies, so later mutation of the caller's array cannot alter the history.
        embedding = np.array(embedding)

        # Score against the history *before* this embedding joins it.
        if novelty is None:
            novelty = self.compute_novelty(embedding)

        # Update state
        self.embeddings.append(embedding)
        self._centroid = self.compute_centroid()

        # Update trajectory
        self.trajectory.scores.append(novelty.score)

        # Cumulative novelty
        prev_cumulative = self.trajectory.cumulative_novelty[-1] if self.trajectory.cumulative_novelty else 0.0
        self.trajectory.cumulative_novelty.append(prev_cumulative + novelty.score)

        # Track jumps (position = index of the embedding just added)
        if novelty.is_jump:
            self.trajectory.jump_positions.append(len(self.embeddings) - 1)

        # Store centroid history (a copy, so each snapshot is independent)
        if self._centroid is not None:
            self.trajectory.centroid_history.append(self._centroid.copy())

    def get_current_state(self) -> dict:
        """Get current state as a dictionary for logging/debugging."""
        return {
            "num_embeddings": len(self.embeddings),
            "mean_novelty": self.trajectory.mean_novelty,
            "max_novelty": self.trajectory.max_novelty,
            "jump_ratio": self.trajectory.jump_ratio,
            "cumulative_novelty": self.trajectory.final_cumulative_novelty,
            # Slicing an empty list already yields [], so no emptiness check is needed.
            "recent_scores": self.trajectory.scores[-5:]
        }


def compute_batch_novelty(
    embeddings: List[np.ndarray],
    reference_embeddings: Optional[List[np.ndarray]] = None
) -> List[float]:
    """
    Compute novelty scores for a batch of embeddings.

    Useful for post-hoc analysis of generated outputs. Each score is the cosine
    distance between an embedding and a single centroid, so it lies in [0, 2].

    Args:
        embeddings: Embeddings to evaluate (a list of vectors or a 2-D array)
        reference_embeddings: Optional reference set whose centroid is used.
            When omitted or empty, the centroid of ``embeddings`` itself is used.

    Returns:
        List of novelty scores (distance from centroid), one per embedding,
        in input order. Empty list if there are no embeddings.
    """
    # len() instead of truthiness: the truth value of an ndarray is ambiguous.
    if embeddings is None or len(embeddings) == 0:
        return []

    embeddings_arr = np.array(embeddings)

    # An empty reference set has no centroid (its mean is NaN), so fall back to
    # the batch itself exactly as if no reference had been given.
    if reference_embeddings is not None and len(reference_embeddings) > 0:
        centroid = np.mean(reference_embeddings, axis=0)
    else:
        centroid = np.mean(embeddings_arr, axis=0)

    return [NoveltyMetrics.cosine_distance(emb, centroid) for emb in embeddings_arr]


def find_most_novel(
    embeddings: List[np.ndarray],
    texts: List[str],
    top_k: int = 5
) -> List[tuple]:
    """
    Find the most novel outputs from a batch.

    Novelty is the cosine distance of each embedding from the centroid of the
    whole batch, as computed by compute_batch_novelty().

    Args:
        embeddings: List of embeddings
        texts: Corresponding text outputs (same length and order as ``embeddings``)
        top_k: Number of top results to return; values <= 0 yield an empty list

    Returns:
        List of (text, novelty_score, index) tuples, sorted by novelty descending.
        Ties keep their original relative order.

    Raises:
        ValueError: If ``embeddings`` and ``texts`` differ in length.
    """
    # Fail loudly on misaligned inputs instead of raising an opaque IndexError
    # (too many texts) or silently ignoring outputs (too few texts).
    if len(embeddings) != len(texts):
        raise ValueError(
            f"embeddings and texts must have the same length "
            f"(got {len(embeddings)} and {len(texts)})"
        )

    scores = compute_batch_novelty(embeddings)

    # sorted() is stable, also with reverse=True, so equal scores stay in input order.
    ranked = sorted(
        ((text, score, idx) for idx, (text, score) in enumerate(zip(texts, scores))),
        key=lambda item: item[1],
        reverse=True,
    )

    # Clamp so a negative top_k returns nothing rather than "all but the last |top_k|".
    return ranked[:max(top_k, 0)]