feat: Add experiments framework and novelty-driven agent loop
- Add complete experiments directory with pilot study infrastructure
  - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective)
  - Human assessment tool with React frontend and FastAPI backend
  - AUT flexibility analysis with jump signal detection
  - Result visualization and metrics computation
- Add novelty-driven agent loop module (experiments/novelty_loop/)
  - NoveltyDrivenTaskAgent with expert perspective perturbation
  - Three termination strategies: breakthrough, exhaust, coverage
  - Interactive CLI demo with colored output
  - Embedding-based novelty scoring
- Add DDC knowledge domain classification data (en/zh)
- Add CLAUDE.md project documentation
- Update research report with experiment findings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
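The three termination strategies are only named in this message. Purely as an illustration, the sketch below shows one plausible way such checks could be layered on the NoveltyMetrics class added in this commit; the breakthrough/exhaust/coverage semantics, the thresholds, and the import path are assumptions, not necessarily what NoveltyDrivenTaskAgent actually does.

    # Illustrative only: strategy semantics and thresholds are assumptions,
    # not the actual NoveltyDrivenTaskAgent logic.
    from experiments.novelty_loop.novelty_metrics import NoveltyMetrics, NoveltyScore  # assumed import path

    def should_stop(strategy: str, metrics: NoveltyMetrics, latest: NoveltyScore) -> bool:
        traj = metrics.trajectory
        if strategy == "breakthrough":
            # Stop as soon as a significant semantic jump is observed.
            return latest.is_jump
        if strategy == "exhaust":
            # Stop when the last few outputs add almost no novelty.
            recent = traj.scores[-3:]
            return len(recent) == 3 and max(recent) < 0.1
        if strategy == "coverage":
            # Stop once enough cumulative semantic ground has been covered.
            return traj.final_cumulative_novelty >= 3.0
        raise ValueError(f"Unknown termination strategy: {strategy}")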
experiments/novelty_loop/novelty_metrics.py (new normal file, 269 lines added)
@@ -0,0 +1,269 @@
"""
Novelty Metrics Module - Compute novelty scores for generated outputs.

This module provides embedding-based novelty metrics adapted from the AUT flexibility
analysis framework for use in novelty-driven agent loops.

Key Metrics:
- Centroid Distance: Measures how far a new output is from the centroid of previous outputs
- Cumulative Novelty: Tracks novelty over the generation sequence
- Jump Detection: Identifies significant semantic shifts between consecutive outputs
"""

from dataclasses import dataclass, field
from typing import List, Optional
import numpy as np


@dataclass
class NoveltyScore:
    """Result of novelty computation for a single output."""
    score: float  # Main novelty score (0.0 = identical to centroid, 1.0 = maximally distant)
    distance_from_centroid: float
    min_distance_to_existing: float  # Nearest neighbor distance
    is_jump: bool  # Whether this represents a significant semantic jump
    jump_magnitude: Optional[float] = None  # Similarity to previous output (if applicable)


@dataclass
class NoveltyTrajectory:
    """Tracks novelty scores over a generation sequence."""
    scores: List[float] = field(default_factory=list)
    cumulative_novelty: List[float] = field(default_factory=list)
    jump_positions: List[int] = field(default_factory=list)
    centroid_history: List[np.ndarray] = field(default_factory=list)

    @property
    def mean_novelty(self) -> float:
        """Average novelty across all outputs."""
        return float(np.mean(self.scores)) if self.scores else 0.0

    @property
    def max_novelty(self) -> float:
        """Maximum novelty achieved."""
        return float(max(self.scores)) if self.scores else 0.0

    @property
    def jump_ratio(self) -> float:
        """Proportion of transitions that were jumps."""
        if len(self.scores) < 2:
            return 0.0
        return len(self.jump_positions) / (len(self.scores) - 1)

    @property
    def final_cumulative_novelty(self) -> float:
        """Total accumulated novelty."""
        return self.cumulative_novelty[-1] if self.cumulative_novelty else 0.0


class NoveltyMetrics:
    """
    Computes novelty metrics for embeddings in a streaming fashion.

    Designed for use in an agent loop where outputs are generated one at a time
    and we need to assess novelty incrementally.
    """

    def __init__(
        self,
        similarity_threshold: float = 0.7,
        jump_detection_enabled: bool = True
    ):
        """
        Args:
            similarity_threshold: Threshold for semantic similarity (below = jump)
            jump_detection_enabled: Whether to track semantic jumps
        """
        self.similarity_threshold = similarity_threshold
        self.jump_detection_enabled = jump_detection_enabled

        # State
        self.embeddings: List[np.ndarray] = []
        self.trajectory = NoveltyTrajectory()
        self._centroid: Optional[np.ndarray] = None

    def reset(self):
        """Reset all state for a new generation session."""
        self.embeddings = []
        self.trajectory = NoveltyTrajectory()
        self._centroid = None

    @staticmethod
    def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
        """Compute cosine similarity between two vectors."""
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        if norm_a == 0 or norm_b == 0:
            return 0.0
        return float(np.dot(a, b) / (norm_a * norm_b))

    @staticmethod
    def cosine_distance(a: np.ndarray, b: np.ndarray) -> float:
        """Compute cosine distance (1 - similarity) between two vectors."""
        return 1.0 - NoveltyMetrics.cosine_similarity(a, b)

    def compute_centroid(self) -> Optional[np.ndarray]:
        """Compute centroid of all current embeddings."""
        if not self.embeddings:
            return None
        return np.mean(self.embeddings, axis=0)

    def compute_novelty(self, embedding: np.ndarray) -> NoveltyScore:
        """
        Compute novelty score for a new embedding.

        This does NOT add the embedding to the history - call add_embedding() for that.

        Args:
            embedding: The embedding vector to evaluate

        Returns:
            NoveltyScore with computed metrics
        """
        embedding = np.array(embedding)

        # First output is maximally novel (nothing to compare to)
        if not self.embeddings:
            return NoveltyScore(
                score=1.0,
                distance_from_centroid=1.0,
                min_distance_to_existing=1.0,
                is_jump=False,
                jump_magnitude=None
            )

        # Distance from centroid (primary novelty metric)
        centroid = self.compute_centroid()
        distance_from_centroid = self.cosine_distance(embedding, centroid)

        # Minimum distance to any existing embedding (nearest neighbor)
        min_distance = min(
            self.cosine_distance(embedding, existing)
            for existing in self.embeddings
        )

        # Jump detection (similarity to previous output)
        is_jump = False
        jump_magnitude = None
        if self.jump_detection_enabled and self.embeddings:
            similarity_to_prev = self.cosine_similarity(embedding, self.embeddings[-1])
            jump_magnitude = similarity_to_prev
            is_jump = similarity_to_prev < self.similarity_threshold

        # Primary novelty score is distance from centroid
        # Normalized to [0, 1] range where higher = more novel
        novelty_score = distance_from_centroid

        return NoveltyScore(
            score=novelty_score,
            distance_from_centroid=distance_from_centroid,
            min_distance_to_existing=min_distance,
            is_jump=is_jump,
            jump_magnitude=jump_magnitude
        )

    def add_embedding(self, embedding: np.ndarray, novelty: Optional[NoveltyScore] = None):
        """
        Add an embedding to the history and update trajectory.

        Args:
            embedding: The embedding to add
            novelty: Pre-computed novelty score (computed if not provided)
        """
        embedding = np.array(embedding)

        if novelty is None:
            novelty = self.compute_novelty(embedding)

        # Update state
        self.embeddings.append(embedding)
        self._centroid = self.compute_centroid()

        # Update trajectory
        self.trajectory.scores.append(novelty.score)

        # Cumulative novelty
        prev_cumulative = self.trajectory.cumulative_novelty[-1] if self.trajectory.cumulative_novelty else 0.0
        self.trajectory.cumulative_novelty.append(prev_cumulative + novelty.score)

        # Track jumps
        if novelty.is_jump:
            self.trajectory.jump_positions.append(len(self.embeddings) - 1)

        # Store centroid history
        if self._centroid is not None:
            self.trajectory.centroid_history.append(self._centroid.copy())

    def get_current_state(self) -> dict:
        """Get current state as a dictionary for logging/debugging."""
        return {
            "num_embeddings": len(self.embeddings),
            "mean_novelty": self.trajectory.mean_novelty,
            "max_novelty": self.trajectory.max_novelty,
            "jump_ratio": self.trajectory.jump_ratio,
            "cumulative_novelty": self.trajectory.final_cumulative_novelty,
            "recent_scores": self.trajectory.scores[-5:] if self.trajectory.scores else []
        }


def compute_batch_novelty(
    embeddings: List[np.ndarray],
    reference_embeddings: Optional[List[np.ndarray]] = None
) -> List[float]:
    """
    Compute novelty scores for a batch of embeddings.

    Useful for post-hoc analysis of generated outputs.

    Args:
        embeddings: List of embeddings to evaluate
        reference_embeddings: Optional reference set (uses the batch itself if not provided)

    Returns:
        List of novelty scores (distance from centroid)
    """
    if not embeddings:
        return []

    embeddings_arr = np.array(embeddings)

    if reference_embeddings is not None:
        centroid = np.mean(reference_embeddings, axis=0)
    else:
        centroid = np.mean(embeddings_arr, axis=0)

    scores = []
    for emb in embeddings_arr:
        distance = NoveltyMetrics.cosine_distance(emb, centroid)
        scores.append(distance)

    return scores


def find_most_novel(
    embeddings: List[np.ndarray],
    texts: List[str],
    top_k: int = 5
) -> List[tuple]:
    """
    Find the most novel outputs from a batch.

    Args:
        embeddings: List of embeddings
        texts: Corresponding text outputs
        top_k: Number of top results to return

    Returns:
        List of (text, novelty_score, index) tuples, sorted by novelty descending
    """
    scores = compute_batch_novelty(embeddings)

    indexed_results = [
        (texts[i], scores[i], i)
        for i in range(len(texts))
    ]

    # Sort by novelty score descending
    indexed_results.sort(key=lambda x: x[1], reverse=True)

    return indexed_results[:top_k]
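
For reference (not part of the diff above), a minimal sketch of how the streaming API in this file is meant to be driven. The embed() helper and the flat import path are placeholders standing in for whatever sentence-embedding model and package layout the project actually uses; everything else calls code defined in this file.

    import numpy as np
    from novelty_metrics import NoveltyMetrics  # adjust the import path to your project layout

    rng = np.random.default_rng(0)

    def embed(text: str) -> np.ndarray:
        # Placeholder: stand-in for a real sentence-embedding model.
        return rng.random(384)

    metrics = NoveltyMetrics(similarity_threshold=0.7)

    for output in ["a brick as a paperweight", "a brick as a speaker stand", "grind bricks into pigment"]:
        emb = embed(output)
        score = metrics.compute_novelty(emb)       # score first...
        metrics.add_embedding(emb, novelty=score)  # ...then commit it to the history
        if score.is_jump:
            print(f"jump at {output!r} (similarity to previous = {score.jump_magnitude:.2f})")

    print(metrics.get_current_state())  # mean/max novelty, jump ratio, cumulative novelty, recent scores

Scoring before calling add_embedding() lets the caller react to the NoveltyScore (keep, retry, or stop) before the new output shifts the centroid.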
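Continuing the same sketch (same placeholder embed() and import-path assumptions), the post-hoc helpers can rank a finished batch of outputs by novelty:

    from novelty_metrics import compute_batch_novelty, find_most_novel  # same assumed path as above

    texts = ["use a brick as a doorstop", "use a brick as a heat battery", "carve a brick into a chess set"]
    embeddings = [embed(t) for t in texts]

    scores = compute_batch_novelty(embeddings)  # distance of each item from the batch centroid
    for text, score, idx in find_most_novel(embeddings, texts, top_k=2):
        print(f"{score:.3f}  [{idx}] {text}")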