feat: Add experiments framework and novelty-driven agent loop

- Add complete experiments directory with pilot study infrastructure
  - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective)
  - Human assessment tool with React frontend and FastAPI backend
  - AUT flexibility analysis with jump signal detection
  - Result visualization and metrics computation

- Add novelty-driven agent loop module (experiments/novelty_loop/)
  - NoveltyDrivenTaskAgent with expert perspective perturbation
  - Three termination strategies: breakthrough, exhaust, coverage
  - Interactive CLI demo with colored output
  - Embedding-based novelty scoring

- Add DDC knowledge domain classification data (en/zh)
- Add CLAUDE.md project documentation
- Update research report with experiment findings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-20 10:16:21 +08:00
parent 26a56a2a07
commit 43c025e060
81 changed files with 18766 additions and 2 deletions

313
experiments/novelty_loop/demo.py Executable file
View File

@@ -0,0 +1,313 @@
#!/usr/bin/env python3
"""
Novelty-Driven Task Generation Demo
Interactive CLI for exploring the novelty-driven task generation agent.
Examples:
# Basic usage with default settings
python demo.py "Improve urban transportation"
# Custom threshold and iterations
python demo.py "Design a better bicycle" --threshold 0.35 --max-iter 15
# Use Chinese language
python demo.py "改进城市交通" --language zh
# Use exhaustion strategy (explore until stuck)
python demo.py "Sustainable energy solutions" --strategy exhaust
# Use coverage strategy (find N distinct clusters)
python demo.py "Future of education" --strategy coverage --clusters 5
# Save results to file
python demo.py "Smart home innovations" --output results.json
# Verbose mode with detailed logging
python demo.py "Healthcare improvements" --verbose
"""
import argparse
import asyncio
import json
import logging
import sys
from datetime import datetime
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from experiments.novelty_loop.agent import (
NoveltyDrivenTaskAgent,
ExhaustFrontierAgent,
CoverageTargetAgent,
GeneratedTask,
TaskGenerationResult
)
# ANSI color codes for terminal output
class Colors:
    """ANSI SGR escape sequences used to colorize terminal output.

    Every colored span must be terminated with ``END`` to reset the
    terminal's text attributes.
    """

    # Foreground colors
    HEADER = '\033[95m'
    BLUE = '\033[94m'
    CYAN = '\033[96m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    # Text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Reset sequence
    END = '\033[0m'
def print_header(text: str):
    """Print *text* centered inside a bold, colored 60-column banner."""
    rule = f"{Colors.BOLD}{Colors.HEADER}{'='*60}{Colors.END}"
    print(f"\n{rule}")
    print(f"{Colors.BOLD}{Colors.HEADER}{text.center(60)}{Colors.END}")
    print(f"{rule}\n")
def print_iteration(task: GeneratedTask):
    """Print one iteration's expert, task and novelty score with colors.

    Passed to the agents as the ``on_iteration`` callback (unless --quiet).
    """
    print(f"\n{Colors.BOLD}Iteration {task.iteration}{Colors.END}")
    print(f" {Colors.YELLOW}Expert:{Colors.END} {task.expert} ({task.expert_domain})")
    print(f" {Colors.YELLOW}Task:{Colors.END} {task.task}")
    # 20-cell progress bar; filled cells scale with novelty_score in [0, 1].
    # NOTE(review): the original multiplied empty strings here, which always
    # rendered a blank bar — presumably the block characters were lost in
    # transit. Reconstructed with full/light block glyphs; confirm the
    # intended characters.
    filled = int(task.novelty_score * 20)
    novelty_bar = "█" * filled + "░" * (20 - filled)
    print(f" {Colors.YELLOW}Novelty:{Colors.END} [{novelty_bar}] {task.novelty_score:.4f}")
    if task.is_breakthrough:
        print(f" {Colors.GREEN}{Colors.BOLD}★ BREAKTHROUGH! ★{Colors.END}")
def print_result(result: TaskGenerationResult):
    """Print the final run summary: stats, breakthrough task and trajectory."""
    print_header("RESULTS")
    print(f"{Colors.BOLD}Seed Problem:{Colors.END} {result.seed_problem}")
    print(f"{Colors.BOLD}Total Iterations:{Colors.END} {result.total_iterations}")
    print(f"{Colors.BOLD}Terminated By:{Colors.END} {result.terminated_by}")
    if result.novelty_trajectory:
        print(f"\n{Colors.BOLD}Novelty Statistics:{Colors.END}")
        print(f" Mean Novelty: {result.novelty_trajectory.mean_novelty:.4f}")
        print(f" Max Novelty: {result.novelty_trajectory.max_novelty:.4f}")
        print(f" Jump Ratio: {result.novelty_trajectory.jump_ratio:.2%}")
    if result.breakthrough_task:
        print(f"\n{Colors.GREEN}{Colors.BOLD}{'='*60}{Colors.END}")
        print(f"{Colors.GREEN}{Colors.BOLD}BREAKTHROUGH TASK{Colors.END}")
        print(f"{Colors.GREEN}{Colors.BOLD}{'='*60}{Colors.END}")
        print(f"\n{Colors.BOLD}Expert:{Colors.END} {result.breakthrough_task.expert}")
        print(f"{Colors.BOLD}Domain:{Colors.END} {result.breakthrough_task.expert_domain}")
        print(f"{Colors.BOLD}Task:{Colors.END}")
        print(f" {Colors.CYAN}{result.breakthrough_task.task}{Colors.END}")
        print(f"\n{Colors.BOLD}Novelty Score:{Colors.END} {result.breakthrough_task.novelty_score:.4f}")
        print(f"{Colors.BOLD}Found at Iteration:{Colors.END} {result.breakthrough_task.iteration}")
    # Show trajectory summary
    print(f"\n{Colors.BOLD}Exploration Trajectory:{Colors.END}")
    for task in result.trajectory:
        # NOTE(review): the original assigned "" in both branches and
        # multiplied "" for the indicator — always-empty output, presumably
        # glyphs lost in transit. Reconstructed; confirm intended characters.
        marker = "★" if task.is_breakthrough else " "
        novelty_indicator = "█" * int(task.novelty_score * 10)
        print(f" {marker} [{task.iteration:2d}] {task.expert:20s} | {novelty_indicator:10s} {task.novelty_score:.3f}")
def save_result(result: TaskGenerationResult, output_path: str):
    """Serialize *result* to a pretty-printed UTF-8 JSON file at *output_path*."""
    payload = json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
    with open(output_path, "w", encoding="utf-8") as fp:
        fp.write(payload)
    print(f"\n{Colors.GREEN}Results saved to: {output_path}{Colors.END}")
async def run_demo(args):
    """Execute one generation run driven by the parsed CLI *args*.

    Builds the agent matching ``args.strategy``, runs it on the seed
    problem, prints the summary, optionally saves JSON output, and always
    closes the agent.
    """
    print_header("NOVELTY-DRIVEN TASK GENERATION")

    # Echo the effective configuration before starting.
    print(f"{Colors.BOLD}Configuration:{Colors.END}")
    for label, value in (
        ("Seed Problem", args.seed_problem),
        ("Strategy", args.strategy),
        ("Novelty Threshold", args.threshold),
        ("Max Iterations", args.max_iter),
        ("Language", args.language),
        ("LLM Model", args.model),
    ):
        print(f" {label}: {value}")

    # Keyword arguments shared by every agent class.
    shared = dict(
        max_iterations=args.max_iter,
        llm_model=args.model,
        embedding_model=args.embedding_model,
        language=args.language,
        temperature=args.temperature,
        on_iteration=None if args.quiet else print_iteration,
    )

    # Instantiate the agent for the selected termination strategy.
    if args.strategy == "breakthrough":
        agent = NoveltyDrivenTaskAgent(novelty_threshold=args.threshold, **shared)
    elif args.strategy == "exhaust":
        agent = ExhaustFrontierAgent(
            exhaustion_threshold=args.exhaust_threshold,
            window_size=args.window_size,
            min_iterations=args.min_iter,
            **shared,
        )
    elif args.strategy == "coverage":
        agent = CoverageTargetAgent(
            target_clusters=args.clusters,
            cluster_threshold=args.cluster_threshold,
            **shared,
        )
    else:
        # argparse `choices` should make this unreachable; guard anyway.
        print(f"{Colors.RED}Unknown strategy: {args.strategy}{Colors.END}")
        return

    print(f"\n{Colors.BOLD}Starting generation loop...{Colors.END}")
    print("-" * 60)
    try:
        result = await agent.run(args.seed_problem)
        print_result(result)
        if args.output:
            save_result(result, args.output)
    except Exception as e:
        print(f"\n{Colors.RED}Error: {e}{Colors.END}")
        if args.verbose:
            import traceback
            traceback.print_exc()
    finally:
        # Release the agent's resources even on failure.
        await agent.close()
def _build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser; the module docstring serves as the help epilog."""
    parser = argparse.ArgumentParser(
        description="Novelty-Driven Task Generation Demo",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    # Required argument
    parser.add_argument(
        "seed_problem",
        help="The seed problem or challenge to explore",
    )
    # Strategy selection
    parser.add_argument(
        "--strategy", "-s",
        choices=["breakthrough", "exhaust", "coverage"],
        default="breakthrough",
        help="Termination strategy (default: breakthrough)",
    )
    # Common options
    parser.add_argument(
        "--threshold", "-t", type=float, default=0.4,
        help="Novelty threshold for breakthrough (default: 0.4)",
    )
    parser.add_argument(
        "--max-iter", "-m", type=int, default=20,
        help="Maximum iterations (default: 20)",
    )
    parser.add_argument(
        "--language", "-l", choices=["en", "zh"], default="en",
        help="Language for prompts and experts (default: en)",
    )
    # Model options
    parser.add_argument(
        "--model", default="qwen3:8b",
        help="LLM model for task generation (default: qwen3:8b)",
    )
    parser.add_argument(
        "--embedding-model", default="qwen3-embedding:4b",
        help="Embedding model (default: qwen3-embedding:4b)",
    )
    parser.add_argument(
        "--temperature", type=float, default=0.7,
        help="LLM temperature (default: 0.7)",
    )
    # Exhaust strategy options
    parser.add_argument(
        "--exhaust-threshold", type=float, default=0.15,
        help="Exhaustion threshold for 'exhaust' strategy (default: 0.15)",
    )
    parser.add_argument(
        "--window-size", type=int, default=3,
        help="Window size for exhaustion check (default: 3)",
    )
    parser.add_argument(
        "--min-iter", type=int, default=5,
        help="Minimum iterations before exhaustion check (default: 5)",
    )
    # Coverage strategy options
    parser.add_argument(
        "--clusters", type=int, default=5,
        help="Target clusters for 'coverage' strategy (default: 5)",
    )
    parser.add_argument(
        "--cluster-threshold", type=float, default=0.7,
        help="Cluster similarity threshold (default: 0.7)",
    )
    # Output options
    parser.add_argument(
        "--output", "-o",
        help="Save results to JSON file",
    )
    parser.add_argument(
        "--quiet", "-q", action="store_true",
        help="Suppress iteration output",
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true",
        help="Enable verbose logging",
    )
    return parser


def main():
    """CLI entry point: parse args, configure logging, run the async demo."""
    args = _build_parser().parse_args()

    # Configure logging: DEBUG with timestamps when --verbose, else WARNING.
    if args.verbose:
        logging.basicConfig(
            level=logging.DEBUG,
            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        )
    else:
        logging.basicConfig(level=logging.WARNING)

    # Run the demo
    asyncio.run(run_demo(args))


if __name__ == "__main__":
    main()