feat: Add experiments framework and novelty-driven agent loop

- Add complete experiments directory with pilot study infrastructure
  - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective)
  - Human assessment tool with React frontend and FastAPI backend
  - AUT flexibility analysis with jump signal detection
  - Result visualization and metrics computation

- Add novelty-driven agent loop module (experiments/novelty_loop/)
  - NoveltyDrivenTaskAgent with expert perspective perturbation
  - Three termination strategies: breakthrough, exhaust, coverage
  - Interactive CLI demo with colored output
  - Embedding-based novelty scoring

- Add DDC knowledge domain classification data (en/zh)
- Add CLAUDE.md project documentation
- Update research report with experiment findings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-20 10:16:21 +08:00
parent 26a56a2a07
commit 43c025e060
81 changed files with 18766 additions and 2 deletions

313
experiments/novelty_loop/demo.py Executable file
View File

@@ -0,0 +1,313 @@
#!/usr/bin/env python3
"""
Novelty-Driven Task Generation Demo
Interactive CLI for exploring the novelty-driven task generation agent.
Examples:
# Basic usage with default settings
python demo.py "Improve urban transportation"
# Custom threshold and iterations
python demo.py "Design a better bicycle" --threshold 0.35 --max-iter 15
# Use Chinese language
python demo.py "改进城市交通" --language zh
# Use exhaustion strategy (explore until stuck)
python demo.py "Sustainable energy solutions" --strategy exhaust
# Use coverage strategy (find N distinct clusters)
python demo.py "Future of education" --strategy coverage --clusters 5
# Save results to file
python demo.py "Smart home innovations" --output results.json
# Verbose mode with detailed logging
python demo.py "Healthcare improvements" --verbose
"""
import argparse
import asyncio
import json
import logging
import sys
from datetime import datetime
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from experiments.novelty_loop.agent import (
NoveltyDrivenTaskAgent,
ExhaustFrontierAgent,
CoverageTargetAgent,
GeneratedTask,
TaskGenerationResult
)
# ANSI color codes for terminal output
class Colors:
    """ANSI SGR escape sequences used to colorize terminal output.

    Every colored span must be terminated with ``END`` to reset the
    terminal's text attributes.
    """

    # Foreground colors
    HEADER = '\033[95m'
    BLUE = '\033[94m'
    CYAN = '\033[96m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    # Text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Reset sequence
    END = '\033[0m'
def print_header(text: str):
    """Print *text* centered inside a bold, colored 60-column banner."""
    rule = f"{Colors.BOLD}{Colors.HEADER}{'='*60}{Colors.END}"
    print(f"\n{rule}")
    print(f"{Colors.BOLD}{Colors.HEADER}{text.center(60)}{Colors.END}")
    print(f"{rule}\n")
def print_iteration(task: GeneratedTask):
    """Print one iteration's expert, task and novelty score with colors.

    Passed to the agents as the ``on_iteration`` callback (unless --quiet).
    """
    print(f"\n{Colors.BOLD}Iteration {task.iteration}{Colors.END}")
    print(f" {Colors.YELLOW}Expert:{Colors.END} {task.expert} ({task.expert_domain})")
    print(f" {Colors.YELLOW}Task:{Colors.END} {task.task}")
    # 20-cell progress bar; filled cells scale with novelty_score in [0, 1].
    # NOTE(review): the original multiplied empty strings here, which always
    # rendered a blank bar — presumably the block characters were lost in
    # transit. Reconstructed with full/light block glyphs; confirm the
    # intended characters.
    filled = int(task.novelty_score * 20)
    novelty_bar = "█" * filled + "░" * (20 - filled)
    print(f" {Colors.YELLOW}Novelty:{Colors.END} [{novelty_bar}] {task.novelty_score:.4f}")
    if task.is_breakthrough:
        print(f" {Colors.GREEN}{Colors.BOLD}★ BREAKTHROUGH! ★{Colors.END}")
def print_result(result: TaskGenerationResult):
    """Print the final run summary: stats, breakthrough task and trajectory."""
    print_header("RESULTS")
    print(f"{Colors.BOLD}Seed Problem:{Colors.END} {result.seed_problem}")
    print(f"{Colors.BOLD}Total Iterations:{Colors.END} {result.total_iterations}")
    print(f"{Colors.BOLD}Terminated By:{Colors.END} {result.terminated_by}")
    if result.novelty_trajectory:
        print(f"\n{Colors.BOLD}Novelty Statistics:{Colors.END}")
        print(f" Mean Novelty: {result.novelty_trajectory.mean_novelty:.4f}")
        print(f" Max Novelty: {result.novelty_trajectory.max_novelty:.4f}")
        print(f" Jump Ratio: {result.novelty_trajectory.jump_ratio:.2%}")
    if result.breakthrough_task:
        print(f"\n{Colors.GREEN}{Colors.BOLD}{'='*60}{Colors.END}")
        print(f"{Colors.GREEN}{Colors.BOLD}BREAKTHROUGH TASK{Colors.END}")
        print(f"{Colors.GREEN}{Colors.BOLD}{'='*60}{Colors.END}")
        print(f"\n{Colors.BOLD}Expert:{Colors.END} {result.breakthrough_task.expert}")
        print(f"{Colors.BOLD}Domain:{Colors.END} {result.breakthrough_task.expert_domain}")
        print(f"{Colors.BOLD}Task:{Colors.END}")
        print(f" {Colors.CYAN}{result.breakthrough_task.task}{Colors.END}")
        print(f"\n{Colors.BOLD}Novelty Score:{Colors.END} {result.breakthrough_task.novelty_score:.4f}")
        print(f"{Colors.BOLD}Found at Iteration:{Colors.END} {result.breakthrough_task.iteration}")
    # Show trajectory summary
    print(f"\n{Colors.BOLD}Exploration Trajectory:{Colors.END}")
    for task in result.trajectory:
        # NOTE(review): the original assigned "" in both branches and
        # multiplied "" for the indicator — always-empty output, presumably
        # glyphs lost in transit. Reconstructed; confirm intended characters.
        marker = "★" if task.is_breakthrough else " "
        novelty_indicator = "█" * int(task.novelty_score * 10)
        print(f" {marker} [{task.iteration:2d}] {task.expert:20s} | {novelty_indicator:10s} {task.novelty_score:.3f}")
def save_result(result: TaskGenerationResult, output_path: str):
    """Serialize *result* to a pretty-printed UTF-8 JSON file at *output_path*."""
    payload = json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
    with open(output_path, "w", encoding="utf-8") as fp:
        fp.write(payload)
    print(f"\n{Colors.GREEN}Results saved to: {output_path}{Colors.END}")
async def run_demo(args):
    """Execute one generation run driven by the parsed CLI *args*.

    Builds the agent matching ``args.strategy``, runs it on the seed
    problem, prints the summary, optionally saves JSON output, and always
    closes the agent.
    """
    print_header("NOVELTY-DRIVEN TASK GENERATION")

    # Echo the effective configuration before starting.
    print(f"{Colors.BOLD}Configuration:{Colors.END}")
    for label, value in (
        ("Seed Problem", args.seed_problem),
        ("Strategy", args.strategy),
        ("Novelty Threshold", args.threshold),
        ("Max Iterations", args.max_iter),
        ("Language", args.language),
        ("LLM Model", args.model),
    ):
        print(f" {label}: {value}")

    # Keyword arguments shared by every agent class.
    shared = dict(
        max_iterations=args.max_iter,
        llm_model=args.model,
        embedding_model=args.embedding_model,
        language=args.language,
        temperature=args.temperature,
        on_iteration=None if args.quiet else print_iteration,
    )

    # Instantiate the agent for the selected termination strategy.
    if args.strategy == "breakthrough":
        agent = NoveltyDrivenTaskAgent(novelty_threshold=args.threshold, **shared)
    elif args.strategy == "exhaust":
        agent = ExhaustFrontierAgent(
            exhaustion_threshold=args.exhaust_threshold,
            window_size=args.window_size,
            min_iterations=args.min_iter,
            **shared,
        )
    elif args.strategy == "coverage":
        agent = CoverageTargetAgent(
            target_clusters=args.clusters,
            cluster_threshold=args.cluster_threshold,
            **shared,
        )
    else:
        # argparse `choices` should make this unreachable; guard anyway.
        print(f"{Colors.RED}Unknown strategy: {args.strategy}{Colors.END}")
        return

    print(f"\n{Colors.BOLD}Starting generation loop...{Colors.END}")
    print("-" * 60)
    try:
        result = await agent.run(args.seed_problem)
        print_result(result)
        if args.output:
            save_result(result, args.output)
    except Exception as e:
        print(f"\n{Colors.RED}Error: {e}{Colors.END}")
        if args.verbose:
            import traceback
            traceback.print_exc()
    finally:
        # Release the agent's resources even on failure.
        await agent.close()
def _build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser; the module docstring serves as the help epilog."""
    parser = argparse.ArgumentParser(
        description="Novelty-Driven Task Generation Demo",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    # Required argument
    parser.add_argument(
        "seed_problem",
        help="The seed problem or challenge to explore",
    )
    # Strategy selection
    parser.add_argument(
        "--strategy", "-s",
        choices=["breakthrough", "exhaust", "coverage"],
        default="breakthrough",
        help="Termination strategy (default: breakthrough)",
    )
    # Common options
    parser.add_argument(
        "--threshold", "-t", type=float, default=0.4,
        help="Novelty threshold for breakthrough (default: 0.4)",
    )
    parser.add_argument(
        "--max-iter", "-m", type=int, default=20,
        help="Maximum iterations (default: 20)",
    )
    parser.add_argument(
        "--language", "-l", choices=["en", "zh"], default="en",
        help="Language for prompts and experts (default: en)",
    )
    # Model options
    parser.add_argument(
        "--model", default="qwen3:8b",
        help="LLM model for task generation (default: qwen3:8b)",
    )
    parser.add_argument(
        "--embedding-model", default="qwen3-embedding:4b",
        help="Embedding model (default: qwen3-embedding:4b)",
    )
    parser.add_argument(
        "--temperature", type=float, default=0.7,
        help="LLM temperature (default: 0.7)",
    )
    # Exhaust strategy options
    parser.add_argument(
        "--exhaust-threshold", type=float, default=0.15,
        help="Exhaustion threshold for 'exhaust' strategy (default: 0.15)",
    )
    parser.add_argument(
        "--window-size", type=int, default=3,
        help="Window size for exhaustion check (default: 3)",
    )
    parser.add_argument(
        "--min-iter", type=int, default=5,
        help="Minimum iterations before exhaustion check (default: 5)",
    )
    # Coverage strategy options
    parser.add_argument(
        "--clusters", type=int, default=5,
        help="Target clusters for 'coverage' strategy (default: 5)",
    )
    parser.add_argument(
        "--cluster-threshold", type=float, default=0.7,
        help="Cluster similarity threshold (default: 0.7)",
    )
    # Output options
    parser.add_argument(
        "--output", "-o",
        help="Save results to JSON file",
    )
    parser.add_argument(
        "--quiet", "-q", action="store_true",
        help="Suppress iteration output",
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true",
        help="Enable verbose logging",
    )
    return parser


def main():
    """CLI entry point: parse args, configure logging, run the async demo."""
    args = _build_parser().parse_args()

    # Configure logging: DEBUG with timestamps when --verbose, else WARNING.
    if args.verbose:
        logging.basicConfig(
            level=logging.DEBUG,
            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        )
    else:
        logging.basicConfig(level=logging.WARNING)

    # Run the demo
    asyncio.run(run_demo(args))


if __name__ == "__main__":
    main()