feat: Add experiments framework and novelty-driven agent loop

- Add complete experiments directory with pilot study infrastructure
  - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective)
  - Human assessment tool with React frontend and FastAPI backend
  - AUT flexibility analysis with jump signal detection (see the sketch after this list)
  - Result visualization and metrics computation
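
  A minimal sketch of the jump-signal idea, assuming jumps are flagged by low
  embedding cosine similarity between consecutive AUT responses; detect_jumps,
  the embed() helper, and the 0.5 threshold are illustrative assumptions, not
  the shipped implementation:

      # Hypothetical sketch, not the repo's actual analysis code.
      from typing import Callable, List
      import numpy as np

      def detect_jumps(
          responses: List[str],
          embed: Callable[[str], np.ndarray],
          threshold: float = 0.5,  # assumed cutoff; tune against human ratings
      ) -> List[int]:
          """Indices where response i is a semantic jump from response i-1."""
          vecs = [embed(r) for r in responses]
          jumps = []
          for i in range(1, len(vecs)):
              a, b = vecs[i - 1], vecs[i]
              cos = float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
              if cos < threshold:  # far from the previous idea = a jump
                  jumps.append(i)
          return jumps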

- Add novelty-driven agent loop module (experiments/novelty_loop/)
  - NoveltyDrivenTaskAgent with expert perspective perturbation
  - Three termination strategies: breakthrough, exhaust, coverage
  - Interactive CLI demo with colored output
  - Embedding-based novelty scoring (see the sketch after this list)
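
  A rough sketch of how embedding-based novelty scoring can drive the
  breakthrough and exhaust stop rules (coverage is omitted); every name and
  threshold below is an assumption for illustration, not the novelty_loop API:

      # Illustrative only: score a candidate by distance from accepted ideas,
      # then stop on "breakthrough" (one very novel idea) or "exhaust"
      # (novelty has plateaued for a full window).
      from typing import List, Optional
      import numpy as np

      def novelty_score(candidate: np.ndarray, accepted: List[np.ndarray]) -> float:
          """1 - max cosine similarity to any accepted idea (1.0 if none yet)."""
          if not accepted:
              return 1.0
          sims = [
              float(np.dot(candidate, v) / (np.linalg.norm(candidate) * np.linalg.norm(v)))
              for v in accepted
          ]
          return 1.0 - max(sims)

      def should_stop(
          scores: List[float],
          breakthrough: float = 0.8,   # assumed novelty bar for early stop
          exhaust_window: int = 5,
          exhaust_floor: float = 0.2,  # assumed plateau level
      ) -> Optional[str]:
          """Return a stop reason, or None to keep iterating."""
          if scores and scores[-1] >= breakthrough:
              return "breakthrough"
          recent = scores[-exhaust_window:]
          if len(recent) == exhaust_window and max(recent) < exhaust_floor:
              return "exhaust"
          return None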

- Add DDC knowledge domain classification data (en/zh)
- Add CLAUDE.md project documentation
- Update research report with experiment findings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
commit 43c025e060
parent 26a56a2a07
Date: 2026-01-20 10:16:21 +08:00

81 changed files with 18766 additions and 2 deletions


@@ -0,0 +1,290 @@
"""
Main experiment runner for the 5-condition idea generation study.
Usage:
# Run single query through all conditions
python -m experiments.generate_ideas --pilot --query "Chair"
# Run all pilot queries
python -m experiments.generate_ideas --pilot
# Run specific conditions
python -m experiments.generate_ideas --query "Bicycle" --conditions c1_direct c4_full_pipeline
"""

import sys
import json
import argparse
import asyncio
import logging
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional

# Add backend to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))

from experiments.config import (
CONDITIONS, CONDITION_NAMES, DATA_DIR, RESULTS_DIR, EXPERIMENT_CONFIG
)
from experiments.conditions import (
c1_generate, c2_generate, c3_generate, c4_generate, c5_generate
)

# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Condition function mapping
CONDITION_FUNCTIONS = {
"c1_direct": c1_generate,
"c2_expert_only": c2_generate,
"c3_attribute_only": c3_generate,
"c4_full_pipeline": c4_generate,
"c5_random_perspective": c5_generate,
}
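# Each c*_generate coroutine returns a dict; run_experiment reads its
# "idea_count", "ideas", "metadata", and optional "ideas_with_source" keys.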


def load_queries() -> List[Dict[str, Any]]:
"""Load pilot queries from data file."""
queries_file = DATA_DIR / "queries.json"
with open(queries_file, "r", encoding="utf-8") as f:
data = json.load(f)
return data.get("queries", [])


def save_results(results: List[Dict[str, Any]], filename: str) -> Path:
"""Save results to JSON file."""
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
output_path = RESULTS_DIR / filename
with open(output_path, "w", encoding="utf-8") as f:
json.dump(results, f, indent=2, ensure_ascii=False)
return output_path


async def run_condition(
query: str,
condition: str
) -> Dict[str, Any]:
"""Run a single condition for a query."""
if condition not in CONDITION_FUNCTIONS:
raise ValueError(f"Unknown condition: {condition}")
generate_fn = CONDITION_FUNCTIONS[condition]
result = await generate_fn(query)
return result


async def run_experiment(
queries: Optional[List[str]] = None,
conditions: Optional[List[str]] = None,
save_intermediate: bool = True
) -> Dict[str, Any]:
"""
Run the full experiment.
Args:
queries: List of queries to run (None = all pilot queries)
conditions: List of conditions to run (None = all conditions)
save_intermediate: Whether to save results after each query
Returns:
Complete experiment results
"""
# Load queries if not provided
if queries is None:
query_data = load_queries()
queries_to_run = [(q["id"], q["query"], q["category"]) for q in query_data]
else:
queries_to_run = [(f"Q{i}", q, "custom") for i, q in enumerate(queries)]

    # Default to all conditions
conditions = conditions or CONDITIONS
logger.info(f"Starting experiment with {len(queries_to_run)} queries and {len(conditions)} conditions")
logger.info(f"Conditions: {', '.join(conditions)}")

    experiment_results = {
"experiment_id": datetime.now().strftime("%Y%m%d_%H%M%S"),
"config": EXPERIMENT_CONFIG,
"conditions": conditions,
"query_count": len(queries_to_run),
"results": [],
"summary": {}
}

    for query_id, query, category in queries_to_run:
logger.info(f"\n{'='*60}")
logger.info(f"Processing query: {query} (ID: {query_id}, Category: {category})")
logger.info(f"{'='*60}")
query_results = {
"query_id": query_id,
"query": query,
"category": category,
"conditions": {}
}

        for condition in conditions:
logger.info(f"\n Running {CONDITION_NAMES.get(condition, condition)}...")
try:
result = await run_condition(query, condition)
query_results["conditions"][condition] = {
"success": True,
"idea_count": result["idea_count"],
"ideas": result["ideas"],
"ideas_with_source": result.get("ideas_with_source", []),
"metadata": result["metadata"]
}
logger.info(f" Generated {result['idea_count']} ideas")
except Exception as e:
logger.error(f" Error in {condition}: {e}")
query_results["conditions"][condition] = {
"success": False,
"error": str(e),
"idea_count": 0,
"ideas": []
}
experiment_results["results"].append(query_results)

        # Save intermediate results (overwritten after every query, so a
        # crashed run still leaves all completed queries on disk)
if save_intermediate:
save_results(
experiment_results,
f"experiment_{experiment_results['experiment_id']}_intermediate.json"
)

    # Calculate summary statistics
experiment_results["summary"] = calculate_summary(experiment_results)

    # Save final results
output_path = save_results(
experiment_results,
f"experiment_{experiment_results['experiment_id']}_complete.json"
)
logger.info(f"\n{'='*60}")
logger.info("Experiment complete!")
logger.info(f"Results saved to: {output_path}")
logger.info(f"{'='*60}")
return experiment_results


def calculate_summary(results: Dict[str, Any]) -> Dict[str, Any]:
"""Calculate summary statistics for the experiment."""
summary = {
"total_queries": len(results["results"]),
"conditions": {}
}
for condition in results["conditions"]:
condition_stats = {
"total_ideas": 0,
"successful_queries": 0,
"failed_queries": 0,
"avg_ideas_per_query": 0
}
for query_result in results["results"]:
cond_result = query_result["conditions"].get(condition, {})
if cond_result.get("success", False):
condition_stats["successful_queries"] += 1
condition_stats["total_ideas"] += cond_result.get("idea_count", 0)
else:
condition_stats["failed_queries"] += 1
if condition_stats["successful_queries"] > 0:
condition_stats["avg_ideas_per_query"] = (
condition_stats["total_ideas"] / condition_stats["successful_queries"]
)
summary["conditions"][condition] = condition_stats
return summary


def print_summary(results: Dict[str, Any]):
"""Print a formatted summary of the experiment."""
print("\n" + "=" * 70)
print("EXPERIMENT SUMMARY")
print("=" * 70)
summary = results.get("summary", {})
print(f"\nTotal queries processed: {summary.get('total_queries', 0)}")
print("\nResults by condition:")
print("-" * 70)
print(f"{'Condition':<30} {'Success':<10} {'Total Ideas':<15} {'Avg/Query':<10}")
print("-" * 70)
for condition, stats in summary.get("conditions", {}).items():
name = CONDITION_NAMES.get(condition, condition)
success = stats.get("successful_queries", 0)
total = stats.get("total_ideas", 0)
avg = stats.get("avg_ideas_per_query", 0)
print(f"{name:<30} {success:<10} {total:<15} {avg:<10.1f}")
print("-" * 70)


async def main():
parser = argparse.ArgumentParser(
description="Run the 5-condition idea generation experiment"
)
parser.add_argument(
"--pilot",
action="store_true",
help="Run pilot experiment with all 10 queries"
)
parser.add_argument(
"--query",
type=str,
help="Run single query (e.g., 'Chair')"
)
parser.add_argument(
"--conditions",
nargs="+",
choices=CONDITIONS,
help="Specific conditions to run"
)
parser.add_argument(
"--no-save-intermediate",
action="store_true",
help="Don't save intermediate results"
)
args = parser.parse_args()

    # Determine queries to run
if args.query:
queries = [args.query]
elif args.pilot:
queries = None # Will load all pilot queries
else:
parser.print_help()
print("\nError: Must specify --pilot or --query")
sys.exit(1)

    # Run experiment
results = await run_experiment(
queries=queries,
conditions=args.conditions,
save_intermediate=not args.no_save_intermediate
)

    # Print summary
print_summary(results)


if __name__ == "__main__":
asyncio.run(main())