feat: Add experiments framework and novelty-driven agent loop
- Add complete experiments directory with pilot study infrastructure - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective) - Human assessment tool with React frontend and FastAPI backend - AUT flexibility analysis with jump signal detection - Result visualization and metrics computation - Add novelty-driven agent loop module (experiments/novelty_loop/) - NoveltyDrivenTaskAgent with expert perspective perturbation - Three termination strategies: breakthrough, exhaust, coverage - Interactive CLI demo with colored output - Embedding-based novelty scoring - Add DDC knowledge domain classification data (en/zh) - Add CLAUDE.md project documentation - Update research report with experiment findings Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
290
experiments/generate_ideas.py
Normal file
290
experiments/generate_ideas.py
Normal file
@@ -0,0 +1,290 @@
|
||||
"""
|
||||
Main experiment runner for the 5-condition idea generation study.
|
||||
|
||||
Usage:
|
||||
# Run single query through all conditions
|
||||
python -m experiments.generate_ideas --pilot --query "Chair"
|
||||
|
||||
# Run all pilot queries
|
||||
python -m experiments.generate_ideas --pilot
|
||||
|
||||
# Run specific conditions
|
||||
python -m experiments.generate_ideas --query "Bicycle" --conditions c1_direct c4_full_pipeline
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
# Add backend to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
|
||||
|
||||
from experiments.config import (
|
||||
CONDITIONS, CONDITION_NAMES, DATA_DIR, RESULTS_DIR, EXPERIMENT_CONFIG
|
||||
)
|
||||
from experiments.conditions import (
|
||||
c1_generate, c2_generate, c3_generate, c4_generate, c5_generate
|
||||
)
|
||||
|
||||
# Configure logging
# Module-wide logging: timestamped INFO-level messages via the root handler.
# All functions below log through this module-level `logger`.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Condition function mapping
# Maps each experimental condition key to its async generator function
# (imported from experiments.conditions). NOTE(review): keys presumably
# mirror CONDITIONS in experiments.config — verify they stay in sync.
CONDITION_FUNCTIONS = {
    "c1_direct": c1_generate,
    "c2_expert_only": c2_generate,
    "c3_attribute_only": c3_generate,
    "c4_full_pipeline": c4_generate,
    "c5_random_perspective": c5_generate,
}
|
||||
|
||||
|
||||
def load_queries() -> List[Dict[str, Any]]:
    """Read the pilot query definitions from ``DATA_DIR/queries.json``.

    Returns:
        The list stored under the "queries" key of the JSON document,
        or an empty list when that key is absent.
    """
    source = DATA_DIR / "queries.json"
    with open(source, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    return payload.get("queries", [])
|
||||
|
||||
|
||||
def save_results(results: List[Dict[str, Any]], filename: str) -> Path:
    """Persist *results* as pretty-printed UTF-8 JSON under ``RESULTS_DIR``.

    Args:
        results: JSON-serializable payload to write.
        filename: Name of the output file inside ``RESULTS_DIR``.

    Returns:
        Path of the file that was written.
    """
    # Ensure the results directory exists before writing.
    RESULTS_DIR.mkdir(parents=True, exist_ok=True)
    destination = RESULTS_DIR / filename
    with open(destination, "w", encoding="utf-8") as handle:
        json.dump(results, handle, indent=2, ensure_ascii=False)
    return destination
|
||||
|
||||
|
||||
async def run_condition(
    query: str,
    condition: str
) -> Dict[str, Any]:
    """Execute a single experimental condition for one query.

    Args:
        query: The idea-generation prompt (e.g. "Chair").
        condition: Key into ``CONDITION_FUNCTIONS`` (e.g. "c1_direct").

    Returns:
        The condition's result dict as produced by its generator function.

    Raises:
        ValueError: If *condition* is not a recognized condition key.
    """
    generate = CONDITION_FUNCTIONS.get(condition)
    if generate is None:
        raise ValueError(f"Unknown condition: {condition}")
    return await generate(query)
|
||||
|
||||
|
||||
def _prepare_queries(queries: Optional[List[str]]) -> List[tuple]:
    """Resolve the (query_id, query, category) triples to execute.

    When *queries* is None the full pilot set is loaded from disk;
    otherwise the given strings are wrapped with synthetic IDs and
    the "custom" category.
    """
    if queries is None:
        return [(q["id"], q["query"], q["category"]) for q in load_queries()]
    return [(f"Q{i}", q, "custom") for i, q in enumerate(queries)]


async def _run_query_conditions(
    query: str,
    conditions: List[str]
) -> Dict[str, Any]:
    """Run every condition for one query, capturing per-condition errors.

    A single failing condition must not abort the whole experiment, so
    each exception is logged and recorded as a failed outcome instead of
    being propagated.
    """
    outcomes: Dict[str, Any] = {}
    for condition in conditions:
        logger.info(f"\n  Running {CONDITION_NAMES.get(condition, condition)}...")
        try:
            result = await run_condition(query, condition)
            outcomes[condition] = {
                "success": True,
                "idea_count": result["idea_count"],
                "ideas": result["ideas"],
                "ideas_with_source": result.get("ideas_with_source", []),
                "metadata": result["metadata"]
            }
            logger.info(f"  Generated {result['idea_count']} ideas")
        except Exception as e:
            logger.error(f"  Error in {condition}: {e}")
            outcomes[condition] = {
                "success": False,
                "error": str(e),
                "idea_count": 0,
                "ideas": []
            }
    return outcomes


async def run_experiment(
    queries: Optional[List[str]] = None,
    conditions: Optional[List[str]] = None,
    save_intermediate: bool = True
) -> Dict[str, Any]:
    """
    Run the full experiment.

    Args:
        queries: List of queries to run (None = all pilot queries)
        conditions: List of conditions to run (None = all conditions)
        save_intermediate: Whether to save results after each query

    Returns:
        Complete experiment results, including per-query condition
        outcomes and summary statistics.
    """
    queries_to_run = _prepare_queries(queries)
    # Default to all conditions
    conditions = conditions or CONDITIONS

    logger.info(f"Starting experiment with {len(queries_to_run)} queries and {len(conditions)} conditions")
    logger.info(f"Conditions: {', '.join(conditions)}")

    experiment_results = {
        # Timestamp-based ID also keys the output filenames below.
        "experiment_id": datetime.now().strftime("%Y%m%d_%H%M%S"),
        "config": EXPERIMENT_CONFIG,
        "conditions": conditions,
        "query_count": len(queries_to_run),
        "results": [],
        "summary": {}
    }

    for query_id, query, category in queries_to_run:
        logger.info(f"\n{'='*60}")
        logger.info(f"Processing query: {query} (ID: {query_id}, Category: {category})")
        logger.info(f"{'='*60}")

        query_results = {
            "query_id": query_id,
            "query": query,
            "category": category,
            "conditions": await _run_query_conditions(query, conditions)
        }
        experiment_results["results"].append(query_results)

        # Save intermediate results so a crash mid-run loses at most one query.
        if save_intermediate:
            save_results(
                experiment_results,
                f"experiment_{experiment_results['experiment_id']}_intermediate.json"
            )

    # Calculate summary statistics
    experiment_results["summary"] = calculate_summary(experiment_results)

    # Save final results
    output_path = save_results(
        experiment_results,
        f"experiment_{experiment_results['experiment_id']}_complete.json"
    )

    logger.info(f"\n{'='*60}")
    logger.info("Experiment complete!")
    logger.info(f"Results saved to: {output_path}")
    logger.info(f"{'='*60}")

    return experiment_results
|
||||
|
||||
|
||||
def calculate_summary(results: Dict[str, Any]) -> Dict[str, Any]:
    """Compute per-condition aggregate statistics over all query results.

    Args:
        results: Experiment results dict containing "results" (list of
            per-query outcome dicts) and "conditions" (list of keys).

    Returns:
        Summary dict with the total query count and, for each condition,
        its idea totals, success/failure counts, and the mean idea count
        per successful query (0 when there were no successes).
    """
    query_entries = results["results"]
    summary: Dict[str, Any] = {
        "total_queries": len(query_entries),
        "conditions": {},
    }

    for condition in results["conditions"]:
        successes = 0
        failures = 0
        total_ideas = 0

        for entry in query_entries:
            outcome = entry["conditions"].get(condition, {})
            if outcome.get("success", False):
                successes += 1
                total_ideas += outcome.get("idea_count", 0)
            else:
                failures += 1

        summary["conditions"][condition] = {
            "total_ideas": total_ideas,
            "successful_queries": successes,
            "failed_queries": failures,
            # Average only over queries that actually produced ideas.
            "avg_ideas_per_query": (
                total_ideas / successes if successes > 0 else 0
            ),
        }

    return summary
|
||||
|
||||
|
||||
def print_summary(results: Dict[str, Any]):
    """Pretty-print the per-condition experiment summary to stdout.

    Args:
        results: Experiment results dict; only its "summary" key is read.
    """
    divider = "=" * 70
    rule = "-" * 70

    print("\n" + divider)
    print("EXPERIMENT SUMMARY")
    print(divider)

    summary = results.get("summary", {})
    print(f"\nTotal queries processed: {summary.get('total_queries', 0)}")

    print("\nResults by condition:")
    print(rule)
    print(f"{'Condition':<30} {'Success':<10} {'Total Ideas':<15} {'Avg/Query':<10}")
    print(rule)

    for condition, stats in summary.get("conditions", {}).items():
        # Fall back to the raw key when no display name is registered.
        label = CONDITION_NAMES.get(condition, condition)
        print(
            f"{label:<30} "
            f"{stats.get('successful_queries', 0):<10} "
            f"{stats.get('total_ideas', 0):<15} "
            f"{stats.get('avg_ideas_per_query', 0):<10.1f}"
        )

    print(rule)
|
||||
|
||||
|
||||
async def main():
    """CLI entry point: parse arguments, run the experiment, print results.

    Exits with status 1 (after printing help) when neither --pilot nor
    --query is supplied.
    """
    parser = argparse.ArgumentParser(
        description="Run the 5-condition idea generation experiment"
    )
    parser.add_argument(
        "--pilot",
        action="store_true",
        help="Run pilot experiment with all 10 queries"
    )
    parser.add_argument(
        "--query",
        type=str,
        help="Run single query (e.g., 'Chair')"
    )
    parser.add_argument(
        "--conditions",
        nargs="+",
        choices=CONDITIONS,
        help="Specific conditions to run"
    )
    parser.add_argument(
        "--no-save-intermediate",
        action="store_true",
        help="Don't save intermediate results"
    )
    args = parser.parse_args()

    # Determine queries to run: a single explicit query wins over --pilot.
    if args.query:
        selected = [args.query]
    elif args.pilot:
        selected = None  # run_experiment falls back to the full pilot set
    else:
        parser.print_help()
        print("\nError: Must specify --pilot or --query")
        sys.exit(1)

    outcome = await run_experiment(
        queries=selected,
        conditions=args.conditions,
        save_intermediate=not args.no_save_intermediate
    )

    print_summary(outcome)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Drive the async CLI entry point when executed as a script/module.
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user