- Add complete experiments directory with pilot study infrastructure
  - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective)
  - Human assessment tool with React frontend and FastAPI backend
  - AUT flexibility analysis with jump signal detection
  - Result visualization and metrics computation
- Add novelty-driven agent loop module (experiments/novelty_loop/)
  - NoveltyDrivenTaskAgent with expert perspective perturbation
  - Three termination strategies: breakthrough, exhaust, coverage
  - Interactive CLI demo with colored output
  - Embedding-based novelty scoring (see the sketch below)
- Add DDC knowledge domain classification data (en/zh)
- Add CLAUDE.md project documentation
- Update research report with experiment findings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
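The commit only names the novelty_loop features; the module's code is not shown here. As a rough, hypothetical sketch of what "embedding-based novelty scoring" with a "breakthrough" stop condition could look like (novelty_score, is_breakthrough, and BREAKTHROUGH_THRESHOLD are placeholder names, not the actual novelty_loop API):

# Hypothetical illustration only: embedding-based novelty scoring with a
# "breakthrough" termination check. Names and the threshold value are
# assumptions, not the NoveltyDrivenTaskAgent implementation.
import math
from typing import List, Sequence

BREAKTHROUGH_THRESHOLD = 0.6  # assumed cutoff; a real agent would make this configurable


def cosine_similarity(a: Sequence[float], b: Sequence[float]) -> float:
    """Cosine similarity between two embedding vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return dot / norm if norm else 0.0


def novelty_score(candidate: Sequence[float], history: List[Sequence[float]]) -> float:
    """Novelty = 1 - max similarity to any previously accepted idea embedding."""
    if not history:
        return 1.0
    return 1.0 - max(cosine_similarity(candidate, h) for h in history)


def is_breakthrough(candidate: Sequence[float], history: List[Sequence[float]]) -> bool:
    """Breakthrough strategy: stop once an idea is sufficiently far from everything so far."""
    return novelty_score(candidate, history) >= BREAKTHROUGH_THRESHOLD

Under this reading, "exhaust" would presumably keep iterating until novelty stops improving and "coverage" until enough distinct domains have been reached; the actual semantics live in experiments/novelty_loop/.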
"""
|
|
Main experiment runner for the 5-condition idea generation study.
|
|
|
|
Usage:
|
|
# Run single query through all conditions
|
|
python -m experiments.generate_ideas --pilot --query "Chair"
|
|
|
|
# Run all pilot queries
|
|
python -m experiments.generate_ideas --pilot
|
|
|
|
# Run specific conditions
|
|
python -m experiments.generate_ideas --query "Bicycle" --conditions c1_direct c4_full_pipeline
|
|
"""
|
|
|
|
import sys
|
|
import json
|
|
import argparse
|
|
import asyncio
|
|
import logging
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import List, Dict, Any, Optional
|
|
|
|
# Add backend to path for imports
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
|
|
|
|
from experiments.config import (
|
|
CONDITIONS, CONDITION_NAMES, DATA_DIR, RESULTS_DIR, EXPERIMENT_CONFIG
|
|
)
|
|
from experiments.conditions import (
|
|
c1_generate, c2_generate, c3_generate, c4_generate, c5_generate
|
|
)
|
|
|
|
# Configure logging
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format='%(asctime)s - %(levelname)s - %(message)s'
|
|
)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Condition function mapping
|
|
CONDITION_FUNCTIONS = {
|
|
"c1_direct": c1_generate,
|
|
"c2_expert_only": c2_generate,
|
|
"c3_attribute_only": c3_generate,
|
|
"c4_full_pipeline": c4_generate,
|
|
"c5_random_perspective": c5_generate,
|
|
}
|
|
|
|
|
|
def load_queries() -> List[Dict[str, Any]]:
|
|
"""Load pilot queries from data file."""
|
|
queries_file = DATA_DIR / "queries.json"
|
|
with open(queries_file, "r", encoding="utf-8") as f:
|
|
data = json.load(f)
|
|
return data.get("queries", [])
|
|
|
|
|
|
def save_results(results: List[Dict[str, Any]], filename: str) -> Path:
|
|
"""Save results to JSON file."""
|
|
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
|
output_path = RESULTS_DIR / filename
|
|
with open(output_path, "w", encoding="utf-8") as f:
|
|
json.dump(results, f, indent=2, ensure_ascii=False)
|
|
return output_path
|
|
|
|
|
|
async def run_condition(
|
|
query: str,
|
|
condition: str
|
|
) -> Dict[str, Any]:
|
|
"""Run a single condition for a query."""
|
|
if condition not in CONDITION_FUNCTIONS:
|
|
raise ValueError(f"Unknown condition: {condition}")
|
|
|
|
generate_fn = CONDITION_FUNCTIONS[condition]
|
|
result = await generate_fn(query)
|
|
return result
|
|
|
|
|
|
async def run_experiment(
|
|
queries: Optional[List[str]] = None,
|
|
conditions: Optional[List[str]] = None,
|
|
save_intermediate: bool = True
|
|
) -> Dict[str, Any]:
|
|
"""
|
|
Run the full experiment.
|
|
|
|
Args:
|
|
queries: List of queries to run (None = all pilot queries)
|
|
conditions: List of conditions to run (None = all conditions)
|
|
save_intermediate: Whether to save results after each query
|
|
|
|
Returns:
|
|
Complete experiment results
|
|
"""
|
|
# Load queries if not provided
|
|
if queries is None:
|
|
query_data = load_queries()
|
|
queries_to_run = [(q["id"], q["query"], q["category"]) for q in query_data]
|
|
else:
|
|
queries_to_run = [(f"Q{i}", q, "custom") for i, q in enumerate(queries)]
|
|
|
|
# Default to all conditions
|
|
conditions = conditions or CONDITIONS
|
|
|
|
logger.info(f"Starting experiment with {len(queries_to_run)} queries and {len(conditions)} conditions")
|
|
logger.info(f"Conditions: {', '.join(conditions)}")
|
|
|
|
experiment_results = {
|
|
"experiment_id": datetime.now().strftime("%Y%m%d_%H%M%S"),
|
|
"config": EXPERIMENT_CONFIG,
|
|
"conditions": conditions,
|
|
"query_count": len(queries_to_run),
|
|
"results": [],
|
|
"summary": {}
|
|
}
|
|
|
|
for query_id, query, category in queries_to_run:
|
|
logger.info(f"\n{'='*60}")
|
|
logger.info(f"Processing query: {query} (ID: {query_id}, Category: {category})")
|
|
logger.info(f"{'='*60}")
|
|
|
|
query_results = {
|
|
"query_id": query_id,
|
|
"query": query,
|
|
"category": category,
|
|
"conditions": {}
|
|
}
|
|
|
|
for condition in conditions:
|
|
logger.info(f"\n Running {CONDITION_NAMES.get(condition, condition)}...")
|
|
|
|
try:
|
|
result = await run_condition(query, condition)
|
|
|
|
query_results["conditions"][condition] = {
|
|
"success": True,
|
|
"idea_count": result["idea_count"],
|
|
"ideas": result["ideas"],
|
|
"ideas_with_source": result.get("ideas_with_source", []),
|
|
"metadata": result["metadata"]
|
|
}
|
|
|
|
logger.info(f" Generated {result['idea_count']} ideas")
|
|
|
|
except Exception as e:
|
|
logger.error(f" Error in {condition}: {e}")
|
|
query_results["conditions"][condition] = {
|
|
"success": False,
|
|
"error": str(e),
|
|
"idea_count": 0,
|
|
"ideas": []
|
|
}
|
|
|
|
experiment_results["results"].append(query_results)
|
|
|
|
# Save intermediate results
|
|
if save_intermediate:
|
|
save_results(
|
|
experiment_results,
|
|
f"experiment_{experiment_results['experiment_id']}_intermediate.json"
|
|
)
|
|
|
|
# Calculate summary statistics
|
|
experiment_results["summary"] = calculate_summary(experiment_results)
|
|
|
|
# Save final results
|
|
output_path = save_results(
|
|
experiment_results,
|
|
f"experiment_{experiment_results['experiment_id']}_complete.json"
|
|
)
|
|
|
|
logger.info(f"\n{'='*60}")
|
|
logger.info("Experiment complete!")
|
|
logger.info(f"Results saved to: {output_path}")
|
|
logger.info(f"{'='*60}")
|
|
|
|
return experiment_results
|
|
|
|
|
|
def calculate_summary(results: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Calculate summary statistics for the experiment."""
|
|
summary = {
|
|
"total_queries": len(results["results"]),
|
|
"conditions": {}
|
|
}
|
|
|
|
for condition in results["conditions"]:
|
|
condition_stats = {
|
|
"total_ideas": 0,
|
|
"successful_queries": 0,
|
|
"failed_queries": 0,
|
|
"avg_ideas_per_query": 0
|
|
}
|
|
|
|
for query_result in results["results"]:
|
|
cond_result = query_result["conditions"].get(condition, {})
|
|
if cond_result.get("success", False):
|
|
condition_stats["successful_queries"] += 1
|
|
condition_stats["total_ideas"] += cond_result.get("idea_count", 0)
|
|
else:
|
|
condition_stats["failed_queries"] += 1
|
|
|
|
if condition_stats["successful_queries"] > 0:
|
|
condition_stats["avg_ideas_per_query"] = (
|
|
condition_stats["total_ideas"] / condition_stats["successful_queries"]
|
|
)
|
|
|
|
summary["conditions"][condition] = condition_stats
|
|
|
|
return summary
|
|
|
|
|
|
def print_summary(results: Dict[str, Any]):
|
|
"""Print a formatted summary of the experiment."""
|
|
print("\n" + "=" * 70)
|
|
print("EXPERIMENT SUMMARY")
|
|
print("=" * 70)
|
|
|
|
summary = results.get("summary", {})
|
|
print(f"\nTotal queries processed: {summary.get('total_queries', 0)}")
|
|
|
|
print("\nResults by condition:")
|
|
print("-" * 70)
|
|
print(f"{'Condition':<30} {'Success':<10} {'Total Ideas':<15} {'Avg/Query':<10}")
|
|
print("-" * 70)
|
|
|
|
for condition, stats in summary.get("conditions", {}).items():
|
|
name = CONDITION_NAMES.get(condition, condition)
|
|
success = stats.get("successful_queries", 0)
|
|
total = stats.get("total_ideas", 0)
|
|
avg = stats.get("avg_ideas_per_query", 0)
|
|
print(f"{name:<30} {success:<10} {total:<15} {avg:<10.1f}")
|
|
|
|
print("-" * 70)
|
|
|
|
|
|
async def main():
|
|
parser = argparse.ArgumentParser(
|
|
description="Run the 5-condition idea generation experiment"
|
|
)
|
|
parser.add_argument(
|
|
"--pilot",
|
|
action="store_true",
|
|
help="Run pilot experiment with all 10 queries"
|
|
)
|
|
parser.add_argument(
|
|
"--query",
|
|
type=str,
|
|
help="Run single query (e.g., 'Chair')"
|
|
)
|
|
parser.add_argument(
|
|
"--conditions",
|
|
nargs="+",
|
|
choices=CONDITIONS,
|
|
help="Specific conditions to run"
|
|
)
|
|
parser.add_argument(
|
|
"--no-save-intermediate",
|
|
action="store_true",
|
|
help="Don't save intermediate results"
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
# Determine queries to run
|
|
if args.query:
|
|
queries = [args.query]
|
|
elif args.pilot:
|
|
queries = None # Will load all pilot queries
|
|
else:
|
|
parser.print_help()
|
|
print("\nError: Must specify --pilot or --query")
|
|
sys.exit(1)
|
|
|
|
# Run experiment
|
|
results = await run_experiment(
|
|
queries=queries,
|
|
conditions=args.conditions,
|
|
save_intermediate=not args.no_save_intermediate
|
|
)
|
|
|
|
# Print summary
|
|
print_summary(results)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
asyncio.run(main())
|