""" Main experiment runner for the 5-condition idea generation study. Usage: # Run single query through all conditions python -m experiments.generate_ideas --pilot --query "Chair" # Run all pilot queries python -m experiments.generate_ideas --pilot # Run specific conditions python -m experiments.generate_ideas --query "Bicycle" --conditions c1_direct c4_full_pipeline """ import sys import json import argparse import asyncio import logging from datetime import datetime from pathlib import Path from typing import List, Dict, Any, Optional # Add backend to path for imports sys.path.insert(0, str(Path(__file__).parent.parent / "backend")) from experiments.config import ( CONDITIONS, CONDITION_NAMES, DATA_DIR, RESULTS_DIR, EXPERIMENT_CONFIG ) from experiments.conditions import ( c1_generate, c2_generate, c3_generate, c4_generate, c5_generate ) # Configure logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s' ) logger = logging.getLogger(__name__) # Condition function mapping CONDITION_FUNCTIONS = { "c1_direct": c1_generate, "c2_expert_only": c2_generate, "c3_attribute_only": c3_generate, "c4_full_pipeline": c4_generate, "c5_random_perspective": c5_generate, } def load_queries() -> List[Dict[str, Any]]: """Load pilot queries from data file.""" queries_file = DATA_DIR / "queries.json" with open(queries_file, "r", encoding="utf-8") as f: data = json.load(f) return data.get("queries", []) def save_results(results: List[Dict[str, Any]], filename: str) -> Path: """Save results to JSON file.""" RESULTS_DIR.mkdir(parents=True, exist_ok=True) output_path = RESULTS_DIR / filename with open(output_path, "w", encoding="utf-8") as f: json.dump(results, f, indent=2, ensure_ascii=False) return output_path async def run_condition( query: str, condition: str ) -> Dict[str, Any]: """Run a single condition for a query.""" if condition not in CONDITION_FUNCTIONS: raise ValueError(f"Unknown condition: {condition}") generate_fn = CONDITION_FUNCTIONS[condition] result = await generate_fn(query) return result async def run_experiment( queries: Optional[List[str]] = None, conditions: Optional[List[str]] = None, save_intermediate: bool = True ) -> Dict[str, Any]: """ Run the full experiment. Args: queries: List of queries to run (None = all pilot queries) conditions: List of conditions to run (None = all conditions) save_intermediate: Whether to save results after each query Returns: Complete experiment results """ # Load queries if not provided if queries is None: query_data = load_queries() queries_to_run = [(q["id"], q["query"], q["category"]) for q in query_data] else: queries_to_run = [(f"Q{i}", q, "custom") for i, q in enumerate(queries)] # Default to all conditions conditions = conditions or CONDITIONS logger.info(f"Starting experiment with {len(queries_to_run)} queries and {len(conditions)} conditions") logger.info(f"Conditions: {', '.join(conditions)}") experiment_results = { "experiment_id": datetime.now().strftime("%Y%m%d_%H%M%S"), "config": EXPERIMENT_CONFIG, "conditions": conditions, "query_count": len(queries_to_run), "results": [], "summary": {} } for query_id, query, category in queries_to_run: logger.info(f"\n{'='*60}") logger.info(f"Processing query: {query} (ID: {query_id}, Category: {category})") logger.info(f"{'='*60}") query_results = { "query_id": query_id, "query": query, "category": category, "conditions": {} } for condition in conditions: logger.info(f"\n Running {CONDITION_NAMES.get(condition, condition)}...") try: result = await run_condition(query, condition) query_results["conditions"][condition] = { "success": True, "idea_count": result["idea_count"], "ideas": result["ideas"], "ideas_with_source": result.get("ideas_with_source", []), "metadata": result["metadata"] } logger.info(f" Generated {result['idea_count']} ideas") except Exception as e: logger.error(f" Error in {condition}: {e}") query_results["conditions"][condition] = { "success": False, "error": str(e), "idea_count": 0, "ideas": [] } experiment_results["results"].append(query_results) # Save intermediate results if save_intermediate: save_results( experiment_results, f"experiment_{experiment_results['experiment_id']}_intermediate.json" ) # Calculate summary statistics experiment_results["summary"] = calculate_summary(experiment_results) # Save final results output_path = save_results( experiment_results, f"experiment_{experiment_results['experiment_id']}_complete.json" ) logger.info(f"\n{'='*60}") logger.info("Experiment complete!") logger.info(f"Results saved to: {output_path}") logger.info(f"{'='*60}") return experiment_results def calculate_summary(results: Dict[str, Any]) -> Dict[str, Any]: """Calculate summary statistics for the experiment.""" summary = { "total_queries": len(results["results"]), "conditions": {} } for condition in results["conditions"]: condition_stats = { "total_ideas": 0, "successful_queries": 0, "failed_queries": 0, "avg_ideas_per_query": 0 } for query_result in results["results"]: cond_result = query_result["conditions"].get(condition, {}) if cond_result.get("success", False): condition_stats["successful_queries"] += 1 condition_stats["total_ideas"] += cond_result.get("idea_count", 0) else: condition_stats["failed_queries"] += 1 if condition_stats["successful_queries"] > 0: condition_stats["avg_ideas_per_query"] = ( condition_stats["total_ideas"] / condition_stats["successful_queries"] ) summary["conditions"][condition] = condition_stats return summary def print_summary(results: Dict[str, Any]): """Print a formatted summary of the experiment.""" print("\n" + "=" * 70) print("EXPERIMENT SUMMARY") print("=" * 70) summary = results.get("summary", {}) print(f"\nTotal queries processed: {summary.get('total_queries', 0)}") print("\nResults by condition:") print("-" * 70) print(f"{'Condition':<30} {'Success':<10} {'Total Ideas':<15} {'Avg/Query':<10}") print("-" * 70) for condition, stats in summary.get("conditions", {}).items(): name = CONDITION_NAMES.get(condition, condition) success = stats.get("successful_queries", 0) total = stats.get("total_ideas", 0) avg = stats.get("avg_ideas_per_query", 0) print(f"{name:<30} {success:<10} {total:<15} {avg:<10.1f}") print("-" * 70) async def main(): parser = argparse.ArgumentParser( description="Run the 5-condition idea generation experiment" ) parser.add_argument( "--pilot", action="store_true", help="Run pilot experiment with all 10 queries" ) parser.add_argument( "--query", type=str, help="Run single query (e.g., 'Chair')" ) parser.add_argument( "--conditions", nargs="+", choices=CONDITIONS, help="Specific conditions to run" ) parser.add_argument( "--no-save-intermediate", action="store_true", help="Don't save intermediate results" ) args = parser.parse_args() # Determine queries to run if args.query: queries = [args.query] elif args.pilot: queries = None # Will load all pilot queries else: parser.print_help() print("\nError: Must specify --pilot or --query") sys.exit(1) # Run experiment results = await run_experiment( queries=queries, conditions=args.conditions, save_intermediate=not args.no_save_intermediate ) # Print summary print_summary(results) if __name__ == "__main__": asyncio.run(main())