feat: Add experiments framework and novelty-driven agent loop

- Add complete experiments directory with pilot study infrastructure
  - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective)
  - Human assessment tool with React frontend and FastAPI backend
  - AUT flexibility analysis with jump signal detection
  - Result visualization and metrics computation
- Add novelty-driven agent loop module (experiments/novelty_loop/)
  - NoveltyDrivenTaskAgent with expert perspective perturbation
  - Three termination strategies: breakthrough, exhaust, coverage
  - Interactive CLI demo with colored output
  - Embedding-based novelty scoring
- Add DDC knowledge domain classification data (en/zh)
- Add CLAUDE.md project documentation
- Update research report with experiment findings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 10:16:21 +08:00
parent 26a56a2a07
commit 43c025e060
81 changed files with 18766 additions and 2 deletions
--- a/experiments/assessment/backend/app.py
+++ b/experiments/assessment/backend/app.py
@@ -0,0 +1,374 @@
+"""
+FastAPI backend for human assessment of creative ideas.
+"""
+
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+
+try:
+    from . import database as db
+    from .models import (
+        DIMENSION_DEFINITIONS,
+        ExportData,
+        ExportRating,
+        IdeaForRating,
+        Progress,
+        QueryInfo,
+        QueryWithIdeas,
+        Rater,
+        RaterCreate,
+        RaterProgress,
+        Rating,
+        RatingSubmit,
+        Statistics,
+    )
+except ImportError:
+    import database as db
+    from models import (
+        DIMENSION_DEFINITIONS,
+        ExportData,
+        ExportRating,
+        IdeaForRating,
+        Progress,
+        QueryInfo,
+        QueryWithIdeas,
+        Rater,
+        RaterCreate,
+        RaterProgress,
+        Rating,
+        RatingSubmit,
+        Statistics,
+    )
+
+
+# Load assessment data
+DATA_PATH = Path(__file__).parent.parent / 'data' / 'assessment_items.json'
+
+
+def load_assessment_data() -> dict[str, Any]:
+    """Load the assessment items data."""
+    if not DATA_PATH.exists():
+        raise RuntimeError(f"Assessment data not found at {DATA_PATH}. Run prepare_data.py first.")
+    with open(DATA_PATH, 'r', encoding='utf-8') as f:
+        return json.load(f)
+
+
+# Initialize FastAPI app
+app = FastAPI(
+    title="Creative Idea Assessment API",
+    description="API for human assessment of creative ideas using Torrance-inspired metrics",
+    version="1.0.0"
+)
+
+# CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+# Cache for assessment data
+_assessment_data: dict[str, Any] | None = None
+
+
+def get_assessment_data() -> dict[str, Any]:
+    """Get cached assessment data."""
+    global _assessment_data
+    if _assessment_data is None:
+        _assessment_data = load_assessment_data()
+    return _assessment_data
+
+
+# Rater endpoints
+@app.get("/api/raters", response_model=list[Rater])
+def list_raters() -> list[dict[str, Any]]:
+    """List all registered raters."""
+    return db.list_raters()
+
+
+@app.post("/api/raters", response_model=Rater)
+def create_or_get_rater(rater_data: RaterCreate) -> dict[str, Any]:
+    """Register a new rater or get existing one."""
+    return db.create_rater(rater_data.rater_id, rater_data.name)
+
+
+@app.get("/api/raters/{rater_id}", response_model=Rater)
+def get_rater(rater_id: str) -> dict[str, Any]:
+    """Get a specific rater."""
+    rater = db.get_rater(rater_id)
+    if not rater:
+        raise HTTPException(status_code=404, detail="Rater not found")
+    return rater
+
+
+# Query endpoints
+@app.get("/api/queries", response_model=list[QueryInfo])
+def list_queries() -> list[dict[str, Any]]:
+    """List all queries available for assessment."""
+    data = get_assessment_data()
+    return [
+        {
+            'query_id': q['query_id'],
+            'query_text': q['query_text'],
+            'category': q.get('category', ''),
+            'idea_count': q['idea_count']
+        }
+        for q in data['queries']
+    ]
+
+
+@app.get("/api/queries/{query_id}", response_model=QueryWithIdeas)
+def get_query_with_ideas(query_id: str) -> dict[str, Any]:
+    """Get a query with all its ideas for rating (without hidden metadata)."""
+    data = get_assessment_data()
+
+    for query in data['queries']:
+        if query['query_id'] == query_id:
+            ideas = [
+                IdeaForRating(
+                    idea_id=idea['idea_id'],
+                    text=idea['text'],
+                    index=idx
+                )
+                for idx, idea in enumerate(query['ideas'])
+            ]
+            return QueryWithIdeas(
+                query_id=query['query_id'],
+                query_text=query['query_text'],
+                category=query.get('category', ''),
+                ideas=ideas,
+                total_count=len(ideas)
+            )
+
+    raise HTTPException(status_code=404, detail="Query not found")
+
+
+@app.get("/api/queries/{query_id}/unrated", response_model=QueryWithIdeas)
+def get_unrated_ideas(query_id: str, rater_id: str) -> dict[str, Any]:
+    """Get unrated ideas for a query by a specific rater."""
+    data = get_assessment_data()
+
+    for query in data['queries']:
+        if query['query_id'] == query_id:
+            # Get already rated idea IDs
+            rated_ids = db.get_rated_idea_ids(rater_id, query_id)
+
+            # Filter to unrated ideas
+            unrated_ideas = [
+                IdeaForRating(
+                    idea_id=idea['idea_id'],
+                    text=idea['text'],
+                    index=idx
+                )
+                for idx, idea in enumerate(query['ideas'])
+                if idea['idea_id'] not in rated_ids
+            ]
+
+            return QueryWithIdeas(
+                query_id=query['query_id'],
+                query_text=query['query_text'],
+                category=query.get('category', ''),
+                ideas=unrated_ideas,
+                total_count=query['idea_count']
+            )
+
+    raise HTTPException(status_code=404, detail="Query not found")
+
+
+# Rating endpoints
+@app.post("/api/ratings", response_model=dict[str, Any])
+def submit_rating(rating: RatingSubmit) -> dict[str, Any]:
+    """Submit a rating for an idea."""
+    # Validate that rater exists
+    rater = db.get_rater(rating.rater_id)
+    if not rater:
+        raise HTTPException(status_code=404, detail="Rater not found. Please register first.")
+
+    # Validate idea exists
+    data = get_assessment_data()
+    idea_found = False
+    for query in data['queries']:
+        for idea in query['ideas']:
+            if idea['idea_id'] == rating.idea_id:
+                idea_found = True
+                break
+        if idea_found:
+            break
+
+    if not idea_found:
+        raise HTTPException(status_code=404, detail="Idea not found")
+
+    # If not skipped, require all ratings
+    if not rating.skipped:
+        if rating.originality is None or rating.elaboration is None or rating.coherence is None or rating.usefulness is None:
+            raise HTTPException(
+                status_code=400,
+                detail="All dimensions must be rated unless skipping"
+            )
+
+    # Save rating
+    return db.save_rating(
+        rater_id=rating.rater_id,
+        idea_id=rating.idea_id,
+        query_id=rating.query_id,
+        originality=rating.originality,
+        elaboration=rating.elaboration,
+        coherence=rating.coherence,
+        usefulness=rating.usefulness,
+        skipped=rating.skipped
+    )
+
+
+@app.get("/api/ratings/{rater_id}/{idea_id}", response_model=Rating | None)
+def get_rating(rater_id: str, idea_id: str) -> dict[str, Any] | None:
+    """Get a specific rating."""
+    return db.get_rating(rater_id, idea_id)
+
+
+@app.get("/api/ratings/rater/{rater_id}", response_model=list[Rating])
+def get_ratings_by_rater(rater_id: str) -> list[dict[str, Any]]:
+    """Get all ratings by a rater."""
+    return db.get_ratings_by_rater(rater_id)
+
+
+# Progress endpoints
+@app.get("/api/progress/{rater_id}", response_model=RaterProgress)
+def get_rater_progress(rater_id: str) -> RaterProgress:
+    """Get complete progress for a rater."""
+    rater = db.get_rater(rater_id)
+    if not rater:
+        raise HTTPException(status_code=404, detail="Rater not found")
+
+    data = get_assessment_data()
+
+    # Get rated idea counts per query
+    ratings = db.get_ratings_by_rater(rater_id)
+    ratings_per_query: dict[str, int] = {}
+    for r in ratings:
+        qid = r['query_id']
+        ratings_per_query[qid] = ratings_per_query.get(qid, 0) + 1
+
+    # Build progress list
+    query_progress = []
+    total_completed = 0
+    total_ideas = 0
+
+    for query in data['queries']:
+        qid = query['query_id']
+        completed = ratings_per_query.get(qid, 0)
+        total = query['idea_count']
+
+        query_progress.append(Progress(
+            rater_id=rater_id,
+            query_id=qid,
+            completed_count=completed,
+            total_count=total
+        ))
+
+        total_completed += completed
+        total_ideas += total
+
+    percentage = (total_completed / total_ideas * 100) if total_ideas > 0 else 0
+
+    return RaterProgress(
+        rater_id=rater_id,
+        queries=query_progress,
+        total_completed=total_completed,
+        total_ideas=total_ideas,
+        percentage=round(percentage, 1)
+    )
+
+
+# Statistics endpoint
+@app.get("/api/statistics", response_model=Statistics)
+def get_statistics() -> Statistics:
+    """Get overall assessment statistics."""
+    stats = db.get_statistics()
+    return Statistics(**stats)
+
+
+# Dimension definitions endpoint
+@app.get("/api/dimensions")
+def get_dimensions() -> dict[str, Any]:
+    """Get dimension definitions for the UI."""
+    return DIMENSION_DEFINITIONS
+
+
+# Export endpoint
+@app.get("/api/export", response_model=ExportData)
+def export_ratings() -> ExportData:
+    """Export all ratings with hidden metadata for analysis."""
+    data = get_assessment_data()
+    all_ratings = db.get_all_ratings()
+
+    # Build idea lookup with hidden metadata
+    idea_lookup: dict[str, dict[str, Any]] = {}
+    query_lookup: dict[str, str] = {}
+
+    for query in data['queries']:
+        query_lookup[query['query_id']] = query['query_text']
+        for idea in query['ideas']:
+            idea_lookup[idea['idea_id']] = {
+                'text': idea['text'],
+                'condition': idea['_hidden']['condition'],
+                'expert_name': idea['_hidden']['expert_name'],
+                'keyword': idea['_hidden']['keyword']
+            }
+
+    # Build export ratings
+    export_ratings = []
+    for r in all_ratings:
+        idea_data = idea_lookup.get(r['idea_id'], {})
+        export_ratings.append(ExportRating(
+            rater_id=r['rater_id'],
+            idea_id=r['idea_id'],
+            query_id=r['query_id'],
+            query_text=query_lookup.get(r['query_id'], ''),
+            idea_text=idea_data.get('text', ''),
+            originality=r['originality'],
+            elaboration=r['elaboration'],
+            coherence=r['coherence'],
+            usefulness=r['usefulness'],
+            skipped=bool(r['skipped']),
+            condition=idea_data.get('condition', ''),
+            expert_name=idea_data.get('expert_name', ''),
+            keyword=idea_data.get('keyword', ''),
+            timestamp=r['timestamp']
+        ))
+
+    return ExportData(
+        experiment_id=data['experiment_id'],
+        export_timestamp=datetime.utcnow(),
+        rater_count=len(db.list_raters()),
+        rating_count=len(export_ratings),
+        ratings=export_ratings
+    )
+
+
+# Health check
+@app.get("/api/health")
+def health_check() -> dict[str, str]:
+    """Health check endpoint."""
+    return {"status": "healthy"}
+
+
+# Info endpoint
+@app.get("/api/info")
+def get_info() -> dict[str, Any]:
+    """Get assessment session info."""
+    data = get_assessment_data()
+    return {
+        'experiment_id': data['experiment_id'],
+        'total_ideas': data['total_ideas'],
+        'query_count': data['query_count'],
+        'conditions': data['conditions'],
+        'randomization_seed': data['randomization_seed']
+    }