feat: Add experiments framework and novelty-driven agent loop
- Add complete experiments directory with pilot study infrastructure - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective) - Human assessment tool with React frontend and FastAPI backend - AUT flexibility analysis with jump signal detection - Result visualization and metrics computation - Add novelty-driven agent loop module (experiments/novelty_loop/) - NoveltyDrivenTaskAgent with expert perspective perturbation - Three termination strategies: breakthrough, exhaust, coverage - Interactive CLI demo with colored output - Embedding-based novelty scoring - Add DDC knowledge domain classification data (en/zh) - Add CLAUDE.md project documentation - Update research report with experiment findings Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
183
experiments/assessment/backend/models.py
Normal file
183
experiments/assessment/backend/models.py
Normal file
@@ -0,0 +1,183 @@
|
||||
"""
|
||||
Pydantic models for the assessment API.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# Request models
class RaterCreate(BaseModel):
    """Request to create or login as a rater.

    The same endpoint presumably handles both first-time registration and
    returning login keyed on ``rater_id`` — confirm against the route handler.
    """

    # Login key; constrained to 1-50 characters.
    rater_id: str = Field(..., min_length=1, max_length=50, description="Unique rater identifier")
    # Optional human-readable display name (max 100 characters).
    name: str | None = Field(None, max_length=100, description="Optional display name")


class RatingSubmit(BaseModel):
    """Request to submit a rating.

    All four dimension scores are optional (each 1-5 when present), so a
    submission may carry partial scores or none at all — e.g. when the idea
    was skipped (``skipped=True``).
    """

    rater_id: str = Field(..., description="Rater identifier")
    idea_id: str = Field(..., description="Idea identifier")
    query_id: str = Field(..., description="Query identifier")
    originality: int | None = Field(None, ge=1, le=5, description="Originality score 1-5")
    elaboration: int | None = Field(None, ge=1, le=5, description="Elaboration score 1-5")
    coherence: int | None = Field(None, ge=1, le=5, description="Coherence score 1-5")
    usefulness: int | None = Field(None, ge=1, le=5, description="Usefulness score 1-5")
    skipped: bool = Field(False, description="Whether the idea was skipped")


# Response models
class Rater(BaseModel):
    """Rater information returned by the API."""

    rater_id: str
    name: str | None
    # Optional so records without a stored creation time still validate.
    created_at: datetime | None = None


class Rating(BaseModel):
    """A single rating as read back from storage."""

    id: int  # presumably the auto-increment primary key — confirm against the DB layer
    rater_id: str
    idea_id: str
    query_id: str
    # Dimension scores are 1-5, or None when the rater left them blank.
    originality: int | None
    elaboration: int | None
    coherence: int | None
    usefulness: int | None
    # NOTE(review): declared ``int`` here but ``bool`` in RatingSubmit and
    # ExportRating — looks like the raw SQLite 0/1 representation; confirm the
    # asymmetry is intentional before changing it.
    skipped: int
    timestamp: datetime | None


class Progress(BaseModel):
    """Progress for a rater on a single query."""

    rater_id: str
    query_id: str
    completed_count: int  # Ideas handled so far for this query
    total_count: int      # Total ideas in this query
    # Timestamps are optional so progress rows created before tracking
    # existed (or not yet started) still validate.
    started_at: datetime | None = None
    updated_at: datetime | None = None


class QueryInfo(BaseModel):
    """Summary information about a query (no idea payloads)."""

    query_id: str
    query_text: str
    category: str
    idea_count: int  # Number of ideas attached to this query


class IdeaForRating(BaseModel):
    """An idea presented for rating (without hidden metadata).

    Deliberately omits condition/expert/keyword fields — presumably to keep
    raters blind to the experimental condition; those fields only appear on
    ExportRating.
    """

    idea_id: str
    text: str
    index: int  # Position in the randomized list for this query


class QueryWithIdeas(BaseModel):
    """A query together with its ideas, ready for the rating UI."""

    query_id: str
    query_text: str
    category: str
    ideas: list[IdeaForRating]
    # May exceed len(ideas) if the response is paginated — TODO confirm.
    total_count: int


class Statistics(BaseModel):
    """Overall assessment statistics across all raters."""

    rater_count: int   # Registered raters
    rating_count: int  # Ratings submitted
    skip_count: int    # Ideas skipped
    rated_ideas: int   # Distinct ideas that received at least one rating — confirm


class RaterProgress(BaseModel):
    """Complete progress summary for a rater across all queries."""

    rater_id: str
    queries: list[Progress]  # Per-query breakdown
    total_completed: int
    total_ideas: int
    # Presumably total_completed / total_ideas expressed as 0-100 — confirm
    # against the endpoint that computes it.
    percentage: float


# Export response models
class ExportRating(BaseModel):
    """Rating with hidden metadata for export.

    Includes the experimental fields (condition, expert_name, keyword) that
    are withheld from raters during assessment, plus the full query and idea
    texts for self-contained analysis.
    """

    rater_id: str
    idea_id: str
    query_id: str
    query_text: str
    idea_text: str
    originality: int | None
    elaboration: int | None
    coherence: int | None
    usefulness: int | None
    skipped: bool
    condition: str    # Experimental condition the idea belongs to
    expert_name: str
    keyword: str
    timestamp: datetime | None


class ExportData(BaseModel):
    """Full export data structure: header metadata plus all ratings."""

    experiment_id: str
    export_timestamp: datetime  # When this export was generated
    rater_count: int
    rating_count: int
    ratings: list[ExportRating]


# Dimension definitions (for frontend)
#
# Maps each rating dimension key (matching the score field names on
# RatingSubmit) to its display metadata: human-readable name, the question
# shown to raters, a 1-5 anchor description per scale point, and short
# low/high endpoint labels for the slider extremes.
#
# NOTE: the "scale" keys are ints; if this dict is serialized to JSON for the
# frontend they will become strings ("1".."5") — the client should expect that.
DIMENSION_DEFINITIONS = {
    "originality": {
        "name": "Originality",
        "question": "How unexpected or surprising is this idea? Would most people NOT think of this?",
        "scale": {
            1: "Very common/obvious idea anyone would suggest",
            2: "Somewhat common, slight variation on expected ideas",
            3: "Moderately original, some unexpected elements",
            4: "Quite original, notably different approach",
            5: "Highly unexpected, truly novel concept"
        },
        "low_label": "Common",
        "high_label": "Unexpected"
    },
    "elaboration": {
        "name": "Elaboration",
        "question": "How detailed and well-developed is this idea?",
        "scale": {
            1: "Vague, minimal detail, just a concept",
            2: "Basic idea with little specificity",
            3: "Moderately detailed, some specifics provided",
            4: "Well-developed with clear implementation hints",
            5: "Highly specific, thoroughly developed concept"
        },
        "low_label": "Vague",
        "high_label": "Detailed"
    },
    "coherence": {
        "name": "Coherence",
        "question": "Does this idea make logical sense and relate to the query object?",
        "scale": {
            1: "Nonsensical, irrelevant, or incomprehensible",
            2: "Mostly unclear, weak connection to query",
            3: "Partially coherent, some logical gaps",
            4: "Mostly coherent with minor issues",
            5: "Fully coherent, clearly relates to query"
        },
        "low_label": "Nonsense",
        "high_label": "Coherent"
    },
    "usefulness": {
        "name": "Usefulness",
        "question": "Could this idea have practical value or inspire real innovation?",
        "scale": {
            1: "No practical value whatsoever",
            2: "Minimal usefulness, highly impractical",
            3: "Some potential value with major limitations",
            4: "Useful idea with realistic applications",
            5: "Highly useful, clear practical value"
        },
        "low_label": "Useless",
        "high_label": "Useful"
    }
}
Reference in New Issue
Block a user