gbanyan 43c025e060 feat: Add experiments framework and novelty-driven agent loop
- Add complete experiments directory with pilot study infrastructure
  - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective)
  - Human assessment tool with React frontend and FastAPI backend
  - AUT flexibility analysis with jump signal detection
  - Result visualization and metrics computation

- Add novelty-driven agent loop module (experiments/novelty_loop/)
  - NoveltyDrivenTaskAgent with expert perspective perturbation
  - Three termination strategies: breakthrough, exhaust, coverage
  - Interactive CLI demo with colored output
  - Embedding-based novelty scoring (see the sketch after this message)

- Add DDC knowledge domain classification data (en/zh)
- Add CLAUDE.md project documentation
- Update research report with experiment findings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 10:16:21 +08:00
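The novelty loop listed above pairs embedding-based novelty scoring with a "breakthrough" termination strategy; the sketch below shows one minimal way those two pieces could fit together. The embedding model, the function names (novelty_score, should_stop), and the thresholds are illustrative assumptions, not the committed implementation.

# Illustrative sketch only; names, model choice, and thresholds are assumptions.
import numpy as np
from sentence_transformers import SentenceTransformer

_model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model


def novelty_score(idea: str, prior_ideas: list[str]) -> float:
    """Novelty = cosine distance to the most similar previously generated idea."""
    if not prior_ideas:
        return 1.0
    vecs = _model.encode([idea] + prior_ideas)
    target, rest = vecs[0], vecs[1:]
    sims = rest @ target / (np.linalg.norm(rest, axis=1) * np.linalg.norm(target))
    return float(1.0 - sims.max())


def should_stop(novelty_history: list[float], strategy: str = "breakthrough") -> bool:
    """Breakthrough: stop once one idea is novel enough.
    Exhaust: stop after a run of low-novelty ideas.
    (Coverage, not sketched here, would instead track visited knowledge domains.)"""
    if strategy == "breakthrough":
        return bool(novelty_history) and novelty_history[-1] >= 0.6  # assumed threshold
    if strategy == "exhaust":
        return len(novelty_history) >= 3 and max(novelty_history[-3:]) < 0.2
    return False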

184 lines · 5.3 KiB · Python

"""
Pydantic models for the assessment API.
"""
from datetime import datetime

from pydantic import BaseModel, Field


# Request models

class RaterCreate(BaseModel):
    """Request to create or login as a rater."""
    rater_id: str = Field(..., min_length=1, max_length=50, description="Unique rater identifier")
    name: str | None = Field(None, max_length=100, description="Optional display name")


class RatingSubmit(BaseModel):
    """Request to submit a rating."""
    rater_id: str = Field(..., description="Rater identifier")
    idea_id: str = Field(..., description="Idea identifier")
    query_id: str = Field(..., description="Query identifier")
    originality: int | None = Field(None, ge=1, le=5, description="Originality score 1-5")
    elaboration: int | None = Field(None, ge=1, le=5, description="Elaboration score 1-5")
    coherence: int | None = Field(None, ge=1, le=5, description="Coherence score 1-5")
    usefulness: int | None = Field(None, ge=1, le=5, description="Usefulness score 1-5")
    skipped: bool = Field(False, description="Whether the idea was skipped")


# Response models

class Rater(BaseModel):
    """Rater information."""
    rater_id: str
    name: str | None
    created_at: datetime | None = None


class Rating(BaseModel):
    """A single rating."""
    id: int
    rater_id: str
    idea_id: str
    query_id: str
    originality: int | None
    elaboration: int | None
    coherence: int | None
    usefulness: int | None
    skipped: int
    timestamp: datetime | None


class Progress(BaseModel):
    """Progress for a rater on a query."""
    rater_id: str
    query_id: str
    completed_count: int
    total_count: int
    started_at: datetime | None = None
    updated_at: datetime | None = None


class QueryInfo(BaseModel):
    """Information about a query."""
    query_id: str
    query_text: str
    category: str
    idea_count: int


class IdeaForRating(BaseModel):
    """An idea presented for rating (without hidden metadata)."""
    idea_id: str
    text: str
    index: int  # Position in the randomized list for this query


class QueryWithIdeas(BaseModel):
    """A query with its ideas for rating."""
    query_id: str
    query_text: str
    category: str
    ideas: list[IdeaForRating]
    total_count: int


class Statistics(BaseModel):
    """Overall statistics."""
    rater_count: int
    rating_count: int
    skip_count: int
    rated_ideas: int


class RaterProgress(BaseModel):
    """Complete progress summary for a rater."""
    rater_id: str
    queries: list[Progress]
    total_completed: int
    total_ideas: int
    percentage: float


# Export response models

class ExportRating(BaseModel):
    """Rating with hidden metadata for export."""
    rater_id: str
    idea_id: str
    query_id: str
    query_text: str
    idea_text: str
    originality: int | None
    elaboration: int | None
    coherence: int | None
    usefulness: int | None
    skipped: bool
    condition: str
    expert_name: str
    keyword: str
    timestamp: datetime | None


class ExportData(BaseModel):
    """Full export data structure."""
    experiment_id: str
    export_timestamp: datetime
    rater_count: int
    rating_count: int
    ratings: list[ExportRating]


# Dimension definitions (for frontend)

DIMENSION_DEFINITIONS = {
    "originality": {
        "name": "Originality",
        "question": "How unexpected or surprising is this idea? Would most people NOT think of this?",
        "scale": {
            1: "Very common/obvious idea anyone would suggest",
            2: "Somewhat common, slight variation on expected ideas",
            3: "Moderately original, some unexpected elements",
            4: "Quite original, notably different approach",
            5: "Highly unexpected, truly novel concept",
        },
        "low_label": "Common",
        "high_label": "Unexpected",
    },
    "elaboration": {
        "name": "Elaboration",
        "question": "How detailed and well-developed is this idea?",
        "scale": {
            1: "Vague, minimal detail, just a concept",
            2: "Basic idea with little specificity",
            3: "Moderately detailed, some specifics provided",
            4: "Well-developed with clear implementation hints",
            5: "Highly specific, thoroughly developed concept",
        },
        "low_label": "Vague",
        "high_label": "Detailed",
    },
    "coherence": {
        "name": "Coherence",
        "question": "Does this idea make logical sense and relate to the query object?",
        "scale": {
            1: "Nonsensical, irrelevant, or incomprehensible",
            2: "Mostly unclear, weak connection to query",
            3: "Partially coherent, some logical gaps",
            4: "Mostly coherent with minor issues",
            5: "Fully coherent, clearly relates to query",
        },
        "low_label": "Nonsense",
        "high_label": "Coherent",
    },
    "usefulness": {
        "name": "Usefulness",
        "question": "Could this idea have practical value or inspire real innovation?",
        "scale": {
            1: "No practical value whatsoever",
            2: "Minimal usefulness, highly impractical",
            3: "Some potential value with major limitations",
            4: "Useful idea with realistic applications",
            5: "Highly useful, clear practical value",
        },
        "low_label": "Useless",
        "high_label": "Useful",
    },
}
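To show how these models are meant to be driven, here is a brief usage sketch; it is not part of the committed file, and the payload values and the Pydantic v2 model_dump() call are assumptions.

# Usage sketch (illustration only, not part of the file above).
payload = {
    "rater_id": "r01",
    "idea_id": "idea-123",
    "query_id": "q-brick",
    "originality": 4,
    "coherence": 5,
}
rating = RatingSubmit(**payload)  # elaboration/usefulness stay None, skipped stays False
print(rating.model_dump())        # Pydantic v2 serializer; use .dict() on v1

# A score outside 1-5 fails the ge/le constraints and raises ValidationError:
# RatingSubmit(rater_id="r01", idea_id="x", query_id="q", originality=7)

# Scale anchors served to the React frontend:
print(DIMENSION_DEFINITIONS["originality"]["scale"][4])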