feat: Add experiments framework and novelty-driven agent loop
- Add complete experiments directory with pilot study infrastructure
  - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective)
  - Human assessment tool with React frontend and FastAPI backend
  - AUT flexibility analysis with jump signal detection
  - Result visualization and metrics computation
- Add novelty-driven agent loop module (experiments/novelty_loop/)
  - NoveltyDrivenTaskAgent with expert perspective perturbation
  - Three termination strategies: breakthrough, exhaust, coverage
  - Interactive CLI demo with colored output
  - Embedding-based novelty scoring
- Add DDC knowledge domain classification data (en/zh)
- Add CLAUDE.md project documentation
- Update research report with experiment findings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
1
experiments/assessment/backend/__init__.py
Normal file
1
experiments/assessment/backend/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Assessment backend package."""
|
||||
374
experiments/assessment/backend/app.py
Normal file
374
experiments/assessment/backend/app.py
Normal file
@@ -0,0 +1,374 @@
|
||||
"""
|
||||
FastAPI backend for human assessment of creative ideas.
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
try:
|
||||
from . import database as db
|
||||
from .models import (
|
||||
DIMENSION_DEFINITIONS,
|
||||
ExportData,
|
||||
ExportRating,
|
||||
IdeaForRating,
|
||||
Progress,
|
||||
QueryInfo,
|
||||
QueryWithIdeas,
|
||||
Rater,
|
||||
RaterCreate,
|
||||
RaterProgress,
|
||||
Rating,
|
||||
RatingSubmit,
|
||||
Statistics,
|
||||
)
|
||||
except ImportError:
|
||||
import database as db
|
||||
from models import (
|
||||
DIMENSION_DEFINITIONS,
|
||||
ExportData,
|
||||
ExportRating,
|
||||
IdeaForRating,
|
||||
Progress,
|
||||
QueryInfo,
|
||||
QueryWithIdeas,
|
||||
Rater,
|
||||
RaterCreate,
|
||||
RaterProgress,
|
||||
Rating,
|
||||
RatingSubmit,
|
||||
Statistics,
|
||||
)
|
||||
|
||||
|
||||
# Location of the prepared assessment items, resolved relative to this file.
DATA_PATH = Path(__file__).parent.parent / 'data' / 'assessment_items.json'


def load_assessment_data() -> dict[str, Any]:
    """Read and parse the assessment items JSON file at ``DATA_PATH``.

    Returns:
        The decoded JSON document.

    Raises:
        RuntimeError: If ``DATA_PATH`` does not exist.
    """
    if DATA_PATH.exists():
        with open(DATA_PATH, 'r', encoding='utf-8') as source:
            return json.load(source)
    raise RuntimeError(f"Assessment data not found at {DATA_PATH}. Run prepare_data.py first.")
|
||||
|
||||
|
||||
# Initialize FastAPI app; every endpoint below is registered on this instance.
app = FastAPI(
    title="Creative Idea Assessment API",
    description="API for human assessment of creative ideas using Torrance-inspired metrics",
    version="1.0.0"
)

# CORS middleware: accept any origin, method and header.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive — confirm this is only meant for local/pilot deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
|
||||
# Module-level cache; stays None until the data file is first loaded.
_assessment_data: dict[str, Any] | None = None


def get_assessment_data() -> dict[str, Any]:
    """Return the assessment data, loading it on first use.

    The result of ``load_assessment_data()`` is kept in the module-level
    ``_assessment_data`` variable and reused by later calls.
    """
    global _assessment_data
    if _assessment_data is not None:
        return _assessment_data
    _assessment_data = load_assessment_data()
    return _assessment_data
|
||||
|
||||
|
||||
# Rater endpoints
|
||||
@app.get("/api/raters", response_model=list[Rater])
def list_raters() -> list[dict[str, Any]]:
    """Return every rater record provided by the database layer."""
    registered = db.list_raters()
    return registered
|
||||
|
||||
|
||||
@app.post("/api/raters", response_model=Rater)
def create_or_get_rater(rater_data: RaterCreate) -> dict[str, Any]:
    """Register the rater described by the request body.

    Delegates to ``db.create_rater``, which hands back the stored record when
    the identifier is already registered.
    """
    rater_id, display_name = rater_data.rater_id, rater_data.name
    return db.create_rater(rater_id, display_name)
|
||||
|
||||
|
||||
@app.get("/api/raters/{rater_id}", response_model=Rater)
def get_rater(rater_id: str) -> dict[str, Any]:
    """Look up a single rater by identifier.

    Raises:
        HTTPException: 404 when the database has no such rater.
    """
    found = db.get_rater(rater_id)
    if found:
        return found
    raise HTTPException(status_code=404, detail="Rater not found")
|
||||
|
||||
|
||||
# Query endpoints
|
||||
@app.get("/api/queries", response_model=list[QueryInfo])
def list_queries() -> list[dict[str, Any]]:
    """Summarise every query in the assessment data.

    Each summary carries the query id, its text, its category (empty string
    when the data has none) and the number of ideas recorded for it.
    """
    summaries: list[dict[str, Any]] = []
    for entry in get_assessment_data()['queries']:
        summaries.append({
            'query_id': entry['query_id'],
            'query_text': entry['query_text'],
            'category': entry.get('category', ''),
            'idea_count': entry['idea_count'],
        })
    return summaries
|
||||
|
||||
|
||||
@app.get("/api/queries/{query_id}", response_model=QueryWithIdeas)
def get_query_with_ideas(query_id: str) -> dict[str, Any]:
    """Return one query together with all of its ideas.

    Only the id, text and list position of each idea are exposed; the
    ``_hidden`` metadata stored with the idea is not copied.

    Raises:
        HTTPException: 404 when no query has the given id.
    """
    data = get_assessment_data()
    target = next((q for q in data['queries'] if q['query_id'] == query_id), None)
    if target is None:
        raise HTTPException(status_code=404, detail="Query not found")

    presented = []
    for position, idea in enumerate(target['ideas']):
        presented.append(
            IdeaForRating(idea_id=idea['idea_id'], text=idea['text'], index=position)
        )

    return QueryWithIdeas(
        query_id=target['query_id'],
        query_text=target['query_text'],
        category=target.get('category', ''),
        ideas=presented,
        total_count=len(presented),
    )
|
||||
|
||||
|
||||
@app.get("/api/queries/{query_id}/unrated", response_model=QueryWithIdeas)
def get_unrated_ideas(query_id: str, rater_id: str) -> dict[str, Any]:
    """Return the ideas of a query that the given rater has not rated yet.

    ``index`` on each idea is its position in the full idea list of the query,
    and ``total_count`` is the query's ``idea_count`` (not the number of
    remaining ideas).

    Raises:
        HTTPException: 404 when no query has the given id.
    """
    data = get_assessment_data()
    target = next((q for q in data['queries'] if q['query_id'] == query_id), None)
    if target is None:
        raise HTTPException(status_code=404, detail="Query not found")

    # Ideas this rater already has a stored rating for.
    already_rated = db.get_rated_idea_ids(rater_id, query_id)

    pending = []
    for position, idea in enumerate(target['ideas']):
        if idea['idea_id'] in already_rated:
            continue
        pending.append(
            IdeaForRating(idea_id=idea['idea_id'], text=idea['text'], index=position)
        )

    return QueryWithIdeas(
        query_id=target['query_id'],
        query_text=target['query_text'],
        category=target.get('category', ''),
        ideas=pending,
        total_count=target['idea_count'],
    )
|
||||
|
||||
|
||||
# Rating endpoints
|
||||
@app.post("/api/ratings", response_model=dict[str, Any])
def submit_rating(rating: RatingSubmit) -> dict[str, Any]:
    """Validate and store a rating for an idea.

    Checks, in order: the rater is registered, the idea exists in the
    assessment data, the idea belongs to the submitted ``query_id``, and
    (unless the idea is skipped) all four dimensions carry a score.

    Returns:
        The confirmation dictionary produced by ``db.save_rating``.

    Raises:
        HTTPException: 404 for an unknown rater or idea; 400 when the idea
            does not belong to the submitted query or a score is missing.
    """
    # Validate that rater exists
    if not db.get_rater(rating.rater_id):
        raise HTTPException(status_code=404, detail="Rater not found. Please register first.")

    # Validate that the idea exists and that it belongs to the submitted query.
    # The query_id is stored with the rating and drives progress counts and
    # the "unrated" filter, so a mismatched pair must not be accepted.
    data = get_assessment_data()
    idea_exists = False
    idea_in_query = False
    for query in data['queries']:
        if any(idea['idea_id'] == rating.idea_id for idea in query['ideas']):
            idea_exists = True
            if query['query_id'] == rating.query_id:
                idea_in_query = True
                break

    if not idea_exists:
        raise HTTPException(status_code=404, detail="Idea not found")
    if not idea_in_query:
        raise HTTPException(
            status_code=400,
            detail="Idea does not belong to the specified query"
        )

    # If not skipped, require all ratings
    scores = (rating.originality, rating.elaboration, rating.coherence, rating.usefulness)
    if not rating.skipped and any(score is None for score in scores):
        raise HTTPException(
            status_code=400,
            detail="All dimensions must be rated unless skipping"
        )

    # Save rating
    return db.save_rating(
        rater_id=rating.rater_id,
        idea_id=rating.idea_id,
        query_id=rating.query_id,
        originality=rating.originality,
        elaboration=rating.elaboration,
        coherence=rating.coherence,
        usefulness=rating.usefulness,
        skipped=rating.skipped
    )
|
||||
|
||||
|
||||
# NOTE: the fixed-prefix route must be registered before the two-parameter
# route. Routes are matched in registration order, and
# "/api/ratings/{rater_id}/{idea_id}" also matches "/api/ratings/rater/<id>",
# which would otherwise make this endpoint unreachable.
@app.get("/api/ratings/rater/{rater_id}", response_model=list[Rating])
def get_ratings_by_rater(rater_id: str) -> list[dict[str, Any]]:
    """Get all ratings by a rater."""
    return db.get_ratings_by_rater(rater_id)


@app.get("/api/ratings/{rater_id}/{idea_id}", response_model=Rating | None)
def get_rating(rater_id: str, idea_id: str) -> dict[str, Any] | None:
    """Get a specific rating.

    Returns ``None`` (serialized as JSON ``null``) when the rater has no
    stored rating for the idea. A rater whose id is literally ``rater``
    cannot be addressed here because that path segment is claimed by the
    per-rater listing route above.
    """
    return db.get_rating(rater_id, idea_id)
|
||||
|
||||
|
||||
# Progress endpoints
|
||||
@app.get("/api/progress/{rater_id}", response_model=RaterProgress)
def get_rater_progress(rater_id: str) -> RaterProgress:
    """Summarise how far a rater has progressed through every query.

    Per query, the completed count is the number of the rater's stored ratings
    carrying that ``query_id`` and the total is the query's ``idea_count``.
    The overall percentage is rounded to one decimal and is 0 when there are
    no ideas at all.

    Raises:
        HTTPException: 404 when the rater is not registered.
    """
    if not db.get_rater(rater_id):
        raise HTTPException(status_code=404, detail="Rater not found")

    data = get_assessment_data()

    # Tally the rater's stored ratings by query.
    rated_counts: dict[str, int] = {}
    for row in db.get_ratings_by_rater(rater_id):
        key = row['query_id']
        rated_counts[key] = rated_counts.get(key, 0) + 1

    per_query = []
    done_overall = 0
    ideas_overall = 0
    for entry in data['queries']:
        query_key = entry['query_id']
        done = rated_counts.get(query_key, 0)
        available = entry['idea_count']
        per_query.append(Progress(
            rater_id=rater_id,
            query_id=query_key,
            completed_count=done,
            total_count=available,
        ))
        done_overall += done
        ideas_overall += available

    if ideas_overall > 0:
        percentage = done_overall / ideas_overall * 100
    else:
        percentage = 0

    return RaterProgress(
        rater_id=rater_id,
        queries=per_query,
        total_completed=done_overall,
        total_ideas=ideas_overall,
        percentage=round(percentage, 1),
    )
|
||||
|
||||
|
||||
# Statistics endpoint
|
||||
@app.get("/api/statistics", response_model=Statistics)
def get_statistics() -> Statistics:
    """Return the database-wide counters wrapped in a ``Statistics`` model."""
    return Statistics(**db.get_statistics())
|
||||
|
||||
|
||||
# Dimension definitions endpoint
|
||||
@app.get("/api/dimensions")
def get_dimensions() -> dict[str, Any]:
    """Expose ``DIMENSION_DEFINITIONS`` so the UI can render the rating scales."""
    definitions = DIMENSION_DEFINITIONS
    return definitions
|
||||
|
||||
|
||||
# Export endpoint
|
||||
@app.get("/api/export", response_model=ExportData)
def export_ratings() -> ExportData:
    """Export every stored rating joined with its idea's hidden metadata.

    Each rating is enriched with the query text, the idea text and the
    ``condition`` / ``expert_name`` / ``keyword`` values from the idea's
    ``_hidden`` entry. Ratings whose idea or query is missing from the
    assessment data get empty strings for those fields.
    """
    data = get_assessment_data()
    stored_ratings = db.get_all_ratings()

    # Index queries and ideas by id for the join below.
    hidden_fields = ('condition', 'expert_name', 'keyword')
    query_texts: dict[str, str] = {}
    ideas_by_id: dict[str, dict[str, Any]] = {}
    for query in data['queries']:
        query_texts[query['query_id']] = query['query_text']
        for idea in query['ideas']:
            details: dict[str, Any] = {'text': idea['text']}
            for field in hidden_fields:
                details[field] = idea['_hidden'][field]
            ideas_by_id[idea['idea_id']] = details

    def to_export(row: dict[str, Any]) -> ExportRating:
        """Combine one stored rating row with its idea and query details."""
        idea_data = ideas_by_id.get(row['idea_id'], {})
        return ExportRating(
            rater_id=row['rater_id'],
            idea_id=row['idea_id'],
            query_id=row['query_id'],
            query_text=query_texts.get(row['query_id'], ''),
            idea_text=idea_data.get('text', ''),
            originality=row['originality'],
            elaboration=row['elaboration'],
            coherence=row['coherence'],
            usefulness=row['usefulness'],
            skipped=bool(row['skipped']),
            condition=idea_data.get('condition', ''),
            expert_name=idea_data.get('expert_name', ''),
            keyword=idea_data.get('keyword', ''),
            timestamp=row['timestamp'],
        )

    rows = [to_export(row) for row in stored_ratings]

    return ExportData(
        experiment_id=data['experiment_id'],
        export_timestamp=datetime.utcnow(),
        rater_count=len(db.list_raters()),
        rating_count=len(rows),
        ratings=rows,
    )
|
||||
|
||||
|
||||
# Health check
|
||||
@app.get("/api/health")
def health_check() -> dict[str, str]:
    """Report that the service is up; always returns a fixed payload."""
    payload = {"status": "healthy"}
    return payload
|
||||
|
||||
|
||||
# Info endpoint
|
||||
@app.get("/api/info")
def get_info() -> dict[str, Any]:
    """Return the session-level fields of the assessment data.

    Copies ``experiment_id``, ``total_ideas``, ``query_count``, ``conditions``
    and ``randomization_seed`` from the loaded data; a missing key raises
    ``KeyError``.
    """
    data = get_assessment_data()
    exposed = (
        'experiment_id',
        'total_ideas',
        'query_count',
        'conditions',
        'randomization_seed',
    )
    return {field: data[field] for field in exposed}
|
||||
309
experiments/assessment/backend/database.py
Normal file
309
experiments/assessment/backend/database.py
Normal file
@@ -0,0 +1,309 @@
|
||||
"""
|
||||
SQLite database setup and operations for assessment ratings storage.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Generator
|
||||
|
||||
|
||||
# SQLite file location, resolved relative to this file.
DB_PATH = Path(__file__).parent.parent / 'results' / 'ratings.db'


def get_db_path() -> Path:
    """Return ``DB_PATH`` after making sure its parent directory exists."""
    results_dir = DB_PATH.parent
    results_dir.mkdir(parents=True, exist_ok=True)
    return DB_PATH
|
||||
|
||||
|
||||
@contextmanager
def get_connection() -> Generator[sqlite3.Connection, None, None]:
    """Yield a SQLite connection that is closed when the ``with`` block exits.

    Rows come back as ``sqlite3.Row`` so columns can be read by name. The
    connection is closed without an implicit commit; callers commit
    explicitly.
    """
    # NOTE(review): no ``PRAGMA foreign_keys`` is issued here, so the FOREIGN KEY
    # clauses in the schema are presumably not enforced — confirm intended.
    connection = sqlite3.connect(get_db_path())
    connection.row_factory = sqlite3.Row
    try:
        yield connection
    finally:
        connection.close()
|
||||
|
||||
|
||||
def init_db() -> None:
    """Create the ``raters``, ``ratings`` and ``progress`` tables and indexes.

    Every statement uses ``IF NOT EXISTS``, so calling this on an already
    initialised database leaves the existing objects in place.
    """
    schema_statements = (
        # Raters table
        '''
            CREATE TABLE IF NOT EXISTS raters (
                rater_id TEXT PRIMARY KEY,
                name TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''',
        # Ratings table
        '''
            CREATE TABLE IF NOT EXISTS ratings (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                rater_id TEXT NOT NULL,
                idea_id TEXT NOT NULL,
                query_id TEXT NOT NULL,
                originality INTEGER CHECK(originality BETWEEN 1 AND 5),
                elaboration INTEGER CHECK(elaboration BETWEEN 1 AND 5),
                coherence INTEGER CHECK(coherence BETWEEN 1 AND 5),
                usefulness INTEGER CHECK(usefulness BETWEEN 1 AND 5),
                skipped INTEGER DEFAULT 0,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (rater_id) REFERENCES raters(rater_id),
                UNIQUE(rater_id, idea_id)
            )
        ''',
        # Progress table
        '''
            CREATE TABLE IF NOT EXISTS progress (
                rater_id TEXT NOT NULL,
                query_id TEXT NOT NULL,
                completed_count INTEGER DEFAULT 0,
                total_count INTEGER DEFAULT 0,
                started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (rater_id, query_id),
                FOREIGN KEY (rater_id) REFERENCES raters(rater_id)
            )
        ''',
        # Indexes for common queries
        '''
            CREATE INDEX IF NOT EXISTS idx_ratings_rater
            ON ratings(rater_id)
        ''',
        '''
            CREATE INDEX IF NOT EXISTS idx_ratings_idea
            ON ratings(idea_id)
        ''',
    )

    with get_connection() as conn:
        cursor = conn.cursor()
        for statement in schema_statements:
            cursor.execute(statement)
        conn.commit()
|
||||
|
||||
|
||||
# Rater operations
|
||||
def create_rater(rater_id: str, name: str | None = None) -> dict[str, Any]:
    """Create a rater, or return the stored record when the id already exists.

    Args:
        rater_id: Primary key of the rater.
        name: Display name; falls back to ``rater_id`` when empty or ``None``.

    Returns:
        For a new rater: the stored row (including ``created_at``) plus
        ``'created': True``. For an existing rater: the result of
        ``get_rater(rater_id)``.
    """
    display_name = name or rater_id
    with get_connection() as conn:
        cursor = conn.cursor()
        try:
            cursor.execute(
                'INSERT INTO raters (rater_id, name) VALUES (?, ?)',
                (rater_id, display_name)
            )
            conn.commit()
        except sqlite3.IntegrityError:
            # Rater already exists
            return get_rater(rater_id)

        # Re-read the row so the response carries the database-assigned
        # created_at, matching what is returned for an existing rater.
        cursor.execute('SELECT * FROM raters WHERE rater_id = ?', (rater_id,))
        row = cursor.fetchone()

    record = dict(row) if row else {'rater_id': rater_id, 'name': display_name}
    record['created'] = True
    return record
|
||||
|
||||
|
||||
def get_rater(rater_id: str) -> dict[str, Any] | None:
    """Fetch the ``raters`` row with the given id as a dict, or ``None``."""
    with get_connection() as conn:
        row = conn.execute(
            'SELECT * FROM raters WHERE rater_id = ?', (rater_id,)
        ).fetchone()
        return dict(row) if row else None
|
||||
|
||||
|
||||
def list_raters() -> list[dict[str, Any]]:
    """Return all ``raters`` rows as dicts, ordered by ``created_at``."""
    with get_connection() as conn:
        rows = conn.execute('SELECT * FROM raters ORDER BY created_at').fetchall()
        return [dict(row) for row in rows]
|
||||
|
||||
|
||||
# Rating operations
|
||||
def save_rating(
    rater_id: str,
    idea_id: str,
    query_id: str,
    originality: int | None,
    elaboration: int | None,
    coherence: int | None,
    usefulness: int | None,
    skipped: bool = False
) -> dict[str, Any]:
    """Insert a rating, or overwrite the existing one for (rater, idea).

    On conflict the four scores, the skipped flag and the timestamp are
    replaced; the stored ``query_id`` is left as it was. The rater's progress
    row for ``query_id`` is refreshed afterwards.

    Returns:
        ``{'rater_id': ..., 'idea_id': ..., 'saved': True}``.
    """
    # Local import: only `datetime` is imported at file level.
    from datetime import timezone

    # Bind the timestamp as an explicit string. This keeps the stored format
    # (naive UTC, "YYYY-MM-DD HH:MM:SS[.ffffff]") while avoiding both the
    # deprecated datetime.utcnow() and sqlite3's deprecated implicit
    # datetime adapter.
    saved_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat(sep=' ')

    with get_connection() as conn:
        cursor = conn.cursor()
        cursor.execute('''
            INSERT INTO ratings (rater_id, idea_id, query_id, originality, elaboration, coherence, usefulness, skipped, timestamp)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(rater_id, idea_id) DO UPDATE SET
                originality = excluded.originality,
                elaboration = excluded.elaboration,
                coherence = excluded.coherence,
                usefulness = excluded.usefulness,
                skipped = excluded.skipped,
                timestamp = excluded.timestamp
        ''', (rater_id, idea_id, query_id, originality, elaboration, coherence, usefulness, int(skipped), saved_at))
        conn.commit()

        # Update progress
        update_progress(rater_id, query_id)

        return {
            'rater_id': rater_id,
            'idea_id': idea_id,
            'saved': True
        }
|
||||
|
||||
|
||||
def get_rating(rater_id: str, idea_id: str) -> dict[str, Any] | None:
    """Fetch the rating a rater gave an idea as a dict, or ``None`` if absent."""
    lookup = 'SELECT * FROM ratings WHERE rater_id = ? AND idea_id = ?'
    with get_connection() as conn:
        row = conn.execute(lookup, (rater_id, idea_id)).fetchone()
        return dict(row) if row else None
|
||||
|
||||
|
||||
def get_ratings_by_rater(rater_id: str) -> list[dict[str, Any]]:
    """Return every rating stored for a rater, ordered by ``timestamp``."""
    lookup = 'SELECT * FROM ratings WHERE rater_id = ? ORDER BY timestamp'
    with get_connection() as conn:
        rows = conn.execute(lookup, (rater_id,)).fetchall()
        return [dict(row) for row in rows]
|
||||
|
||||
|
||||
def get_ratings_by_idea(idea_id: str) -> list[dict[str, Any]]:
    """Return every rating stored for an idea, ordered by ``rater_id``."""
    lookup = 'SELECT * FROM ratings WHERE idea_id = ? ORDER BY rater_id'
    with get_connection() as conn:
        rows = conn.execute(lookup, (idea_id,)).fetchall()
        return [dict(row) for row in rows]
|
||||
|
||||
|
||||
def get_all_ratings() -> list[dict[str, Any]]:
    """Return every row of the ``ratings`` table as a dict, ordered by ``timestamp``."""
    with get_connection() as conn:
        rows = conn.execute('SELECT * FROM ratings ORDER BY timestamp').fetchall()
        return [dict(row) for row in rows]
|
||||
|
||||
|
||||
# Progress operations
|
||||
def update_progress(rater_id: str, query_id: str) -> None:
    """Recount a rater's ratings for a query and store the count in ``progress``.

    The count includes skipped ratings. An existing progress row keeps its
    ``total_count`` and ``started_at``; only ``completed_count`` and
    ``updated_at`` are overwritten.
    """
    # Local import: only `datetime` is imported at file level.
    from datetime import timezone

    # Explicit string in the same naive-UTC format as before; avoids the
    # deprecated datetime.utcnow() and sqlite3's deprecated implicit
    # datetime adapter.
    updated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat(sep=' ')

    with get_connection() as conn:
        cursor = conn.cursor()

        # Count completed ratings for this query
        cursor.execute('''
            SELECT COUNT(*) as count FROM ratings
            WHERE rater_id = ? AND query_id = ?
        ''', (rater_id, query_id))
        completed = cursor.fetchone()['count']

        # Update or insert progress
        cursor.execute('''
            INSERT INTO progress (rater_id, query_id, completed_count, updated_at)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(rater_id, query_id) DO UPDATE SET
                completed_count = excluded.completed_count,
                updated_at = excluded.updated_at
        ''', (rater_id, query_id, completed, updated_at))
        conn.commit()
|
||||
|
||||
|
||||
def set_progress_total(rater_id: str, query_id: str, total: int) -> None:
    """Record how many ideas a query holds in the rater's progress row.

    A missing row is created with ``completed_count`` 0; an existing row only
    has its ``total_count`` replaced.
    """
    upsert = '''
        INSERT INTO progress (rater_id, query_id, total_count, completed_count)
        VALUES (?, ?, ?, 0)
        ON CONFLICT(rater_id, query_id) DO UPDATE SET
            total_count = excluded.total_count
    '''
    with get_connection() as conn:
        conn.execute(upsert, (rater_id, query_id, total))
        conn.commit()
|
||||
|
||||
|
||||
def get_progress(rater_id: str) -> list[dict[str, Any]]:
    """Return the rater's ``progress`` rows as dicts, ordered by ``query_id``."""
    lookup = 'SELECT * FROM progress WHERE rater_id = ? ORDER BY query_id'
    with get_connection() as conn:
        rows = conn.execute(lookup, (rater_id,)).fetchall()
        return [dict(row) for row in rows]
|
||||
|
||||
|
||||
def get_progress_for_query(rater_id: str, query_id: str) -> dict[str, Any] | None:
    """Fetch the progress row for one rater and query as a dict, or ``None``."""
    lookup = 'SELECT * FROM progress WHERE rater_id = ? AND query_id = ?'
    with get_connection() as conn:
        row = conn.execute(lookup, (rater_id, query_id)).fetchone()
        return dict(row) if row else None
|
||||
|
||||
|
||||
def get_rated_idea_ids(rater_id: str, query_id: str) -> set[str]:
    """Return the ids of ideas in a query that the rater has a stored rating for.

    Skipped ratings are rows too, so skipped ideas are included.
    """
    lookup = 'SELECT idea_id FROM ratings WHERE rater_id = ? AND query_id = ?'
    with get_connection() as conn:
        rated: set[str] = set()
        for row in conn.execute(lookup, (rater_id, query_id)).fetchall():
            rated.add(row['idea_id'])
        return rated
|
||||
|
||||
|
||||
# Statistics
|
||||
def get_statistics() -> dict[str, Any]:
    """Return database-wide counters.

    Keys: ``rater_count`` (all raters), ``rating_count`` (non-skipped
    ratings), ``skip_count`` (skipped ratings) and ``rated_ideas`` (distinct
    idea ids with at least one rating row).
    """
    counting_queries = {
        'rater_count': 'SELECT COUNT(*) as count FROM raters',
        'rating_count': 'SELECT COUNT(*) as count FROM ratings WHERE skipped = 0',
        'skip_count': 'SELECT COUNT(*) as count FROM ratings WHERE skipped = 1',
        'rated_ideas': 'SELECT COUNT(DISTINCT idea_id) as count FROM ratings',
    }
    with get_connection() as conn:
        cursor = conn.cursor()
        totals: dict[str, Any] = {}
        for label, sql in counting_queries.items():
            cursor.execute(sql)
            totals[label] = cursor.fetchone()['count']
        return totals
|
||||
|
||||
|
||||
# Initialize on import: importing this module creates the database file's
# directory and any missing tables/indexes as a side effect.
init_db()
|
||||
183
experiments/assessment/backend/models.py
Normal file
183
experiments/assessment/backend/models.py
Normal file
@@ -0,0 +1,183 @@
|
||||
"""
|
||||
Pydantic models for the assessment API.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# Request models
|
||||
class RaterCreate(BaseModel):
    """Request body for registering a rater or logging in as an existing one."""
    # Required identifier, 1-50 characters long.
    rater_id: str = Field(..., min_length=1, max_length=50, description="Unique rater identifier")
    # Optional display name of at most 100 characters.
    name: str | None = Field(None, max_length=100, description="Optional display name")
|
||||
|
||||
|
||||
class RatingSubmit(BaseModel):
    """Request body for submitting (or skipping) a rating of one idea."""
    # Required identifiers of the rater, the idea and the query.
    rater_id: str = Field(..., description="Rater identifier")
    idea_id: str = Field(..., description="Idea identifier")
    query_id: str = Field(..., description="Query identifier")
    # Scores are optional at the schema level and constrained to 1-5 when given.
    originality: int | None = Field(None, ge=1, le=5, description="Originality score 1-5")
    elaboration: int | None = Field(None, ge=1, le=5, description="Elaboration score 1-5")
    coherence: int | None = Field(None, ge=1, le=5, description="Coherence score 1-5")
    usefulness: int | None = Field(None, ge=1, le=5, description="Usefulness score 1-5")
    # Defaults to False.
    skipped: bool = Field(False, description="Whether the idea was skipped")
|
||||
|
||||
|
||||
# Response models
|
||||
class Rater(BaseModel):
    """Response model describing a registered rater."""
    rater_id: str
    name: str | None
    # Optional; defaults to None when the source record has no creation time.
    created_at: datetime | None = None
|
||||
|
||||
|
||||
class Rating(BaseModel):
    """Response model for one stored rating."""
    id: int
    rater_id: str
    idea_id: str
    query_id: str
    # Each score may be None.
    originality: int | None
    elaboration: int | None
    coherence: int | None
    usefulness: int | None
    # Integer flag here, unlike the boolean ``skipped`` on ExportRating.
    skipped: int
    timestamp: datetime | None
|
||||
|
||||
|
||||
class Progress(BaseModel):
    """Response model for a rater's progress on a single query."""
    rater_id: str
    query_id: str
    # Number of ideas rated so far, and number of ideas in the query.
    completed_count: int
    total_count: int
    # Optional timestamps; both default to None.
    started_at: datetime | None = None
    updated_at: datetime | None = None
|
||||
|
||||
|
||||
class QueryInfo(BaseModel):
    """Response model summarising a query without its ideas."""
    query_id: str
    query_text: str
    category: str
    # Number of ideas recorded for the query.
    idea_count: int
|
||||
|
||||
|
||||
class IdeaForRating(BaseModel):
    """An idea as shown to a rater: id, text and position only (no hidden metadata)."""
    idea_id: str
    text: str
    index: int  # Position in the randomized list for this query
|
||||
|
||||
|
||||
class QueryWithIdeas(BaseModel):
    """Response model for a query together with a list of its ideas."""
    query_id: str
    query_text: str
    category: str
    # Ideas presented for rating.
    ideas: list[IdeaForRating]
    # Idea count reported for the query; may differ from len(ideas) when the
    # list has been filtered.
    total_count: int
|
||||
|
||||
|
||||
class Statistics(BaseModel):
    """Response model for overall assessment counters."""
    rater_count: int
    rating_count: int
    skip_count: int
    rated_ideas: int
|
||||
|
||||
|
||||
class RaterProgress(BaseModel):
    """Response model for a rater's progress across all queries."""
    rater_id: str
    # One entry per query.
    queries: list[Progress]
    # Totals summed over all queries.
    total_completed: int
    total_ideas: int
    # Overall completion percentage.
    percentage: float
|
||||
|
||||
|
||||
# Export response models
|
||||
class ExportRating(BaseModel):
    """One exported rating joined with its query text, idea text and hidden metadata."""
    rater_id: str
    idea_id: str
    query_id: str
    query_text: str
    idea_text: str
    # Each score may be None.
    originality: int | None
    elaboration: int | None
    coherence: int | None
    usefulness: int | None
    skipped: bool
    # Hidden metadata of the idea.
    condition: str
    expert_name: str
    keyword: str
    timestamp: datetime | None
|
||||
|
||||
|
||||
class ExportData(BaseModel):
    """Top-level export payload: experiment metadata plus all exported ratings."""
    experiment_id: str
    # When the export was generated.
    export_timestamp: datetime
    rater_count: int
    rating_count: int
    ratings: list[ExportRating]
|
||||
|
||||
|
||||
# Dimension definitions (for frontend).
# One entry per rating dimension; each holds the display name, the question
# put to the rater, a description for every scale point 1-5, and short labels
# for the low and high ends of the scale.
DIMENSION_DEFINITIONS = {
    "originality": {
        "name": "Originality",
        "question": "How unexpected or surprising is this idea? Would most people NOT think of this?",
        "scale": {
            1: "Very common/obvious idea anyone would suggest",
            2: "Somewhat common, slight variation on expected ideas",
            3: "Moderately original, some unexpected elements",
            4: "Quite original, notably different approach",
            5: "Highly unexpected, truly novel concept"
        },
        "low_label": "Common",
        "high_label": "Unexpected"
    },
    "elaboration": {
        "name": "Elaboration",
        "question": "How detailed and well-developed is this idea?",
        "scale": {
            1: "Vague, minimal detail, just a concept",
            2: "Basic idea with little specificity",
            3: "Moderately detailed, some specifics provided",
            4: "Well-developed with clear implementation hints",
            5: "Highly specific, thoroughly developed concept"
        },
        "low_label": "Vague",
        "high_label": "Detailed"
    },
    "coherence": {
        "name": "Coherence",
        "question": "Does this idea make logical sense and relate to the query object?",
        "scale": {
            1: "Nonsensical, irrelevant, or incomprehensible",
            2: "Mostly unclear, weak connection to query",
            3: "Partially coherent, some logical gaps",
            4: "Mostly coherent with minor issues",
            5: "Fully coherent, clearly relates to query"
        },
        "low_label": "Nonsense",
        "high_label": "Coherent"
    },
    "usefulness": {
        "name": "Usefulness",
        "question": "Could this idea have practical value or inspire real innovation?",
        "scale": {
            1: "No practical value whatsoever",
            2: "Minimal usefulness, highly impractical",
            3: "Some potential value with major limitations",
            4: "Useful idea with realistic applications",
            5: "Highly useful, clear practical value"
        },
        "low_label": "Useless",
        "high_label": "Useful"
    }
}
|
||||
3
experiments/assessment/backend/requirements.txt
Normal file
3
experiments/assessment/backend/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
fastapi>=0.109.0
|
||||
uvicorn>=0.27.0
|
||||
pydantic>=2.5.0
|
||||
Reference in New Issue
Block a user