novelty-seeking/experiments/novelty_loop/agent.py
gbanyan 43c025e060 feat: Add experiments framework and novelty-driven agent loop
- Add complete experiments directory with pilot study infrastructure
  - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective)
  - Human assessment tool with React frontend and FastAPI backend
  - AUT flexibility analysis with jump signal detection
  - Result visualization and metrics computation

- Add novelty-driven agent loop module (experiments/novelty_loop/)
  - NoveltyDrivenTaskAgent with expert perspective perturbation
  - Three termination strategies: breakthrough, exhaust, coverage
  - Interactive CLI demo with colored output
  - Embedding-based novelty scoring

- Add DDC knowledge domain classification data (en/zh)
- Add CLAUDE.md project documentation
- Update research report with experiment findings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 10:16:21 +08:00

726 lines · 27 KiB · Python
"""
Novelty-Driven Task Agent - An autonomous agent that generates tasks using novelty as termination condition.
This agent operates in a while loop, generating tasks from diverse expert perspectives,
and terminates when it finds a task that exceeds the novelty threshold (a "breakthrough").
The core innovation is using novelty assessment to help the agent "jump out" of its
trained data distribution (semantic gravity), finding truly novel ideas.
Architecture:
Seed Problem → Expert Sample → Task Generation → Novelty Assessment → Continue/Stop
Termination Strategy: "Seek Breakthrough"
- Continue until novelty > threshold
- Find the first truly novel task and stop
Research Foundation:
- Novelty Search (Lehman & Stanley): Reward novelty, not objectives
- Curiosity-driven Exploration (Pathak et al.): Intrinsic motivation via prediction error
- Quality-Diversity (MAP-Elites): Maintain diverse high-quality solutions
"""
import asyncio
import json
import logging
import random
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, List, Optional

import httpx
import numpy as np

from .novelty_metrics import NoveltyMetrics, NoveltyScore, NoveltyTrajectory

logger = logging.getLogger(__name__)

# ============================================================================
# Data Classes
# ============================================================================
@dataclass
class GeneratedTask:
    """A single generated task with metadata."""
    task: str
    expert: str
    expert_domain: str
    novelty_score: float
    iteration: int
    is_breakthrough: bool = False
    embedding: Optional[np.ndarray] = None


@dataclass
class TaskGenerationResult:
    """Result of a complete novelty-driven task generation session."""
    seed_problem: str
    breakthrough_task: Optional[GeneratedTask] = None
    trajectory: List[GeneratedTask] = field(default_factory=list)
    total_iterations: int = 0
    terminated_by: str = "unknown"  # "breakthrough", "max_iterations", "error"
    novelty_trajectory: Optional[NoveltyTrajectory] = None
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    config: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        return {
            "seed_problem": self.seed_problem,
            "breakthrough_task": {
                "task": self.breakthrough_task.task,
                "expert": self.breakthrough_task.expert,
                "expert_domain": self.breakthrough_task.expert_domain,
                "novelty_score": self.breakthrough_task.novelty_score,
                "iteration": self.breakthrough_task.iteration
            } if self.breakthrough_task else None,
            "trajectory": [
                {
                    "task": t.task,
                    "expert": t.expert,
                    "expert_domain": t.expert_domain,
                    "novelty_score": t.novelty_score,
                    "iteration": t.iteration,
                    "is_breakthrough": t.is_breakthrough
                }
                for t in self.trajectory
            ],
            "total_iterations": self.total_iterations,
            "terminated_by": self.terminated_by,
            "novelty_stats": {
                "mean_novelty": self.novelty_trajectory.mean_novelty if self.novelty_trajectory else 0,
                "max_novelty": self.novelty_trajectory.max_novelty if self.novelty_trajectory else 0,
                "jump_ratio": self.novelty_trajectory.jump_ratio if self.novelty_trajectory else 0,
                "cumulative_novelty": self.novelty_trajectory.final_cumulative_novelty if self.novelty_trajectory else 0
            },
            "start_time": self.start_time,
            "end_time": self.end_time,
            "config": self.config
        }

# ============================================================================
# Expert/Domain Providers
# ============================================================================
class ExpertProvider:
    """Provides random experts from curated occupation lists."""

    def __init__(self, data_dir: Optional[Path] = None, language: str = "en"):
        """
        Args:
            data_dir: Path to data directory containing occupation JSON files
            language: Language code ("en" or "zh")
        """
        if data_dir is None:
            # Default to backend data directory
            data_dir = Path(__file__).parent.parent.parent / "backend" / "app" / "data"
        self.data_dir = data_dir
        self.language = language
        self._occupations: List[dict] = []
        self._load_occupations()

    def _load_occupations(self):
        """Load occupations from JSON file."""
        file_path = self.data_dir / f"curated_occupations_{self.language}.json"
        if not file_path.exists():
            logger.warning(f"Occupation file not found: {file_path}")
            # Fallback to some default experts
            self._occupations = [
                {"name": "Marine Biologist", "domain": "Science"},
                {"name": "Choreographer", "domain": "Arts"},
                {"name": "Urban Planner", "domain": "Architecture"},
                {"name": "Chef", "domain": "Culinary"},
                {"name": "Astronomer", "domain": "Science"},
            ]
            return
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            self._occupations = data.get("occupations", [])
            logger.info(f"Loaded {len(self._occupations)} occupations from {file_path.name}")
        except Exception as e:
            logger.error(f"Error loading occupations: {e}")
            self._occupations = []

    def get_random_expert(self) -> dict:
        """Get a random expert with name and domain."""
        if not self._occupations:
            return {"name": "Expert", "domain": "General"}
        return random.choice(self._occupations)

    def get_random_experts(self, count: int) -> List[dict]:
        """Get multiple random experts without replacement."""
        if len(self._occupations) <= count:
            return self._occupations.copy()
        return random.sample(self._occupations, count)

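# Expected shape of curated_occupations_{language}.json, inferred from the
# loader above (only "name" and "domain" are read; the example entries are
# illustrative):
#
#     {
#       "occupations": [
#         {"name": "Marine Biologist", "domain": "Science"},
#         {"name": "Choreographer", "domain": "Arts"}
#       ]
#     }
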
class DomainProvider:
    """Provides random knowledge domains from DDC classification."""

    def __init__(self, data_dir: Optional[Path] = None, language: str = "en"):
        if data_dir is None:
            data_dir = Path(__file__).parent.parent.parent / "backend" / "app" / "data"
        self.data_dir = data_dir
        self.language = language
        self._domains: List[dict] = []
        self._load_domains()

    def _load_domains(self):
        """Load domains from JSON file."""
        file_path = self.data_dir / f"ddc_domains_{self.language}.json"
        if not file_path.exists():
            logger.warning(f"Domain file not found: {file_path}")
            self._domains = []
            return
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            self._domains = data.get("domains", [])
            logger.info(f"Loaded {len(self._domains)} domains from {file_path.name}")
        except Exception as e:
            logger.error(f"Error loading domains: {e}")

    def get_random_domain(self, level: Optional[str] = None) -> dict:
        """Get a random domain, optionally filtered by level."""
        domains = self._domains
        if level:
            domains = [d for d in domains if d.get("level") == level]
        if not domains:
            return {"name": "General Knowledge", "code": "000"}
        return random.choice(domains)

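# Expected shape of ddc_domains_{language}.json, inferred from the loader and
# get_random_domain above ("level" is optional and only used for filtering;
# the field values shown are hypothetical):
#
#     {
#       "domains": [
#         {"name": "Natural sciences", "code": "500", "level": "main"}
#       ]
#     }
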
# ============================================================================
# Novelty-Driven Task Agent
# ============================================================================
class NoveltyDrivenTaskAgent:
    """
    An autonomous agent that generates tasks using novelty as the termination condition.

    The agent operates in a loop:
    1. Sample a random expert perspective
    2. Generate a task from that expert's viewpoint
    3. Compute the task's novelty (distance from centroid of previous tasks)
    4. If novelty > threshold → STOP (found breakthrough!)
    5. Otherwise → Continue with next expert

    Example:
        agent = NoveltyDrivenTaskAgent(novelty_threshold=0.4)
        result = await agent.run("Improve urban transportation")
        # result.breakthrough_task contains the novel task found
        # result.trajectory shows the exploration path
    """

    def __init__(
        self,
        novelty_threshold: float = 0.4,
        max_iterations: int = 20,
        ollama_base_url: str = "http://localhost:11435",
        llm_model: str = "qwen3:8b",
        embedding_model: str = "qwen3-embedding:4b",
        language: str = "en",
        data_dir: Optional[Path] = None,
        on_iteration: Optional[Callable[[GeneratedTask], None]] = None,
        temperature: float = 0.7
    ):
        """
        Args:
            novelty_threshold: Novelty score threshold for breakthrough (0.0-1.0)
            max_iterations: Maximum iterations before stopping
            ollama_base_url: Ollama API endpoint
            llm_model: Model for task generation
            embedding_model: Model for embeddings
            language: Language for prompts and experts ("en" or "zh")
            data_dir: Path to data directory for expert/domain files
            on_iteration: Callback function called after each iteration
            temperature: LLM temperature for generation
        """
        self.novelty_threshold = novelty_threshold
        self.max_iterations = max_iterations
        self.ollama_base_url = ollama_base_url
        self.llm_model = llm_model
        self.embedding_model = embedding_model
        self.language = language
        self.temperature = temperature
        self.on_iteration = on_iteration

        # Initialize providers
        self.expert_provider = ExpertProvider(data_dir, language)
        self.domain_provider = DomainProvider(data_dir, language)

        # Initialize novelty metrics
        self.novelty_metrics = NoveltyMetrics(
            similarity_threshold=0.7,
            jump_detection_enabled=True
        )

        # HTTP client
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create HTTP client."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=120.0)
        return self._client

    async def close(self):
        """Close HTTP client."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None
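
    # Usage sketch: stream progress through the on_iteration callback
    # (the seed problem and parameter values here are illustrative):
    #
    #     def show(task: GeneratedTask) -> None:
    #         print(f"[{task.iteration}] {task.expert}: {task.novelty_score:.3f}")
    #
    #     agent = NoveltyDrivenTaskAgent(novelty_threshold=0.4, on_iteration=show)
    #     result = await agent.run("Improve urban transportation")
    #     await agent.close()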
    async def _generate_text(self, prompt: str) -> str:
        """Generate text using Ollama LLM."""
        client = await self._get_client()
        url = f"{self.ollama_base_url}/api/generate"

        # Add /no_think prefix for qwen models to disable thinking
        if self.llm_model.lower().startswith("qwen"):
            prompt = f"/no_think\n{prompt}"

        try:
            response = await client.post(url, json={
                "model": self.llm_model,
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": self.temperature
                }
            })
            response.raise_for_status()
            result = response.json()
            return result.get("response", "").strip()
        except Exception as e:
            logger.error(f"LLM generation error: {e}")
            raise

    async def _get_embedding(self, text: str) -> np.ndarray:
        """Get embedding vector for text."""
        client = await self._get_client()
        url = f"{self.ollama_base_url}/api/embed"
        try:
            response = await client.post(url, json={
                "model": self.embedding_model,
                "input": text
            })
            response.raise_for_status()
            result = response.json()
            return np.array(result["embeddings"][0])
        except Exception as e:
            logger.error(f"Embedding error: {e}")
            raise
    def _build_task_prompt(
        self,
        seed_problem: str,
        expert: dict,
        previous_tasks: List[str]
    ) -> str:
        """Build the prompt for task generation."""
        expert_name = expert.get("name", "Expert")
        expert_domain = expert.get("domain", "General")

        # Build context from previous tasks (if any)
        context = ""
        if previous_tasks:
            recent = previous_tasks[-3:]  # Last 3 tasks
            context = "\n\nPrevious suggestions (generate something DIFFERENT):\n"
            for t in recent:
                context += f"- {t}\n"

        if self.language == "zh":
            prompt = f"""你是一位 {expert_name}({expert_domain})。
给定问题:{seed_problem}
请从你的专业角度出发,提出一个独特的改进任务或探索方向。
这个任务应该结合你的专业知识,提供一个非传统但有价值的视角。
{context}
请直接给出任务描述,不要添加解释。任务应该具体、可行、且与众不同。
任务:"""
        else:
            prompt = f"""You are a {expert_name} ({expert_domain}).
Given problem: {seed_problem}
From your professional perspective, propose a unique task or exploration direction to improve or innovate on this problem.
The task should leverage your domain expertise to provide an unconventional but valuable angle.
{context}
Provide just the task description without explanation. The task should be specific, actionable, and distinctive.
Task:"""
        return prompt

    async def _generate_task(
        self,
        seed_problem: str,
        expert: dict,
        previous_tasks: List[str]
    ) -> str:
        """Generate a task from an expert's perspective."""
        prompt = self._build_task_prompt(seed_problem, expert, previous_tasks)
        task = await self._generate_text(prompt)

        # Clean up the response
        task = task.strip()
        # Remove common prefixes
        for prefix in ["Task:", "任务:", "Here's", "I suggest", "Based on"]:
            if task.lower().startswith(prefix.lower()):
                task = task[len(prefix):].strip()
        return task
    async def run(
        self,
        seed_problem: str,
        used_experts: Optional[List[dict]] = None
    ) -> TaskGenerationResult:
        """
        Run the novelty-driven task generation loop.

        Args:
            seed_problem: The initial problem/challenge to explore
            used_experts: Optional list of experts to avoid (for multi-run scenarios)

        Returns:
            TaskGenerationResult with breakthrough task (if found) and full trajectory
        """
        # Reset state
        self.novelty_metrics.reset()
        result = TaskGenerationResult(
            seed_problem=seed_problem,
            start_time=datetime.now(timezone.utc).isoformat(),
            config={
                "novelty_threshold": self.novelty_threshold,
                "max_iterations": self.max_iterations,
                "llm_model": self.llm_model,
                "embedding_model": self.embedding_model,
                "language": self.language
            }
        )

        used_expert_names = set()
        if used_experts:
            used_expert_names = {e["name"] for e in used_experts}
        previous_tasks: List[str] = []

        logger.info(f"Starting novelty loop: '{seed_problem}' (threshold={self.novelty_threshold})")

        try:
            for iteration in range(self.max_iterations):
                # 1. Sample a random expert (avoid duplicates)
                attempts = 0
                expert = self.expert_provider.get_random_expert()
                while expert["name"] in used_expert_names and attempts < 10:
                    expert = self.expert_provider.get_random_expert()
                    attempts += 1
                used_expert_names.add(expert["name"])

                logger.info(f"Iteration {iteration + 1}: Expert = {expert['name']} ({expert['domain']})")

                # 2. Generate task
                task = await self._generate_task(seed_problem, expert, previous_tasks)
                previous_tasks.append(task)

                # 3. Get embedding
                embedding = await self._get_embedding(task)

                # 4. Compute novelty
                novelty = self.novelty_metrics.compute_novelty(embedding)
                self.novelty_metrics.add_embedding(embedding, novelty)

                # 5. Create task record
                generated_task = GeneratedTask(
                    task=task,
                    expert=expert["name"],
                    expert_domain=expert["domain"],
                    novelty_score=novelty.score,
                    iteration=iteration + 1,
                    is_breakthrough=novelty.score > self.novelty_threshold,
                    embedding=embedding
                )
                result.trajectory.append(generated_task)

                logger.info(f" Task: {task[:80]}...")
                logger.info(f" Novelty: {novelty.score:.4f} (threshold: {self.novelty_threshold})")

                # Callback
                if self.on_iteration:
                    self.on_iteration(generated_task)

                # 6. Check for breakthrough
                if novelty.score > self.novelty_threshold:
                    result.breakthrough_task = generated_task
                    result.terminated_by = "breakthrough"
                    result.total_iterations = iteration + 1
                    logger.info(f" BREAKTHROUGH! Stopping after {iteration + 1} iterations")
                    break
            else:
                # Max iterations reached without breakthrough
                result.terminated_by = "max_iterations"
                result.total_iterations = self.max_iterations
                logger.info(f"Max iterations ({self.max_iterations}) reached without breakthrough")
                # Find the most novel task as a fallback
                if result.trajectory:
                    best_task = max(result.trajectory, key=lambda t: t.novelty_score)
                    best_task.is_breakthrough = True  # Mark as best found
                    result.breakthrough_task = best_task
        except Exception as e:
            logger.error(f"Error during generation: {e}")
            result.terminated_by = f"error: {str(e)}"
            result.total_iterations = len(result.trajectory)

        # Finalize
        result.end_time = datetime.now(timezone.utc).isoformat()
        result.novelty_trajectory = self.novelty_metrics.trajectory
        return result

# ============================================================================
# Alternative Termination Strategies
# ============================================================================
class ExhaustFrontierAgent(NoveltyDrivenTaskAgent):
    """
    Alternative strategy: Continue while novelty is high, stop when it drops.

    This explores the "novelty frontier" more thoroughly, finding multiple novel
    ideas before stopping when exploration becomes repetitive.
    """

    def __init__(
        self,
        exhaustion_threshold: float = 0.15,
        window_size: int = 3,
        min_iterations: int = 5,
        **kwargs
    ):
        """
        Args:
            exhaustion_threshold: Stop when recent average novelty drops below this
            window_size: Number of recent iterations to average
            min_iterations: Minimum iterations before checking exhaustion
            **kwargs: Passed to parent class
        """
        super().__init__(**kwargs)
        self.exhaustion_threshold = exhaustion_threshold
        self.window_size = window_size
        self.min_iterations = min_iterations
    async def run(self, seed_problem: str, **kwargs) -> TaskGenerationResult:
        """Override to use exhaustion-based termination."""
        # Reset state
        self.novelty_metrics.reset()
        result = TaskGenerationResult(
            seed_problem=seed_problem,
            start_time=datetime.now(timezone.utc).isoformat(),
            config={
                "strategy": "exhaust_frontier",
                "exhaustion_threshold": self.exhaustion_threshold,
                "window_size": self.window_size,
                "min_iterations": self.min_iterations,
                "max_iterations": self.max_iterations,
                "llm_model": self.llm_model
            }
        )

        used_expert_names = set()
        previous_tasks: List[str] = []
        novelty_history: List[float] = []

        try:
            for iteration in range(self.max_iterations):
                # Sample expert (bounded retries, so a small expert pool
                # cannot spin this loop forever once every name is used)
                attempts = 0
                expert = self.expert_provider.get_random_expert()
                while expert["name"] in used_expert_names and attempts < 10:
                    expert = self.expert_provider.get_random_expert()
                    attempts += 1
                used_expert_names.add(expert["name"])

                # Generate and evaluate
                task = await self._generate_task(seed_problem, expert, previous_tasks)
                previous_tasks.append(task)
                embedding = await self._get_embedding(task)
                novelty = self.novelty_metrics.compute_novelty(embedding)
                self.novelty_metrics.add_embedding(embedding, novelty)
                novelty_history.append(novelty.score)

                generated_task = GeneratedTask(
                    task=task,
                    expert=expert["name"],
                    expert_domain=expert["domain"],
                    novelty_score=novelty.score,
                    iteration=iteration + 1
                )
                result.trajectory.append(generated_task)

                if self.on_iteration:
                    self.on_iteration(generated_task)

                # Check exhaustion condition
                if iteration >= self.min_iterations:
                    recent_avg = np.mean(novelty_history[-self.window_size:])
                    if recent_avg < self.exhaustion_threshold:
                        result.terminated_by = f"exhaustion (avg={recent_avg:.3f})"
                        result.total_iterations = iteration + 1
                        break
            else:
                result.terminated_by = "max_iterations"
                result.total_iterations = self.max_iterations

            # Find all "novel" tasks
            novel_tasks = [t for t in result.trajectory if t.novelty_score > self.exhaustion_threshold]
            if novel_tasks:
                result.breakthrough_task = max(novel_tasks, key=lambda t: t.novelty_score)
                result.breakthrough_task.is_breakthrough = True
        except Exception as e:
            result.terminated_by = f"error: {str(e)}"
            result.total_iterations = len(result.trajectory)

        result.end_time = datetime.now(timezone.utc).isoformat()
        result.novelty_trajectory = self.novelty_metrics.trajectory
        return result

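# Usage sketch for the exhaustion strategy (parameter values are illustrative;
# extra kwargs such as novelty_threshold are forwarded to the parent class):
#
#     agent = ExhaustFrontierAgent(exhaustion_threshold=0.15, window_size=3,
#                                  min_iterations=5, max_iterations=30)
#     result = await agent.run("Improve urban transportation")
#     frontier = [t for t in result.trajectory if t.novelty_score > 0.15]
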
class CoverageTargetAgent(NoveltyDrivenTaskAgent):
    """
    Alternative strategy: Continue until N distinct clusters are covered.

    This ensures a diverse portfolio of ideas across different conceptual areas.
    """

    def __init__(
        self,
        target_clusters: int = 5,
        cluster_threshold: float = 0.7,
        **kwargs
    ):
        """
        Args:
            target_clusters: Target number of distinct clusters to find
            cluster_threshold: Similarity threshold for cluster membership
            **kwargs: Passed to parent class
        """
        super().__init__(**kwargs)
        self.target_clusters = target_clusters
        self.cluster_threshold = cluster_threshold

    def _count_clusters(self, embeddings: List[np.ndarray]) -> int:
        """Count distinct clusters using greedy clustering."""
        if not embeddings:
            return 0
        clusters = []
        for emb in embeddings:
            found_cluster = False
            for cluster_centroid in clusters:
                similarity = NoveltyMetrics.cosine_similarity(emb, cluster_centroid)
                if similarity >= self.cluster_threshold:
                    found_cluster = True
                    break
            if not found_cluster:
                clusters.append(emb)
        return len(clusters)
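
    # Worked example (hypothetical similarities, cluster_threshold = 0.7):
    # embeddings arrive as e1, e2, e3. sim(e2, e1) = 0.9 >= 0.7, so e2 joins
    # e1's cluster; sim(e3, e1) = 0.4 < 0.7, so e3 starts a new cluster,
    # giving 2 clusters. Note the first member of each cluster serves as its
    # fixed "centroid"; centroids are not updated as members join.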
    async def run(self, seed_problem: str, **kwargs) -> TaskGenerationResult:
        """Override to use coverage-based termination."""
        self.novelty_metrics.reset()
        result = TaskGenerationResult(
            seed_problem=seed_problem,
            start_time=datetime.now(timezone.utc).isoformat(),
            config={
                "strategy": "coverage_target",
                "target_clusters": self.target_clusters,
                "cluster_threshold": self.cluster_threshold,
                "max_iterations": self.max_iterations
            }
        )

        used_expert_names = set()
        previous_tasks: List[str] = []
        all_embeddings: List[np.ndarray] = []

        try:
            for iteration in range(self.max_iterations):
                # Sample expert (bounded retries, so a small expert pool
                # cannot spin this loop forever once every name is used)
                attempts = 0
                expert = self.expert_provider.get_random_expert()
                while expert["name"] in used_expert_names and attempts < 10:
                    expert = self.expert_provider.get_random_expert()
                    attempts += 1
                used_expert_names.add(expert["name"])

                task = await self._generate_task(seed_problem, expert, previous_tasks)
                previous_tasks.append(task)
                embedding = await self._get_embedding(task)
                all_embeddings.append(embedding)
                novelty = self.novelty_metrics.compute_novelty(embedding)
                self.novelty_metrics.add_embedding(embedding, novelty)

                generated_task = GeneratedTask(
                    task=task,
                    expert=expert["name"],
                    expert_domain=expert["domain"],
                    novelty_score=novelty.score,
                    iteration=iteration + 1
                )
                result.trajectory.append(generated_task)

                if self.on_iteration:
                    self.on_iteration(generated_task)

                # Check coverage
                cluster_count = self._count_clusters(all_embeddings)
                if cluster_count >= self.target_clusters:
                    result.terminated_by = f"coverage ({cluster_count} clusters)"
                    result.total_iterations = iteration + 1
                    break
            else:
                final_clusters = self._count_clusters(all_embeddings)
                result.terminated_by = f"max_iterations ({final_clusters} clusters)"
                result.total_iterations = self.max_iterations

            # Find most novel task
            if result.trajectory:
                best_task = max(result.trajectory, key=lambda t: t.novelty_score)
                best_task.is_breakthrough = True
                result.breakthrough_task = best_task
        except Exception as e:
            result.terminated_by = f"error: {str(e)}"
            result.total_iterations = len(result.trajectory)

        result.end_time = datetime.now(timezone.utc).isoformat()
        result.novelty_trajectory = self.novelty_metrics.trajectory
        return result
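
# ============================================================================
# Minimal Demo
# ============================================================================
# A minimal usage sketch, not part of the module API. It assumes a local
# Ollama server is reachable at the default base URL configured above, with
# the default LLM and embedding models already pulled; the seed problem and
# parameter values are illustrative.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    async def _demo() -> None:
        agent = NoveltyDrivenTaskAgent(novelty_threshold=0.4, max_iterations=10)
        try:
            result = await agent.run("Improve urban transportation")
        finally:
            await agent.close()
        print(json.dumps(result.to_dict(), ensure_ascii=False, indent=2))

    asyncio.run(_demo())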