Add complete experiments directory with pilot study infrastructure
- 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective)
- Human assessment tool with React frontend and FastAPI backend
- AUT flexibility analysis with jump signal detection
- Result visualization and metrics computation

Add novelty-driven agent loop module (experiments/novelty_loop/)
- NoveltyDrivenTaskAgent with expert perspective perturbation
- Three termination strategies: breakthrough, exhaust, coverage
- Interactive CLI demo with colored output
- Embedding-based novelty scoring

Add DDC knowledge domain classification data (en/zh)
Add CLAUDE.md project documentation
Update research report with experiment findings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
726 lines · 27 KiB · Python
"""
|
||
Novelty-Driven Task Agent - An autonomous agent that generates tasks using novelty as termination condition.
|
||
|
||
This agent operates in a while loop, generating tasks from diverse expert perspectives,
|
||
and terminates when it finds a task that exceeds the novelty threshold (a "breakthrough").
|
||
|
||
The core innovation is using novelty assessment to help the agent "jump out" of its
|
||
trained data distribution (semantic gravity), finding truly novel ideas.
|
||
|
||
Architecture:
|
||
Seed Problem → Expert Sample → Task Generation → Novelty Assessment → Continue/Stop
|
||
|
||
Termination Strategy: "Seek Breakthrough"
|
||
- Continue until novelty > threshold
|
||
- Find the first truly novel task and stop
|
||
|
||
Research Foundation:
|
||
- Novelty Search (Lehman & Stanley): Reward novelty, not objectives
|
||
- Curiosity-driven Exploration (Pathak et al.): Intrinsic motivation via prediction error
|
||
- Quality-Diversity (MAP-Elites): Maintain diverse high-quality solutions
|
||
"""
|
||
|
||
import asyncio
|
||
import json
|
||
import logging
|
||
import random
|
||
from dataclasses import dataclass, field
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Any, Callable, List, Optional
|
||
|
||
import httpx
|
||
import numpy as np
|
||
|
||
from .novelty_metrics import NoveltyMetrics, NoveltyScore, NoveltyTrajectory
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
# ============================================================================
|
||
# Data Classes
|
||
# ============================================================================
|
||
|
||
@dataclass
|
||
class GeneratedTask:
|
||
"""A single generated task with metadata."""
|
||
task: str
|
||
expert: str
|
||
expert_domain: str
|
||
novelty_score: float
|
||
iteration: int
|
||
is_breakthrough: bool = False
|
||
embedding: Optional[np.ndarray] = None
|
||
|
||
|
||
@dataclass
|
||
class TaskGenerationResult:
|
||
"""Result of a complete novelty-driven task generation session."""
|
||
seed_problem: str
|
||
breakthrough_task: Optional[GeneratedTask] = None
|
||
trajectory: List[GeneratedTask] = field(default_factory=list)
|
||
total_iterations: int = 0
|
||
terminated_by: str = "unknown" # "breakthrough", "max_iterations", "error"
|
||
novelty_trajectory: Optional[NoveltyTrajectory] = None
|
||
start_time: Optional[str] = None
|
||
end_time: Optional[str] = None
|
||
config: dict = field(default_factory=dict)
|
||
|
||
def to_dict(self) -> dict:
|
||
"""Convert to dictionary for JSON serialization."""
|
||
return {
|
||
"seed_problem": self.seed_problem,
|
||
"breakthrough_task": {
|
||
"task": self.breakthrough_task.task,
|
||
"expert": self.breakthrough_task.expert,
|
||
"expert_domain": self.breakthrough_task.expert_domain,
|
||
"novelty_score": self.breakthrough_task.novelty_score,
|
||
"iteration": self.breakthrough_task.iteration
|
||
} if self.breakthrough_task else None,
|
||
"trajectory": [
|
||
{
|
||
"task": t.task,
|
||
"expert": t.expert,
|
||
"expert_domain": t.expert_domain,
|
||
"novelty_score": t.novelty_score,
|
||
"iteration": t.iteration,
|
||
"is_breakthrough": t.is_breakthrough
|
||
}
|
||
for t in self.trajectory
|
||
],
|
||
"total_iterations": self.total_iterations,
|
||
"terminated_by": self.terminated_by,
|
||
"novelty_stats": {
|
||
"mean_novelty": self.novelty_trajectory.mean_novelty if self.novelty_trajectory else 0,
|
||
"max_novelty": self.novelty_trajectory.max_novelty if self.novelty_trajectory else 0,
|
||
"jump_ratio": self.novelty_trajectory.jump_ratio if self.novelty_trajectory else 0,
|
||
"cumulative_novelty": self.novelty_trajectory.final_cumulative_novelty if self.novelty_trajectory else 0
|
||
},
|
||
"start_time": self.start_time,
|
||
"end_time": self.end_time,
|
||
"config": self.config
|
||
}
|
||
|
||
|
||
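
# Example: persisting a session result with to_dict(). A minimal sketch,
# assuming `result` is a TaskGenerationResult produced by one of the agents
# below; the output path is illustrative, not part of this module's API.
#
#     out_path = Path("runs") / "novelty_session.json"
#     out_path.parent.mkdir(parents=True, exist_ok=True)
#     out_path.write_text(
#         json.dumps(result.to_dict(), indent=2, ensure_ascii=False),
#         encoding="utf-8",
#     )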

# ============================================================================
# Expert/Domain Providers
# ============================================================================

class ExpertProvider:
    """Provides random experts from curated occupation lists."""

    def __init__(self, data_dir: Optional[Path] = None, language: str = "en"):
        """
        Args:
            data_dir: Path to data directory containing occupation JSON files
            language: Language code ("en" or "zh")
        """
        if data_dir is None:
            # Default to backend data directory
            data_dir = Path(__file__).parent.parent.parent / "backend" / "app" / "data"

        self.data_dir = data_dir
        self.language = language
        self._occupations: List[dict] = []
        self._load_occupations()

    def _load_occupations(self):
        """Load occupations from JSON file."""
        file_path = self.data_dir / f"curated_occupations_{self.language}.json"

        if not file_path.exists():
            logger.warning(f"Occupation file not found: {file_path}")
            # Fall back to a few default experts
            self._occupations = [
                {"name": "Marine Biologist", "domain": "Science"},
                {"name": "Choreographer", "domain": "Arts"},
                {"name": "Urban Planner", "domain": "Architecture"},
                {"name": "Chef", "domain": "Culinary"},
                {"name": "Astronomer", "domain": "Science"},
            ]
            return

        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            self._occupations = data.get("occupations", [])
            logger.info(f"Loaded {len(self._occupations)} occupations from {file_path.name}")
        except Exception as e:
            logger.error(f"Error loading occupations: {e}")
            self._occupations = []

    def get_random_expert(self) -> dict:
        """Get a random expert with name and domain."""
        if not self._occupations:
            return {"name": "Expert", "domain": "General"}
        return random.choice(self._occupations)

    def get_random_experts(self, count: int) -> List[dict]:
        """Get multiple random experts without replacement."""
        if len(self._occupations) <= count:
            return self._occupations.copy()
        return random.sample(self._occupations, count)

class DomainProvider:
    """Provides random knowledge domains from the DDC classification."""

    def __init__(self, data_dir: Optional[Path] = None, language: str = "en"):
        if data_dir is None:
            data_dir = Path(__file__).parent.parent.parent / "backend" / "app" / "data"

        self.data_dir = data_dir
        self.language = language
        self._domains: List[dict] = []
        self._load_domains()

    def _load_domains(self):
        """Load domains from JSON file."""
        file_path = self.data_dir / f"ddc_domains_{self.language}.json"

        if not file_path.exists():
            logger.warning(f"Domain file not found: {file_path}")
            self._domains = []
            return

        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            self._domains = data.get("domains", [])
            logger.info(f"Loaded {len(self._domains)} domains from {file_path.name}")
        except Exception as e:
            logger.error(f"Error loading domains: {e}")

    def get_random_domain(self, level: Optional[str] = None) -> dict:
        """Get a random domain, optionally filtered by level."""
        domains = self._domains
        if level:
            domains = [d for d in domains if d.get("level") == level]

        if not domains:
            return {"name": "General Knowledge", "code": "000"}
        return random.choice(domains)
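
# Example: sampling perspectives directly. A minimal sketch of the two
# providers above; with data_dir=None they fall back to the bundled backend
# data directory.
#
#     experts = ExpertProvider(language="en")
#     domains = DomainProvider(language="en")
#     print(experts.get_random_expert())    # e.g. {"name": "Chef", "domain": "Culinary"}
#     print(experts.get_random_experts(3))  # three distinct experts
#     print(domains.get_random_domain())    # optionally pass level=... to filter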

# ============================================================================
# Novelty-Driven Task Agent
# ============================================================================

class NoveltyDrivenTaskAgent:
    """
    An autonomous agent that generates tasks using novelty as the termination condition.

    The agent operates in a loop:
        1. Sample a random expert perspective
        2. Generate a task from that expert's viewpoint
        3. Compute the task's novelty (distance from the centroid of previous tasks)
        4. If novelty > threshold → STOP (found a breakthrough!)
        5. Otherwise → continue with the next expert

    Example:
        agent = NoveltyDrivenTaskAgent(novelty_threshold=0.4)
        result = await agent.run("Improve urban transportation")

        # result.breakthrough_task contains the novel task found
        # result.trajectory shows the exploration path
    """

    def __init__(
        self,
        novelty_threshold: float = 0.4,
        max_iterations: int = 20,
        ollama_base_url: str = "http://localhost:11435",
        llm_model: str = "qwen3:8b",
        embedding_model: str = "qwen3-embedding:4b",
        language: str = "en",
        data_dir: Optional[Path] = None,
        on_iteration: Optional[Callable[[GeneratedTask], None]] = None,
        temperature: float = 0.7,
    ):
        """
        Args:
            novelty_threshold: Novelty score threshold for a breakthrough (0.0-1.0)
            max_iterations: Maximum iterations before stopping
            ollama_base_url: Ollama API endpoint
            llm_model: Model for task generation
            embedding_model: Model for embeddings
            language: Language for prompts and experts ("en" or "zh")
            data_dir: Path to data directory for expert/domain files
            on_iteration: Callback function called after each iteration
            temperature: LLM temperature for generation
        """
        self.novelty_threshold = novelty_threshold
        self.max_iterations = max_iterations
        self.ollama_base_url = ollama_base_url
        self.llm_model = llm_model
        self.embedding_model = embedding_model
        self.language = language
        self.temperature = temperature
        self.on_iteration = on_iteration

        # Initialize providers
        self.expert_provider = ExpertProvider(data_dir, language)
        self.domain_provider = DomainProvider(data_dir, language)

        # Initialize novelty metrics
        self.novelty_metrics = NoveltyMetrics(
            similarity_threshold=0.7,
            jump_detection_enabled=True,
        )

        # HTTP client (created lazily on first request)
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create the HTTP client."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=120.0)
        return self._client

    async def close(self):
        """Close the HTTP client."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None

    async def _generate_text(self, prompt: str) -> str:
        """Generate text using the Ollama LLM."""
        client = await self._get_client()
        url = f"{self.ollama_base_url}/api/generate"

        # Add /no_think prefix for qwen models to disable thinking
        if self.llm_model.lower().startswith("qwen"):
            prompt = f"/no_think\n{prompt}"

        try:
            response = await client.post(url, json={
                "model": self.llm_model,
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": self.temperature
                }
            })
            response.raise_for_status()
            result = response.json()
            return result.get("response", "").strip()
        except Exception as e:
            logger.error(f"LLM generation error: {e}")
            raise

    async def _get_embedding(self, text: str) -> np.ndarray:
        """Get the embedding vector for a text."""
        client = await self._get_client()
        url = f"{self.ollama_base_url}/api/embed"

        try:
            response = await client.post(url, json={
                "model": self.embedding_model,
                "input": text
            })
            response.raise_for_status()
            result = response.json()
            return np.array(result["embeddings"][0])
        except Exception as e:
            logger.error(f"Embedding error: {e}")
            raise

    def _build_task_prompt(
        self,
        seed_problem: str,
        expert: dict,
        previous_tasks: List[str],
    ) -> str:
        """Build the prompt for task generation."""
        expert_name = expert.get("name", "Expert")
        expert_domain = expert.get("domain", "General")

        # Build context from previous tasks (if any)
        context = ""
        if previous_tasks:
            recent = previous_tasks[-3:]  # Last 3 tasks
            context = "\n\nPrevious suggestions (generate something DIFFERENT):\n"
            for t in recent:
                context += f"- {t}\n"

        if self.language == "zh":
            prompt = f"""你是一位 {expert_name}({expert_domain})。

给定问题:{seed_problem}

请从你的专业角度出发,提出一个独特的改进任务或探索方向。
这个任务应该结合你的专业知识,提供一个非传统但有价值的视角。
{context}
请直接给出任务描述,不要添加解释。任务应该具体、可行、且与众不同。

任务:"""
        else:
            prompt = f"""You are a {expert_name} ({expert_domain}).

Given problem: {seed_problem}

From your professional perspective, propose a unique task or exploration direction to improve or innovate on this problem.
The task should leverage your domain expertise to provide an unconventional but valuable angle.
{context}
Provide just the task description without explanation. The task should be specific, actionable, and distinctive.

Task:"""

        return prompt

    async def _generate_task(
        self,
        seed_problem: str,
        expert: dict,
        previous_tasks: List[str],
    ) -> str:
        """Generate a task from an expert's perspective."""
        prompt = self._build_task_prompt(seed_problem, expert, previous_tasks)
        task = await self._generate_text(prompt)

        # Clean up the response
        task = task.strip()
        # Remove common prefixes
        for prefix in ["Task:", "任务:", "Here's", "I suggest", "Based on"]:
            if task.lower().startswith(prefix.lower()):
                task = task[len(prefix):].strip()

        return task

    async def run(
        self,
        seed_problem: str,
        used_experts: Optional[List[dict]] = None,
    ) -> TaskGenerationResult:
        """
        Run the novelty-driven task generation loop.

        Args:
            seed_problem: The initial problem/challenge to explore
            used_experts: Optional list of experts to avoid (for multi-run scenarios)

        Returns:
            TaskGenerationResult with the breakthrough task (if found) and the full trajectory
        """
        # Reset state
        self.novelty_metrics.reset()

        result = TaskGenerationResult(
            seed_problem=seed_problem,
            start_time=datetime.now(timezone.utc).isoformat(),
            config={
                "novelty_threshold": self.novelty_threshold,
                "max_iterations": self.max_iterations,
                "llm_model": self.llm_model,
                "embedding_model": self.embedding_model,
                "language": self.language,
            },
        )

        used_expert_names = set()
        if used_experts:
            used_expert_names = {e["name"] for e in used_experts}

        previous_tasks: List[str] = []

        logger.info(f"Starting novelty loop: '{seed_problem}' (threshold={self.novelty_threshold})")

        try:
            for iteration in range(self.max_iterations):
                # 1. Sample a random expert (avoid duplicates)
                attempts = 0
                expert = self.expert_provider.get_random_expert()
                while expert["name"] in used_expert_names and attempts < 10:
                    expert = self.expert_provider.get_random_expert()
                    attempts += 1
                used_expert_names.add(expert["name"])

                logger.info(f"Iteration {iteration + 1}: Expert = {expert['name']} ({expert['domain']})")

                # 2. Generate task
                task = await self._generate_task(seed_problem, expert, previous_tasks)
                previous_tasks.append(task)

                # 3. Get embedding
                embedding = await self._get_embedding(task)

                # 4. Compute novelty
                novelty = self.novelty_metrics.compute_novelty(embedding)
                self.novelty_metrics.add_embedding(embedding, novelty)

                # 5. Create task record
                generated_task = GeneratedTask(
                    task=task,
                    expert=expert["name"],
                    expert_domain=expert["domain"],
                    novelty_score=novelty.score,
                    iteration=iteration + 1,
                    is_breakthrough=novelty.score > self.novelty_threshold,
                    embedding=embedding,
                )
                result.trajectory.append(generated_task)

                logger.info(f"  Task: {task[:80]}...")
                logger.info(f"  Novelty: {novelty.score:.4f} (threshold: {self.novelty_threshold})")

                # Callback
                if self.on_iteration:
                    self.on_iteration(generated_task)

                # 6. Check for breakthrough
                if novelty.score > self.novelty_threshold:
                    result.breakthrough_task = generated_task
                    result.terminated_by = "breakthrough"
                    result.total_iterations = iteration + 1
                    logger.info(f"  BREAKTHROUGH! Stopping after {iteration + 1} iterations")
                    break

            else:
                # Max iterations reached without breakthrough
                result.terminated_by = "max_iterations"
                result.total_iterations = self.max_iterations
                logger.info(f"Max iterations ({self.max_iterations}) reached without breakthrough")

                # Find the most novel task as a fallback
                if result.trajectory:
                    best_task = max(result.trajectory, key=lambda t: t.novelty_score)
                    best_task.is_breakthrough = True  # Mark as best found
                    result.breakthrough_task = best_task

        except Exception as e:
            logger.error(f"Error during generation: {e}")
            result.terminated_by = f"error: {str(e)}"
            result.total_iterations = len(result.trajectory)

        # Finalize
        result.end_time = datetime.now(timezone.utc).isoformat()
        result.novelty_trajectory = self.novelty_metrics.trajectory

        return result
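
# Example: running the breakthrough strategy with a progress callback. A
# minimal sketch, assuming a local Ollama server with the default models;
# `show` and `main` are illustrative names, not part of the API.
#
#     def show(t: GeneratedTask) -> None:
#         print(f"[{t.iteration}] {t.expert}: novelty={t.novelty_score:.3f}")
#
#     async def main() -> None:
#         agent = NoveltyDrivenTaskAgent(novelty_threshold=0.4, on_iteration=show)
#         try:
#             result = await agent.run("Improve urban transportation")
#             if result.breakthrough_task:
#                 print("Breakthrough:", result.breakthrough_task.task)
#         finally:
#             await agent.close()
#
#     asyncio.run(main())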

# ============================================================================
# Alternative Termination Strategies
# ============================================================================

class ExhaustFrontierAgent(NoveltyDrivenTaskAgent):
    """
    Alternative strategy: continue while novelty is high, stop when it drops.

    This explores the "novelty frontier" more thoroughly, finding multiple novel
    ideas before stopping when exploration becomes repetitive.
    """

    def __init__(
        self,
        exhaustion_threshold: float = 0.15,
        window_size: int = 3,
        min_iterations: int = 5,
        **kwargs,
    ):
        """
        Args:
            exhaustion_threshold: Stop when the recent average novelty drops below this
            window_size: Number of recent iterations to average
            min_iterations: Minimum iterations before checking exhaustion
            **kwargs: Passed to the parent class
        """
        super().__init__(**kwargs)
        self.exhaustion_threshold = exhaustion_threshold
        self.window_size = window_size
        self.min_iterations = min_iterations

    async def run(self, seed_problem: str, **kwargs) -> TaskGenerationResult:
        """Override to use exhaustion-based termination."""
        # Reset state
        self.novelty_metrics.reset()

        result = TaskGenerationResult(
            seed_problem=seed_problem,
            start_time=datetime.now(timezone.utc).isoformat(),
            config={
                "strategy": "exhaust_frontier",
                "exhaustion_threshold": self.exhaustion_threshold,
                "window_size": self.window_size,
                "min_iterations": self.min_iterations,
                "max_iterations": self.max_iterations,
                "llm_model": self.llm_model,
            },
        )

        used_expert_names = set()
        previous_tasks: List[str] = []
        novelty_history: List[float] = []

        try:
            for iteration in range(self.max_iterations):
                # Sample expert (bounded retries, so sampling cannot spin
                # forever once every available expert name has been used)
                attempts = 0
                expert = self.expert_provider.get_random_expert()
                while expert["name"] in used_expert_names and attempts < 10:
                    expert = self.expert_provider.get_random_expert()
                    attempts += 1
                used_expert_names.add(expert["name"])

                # Generate and evaluate
                task = await self._generate_task(seed_problem, expert, previous_tasks)
                previous_tasks.append(task)
                embedding = await self._get_embedding(task)
                novelty = self.novelty_metrics.compute_novelty(embedding)
                self.novelty_metrics.add_embedding(embedding, novelty)

                novelty_history.append(novelty.score)

                generated_task = GeneratedTask(
                    task=task,
                    expert=expert["name"],
                    expert_domain=expert["domain"],
                    novelty_score=novelty.score,
                    iteration=iteration + 1,
                )
                result.trajectory.append(generated_task)

                if self.on_iteration:
                    self.on_iteration(generated_task)

                # Check exhaustion condition
                if iteration >= self.min_iterations:
                    recent_avg = np.mean(novelty_history[-self.window_size:])
                    if recent_avg < self.exhaustion_threshold:
                        result.terminated_by = f"exhaustion (avg={recent_avg:.3f})"
                        result.total_iterations = iteration + 1
                        break

            else:
                result.terminated_by = "max_iterations"
                result.total_iterations = self.max_iterations

            # Find all "novel" tasks and report the best one
            novel_tasks = [t for t in result.trajectory if t.novelty_score > self.exhaustion_threshold]
            if novel_tasks:
                result.breakthrough_task = max(novel_tasks, key=lambda t: t.novelty_score)
                result.breakthrough_task.is_breakthrough = True

        except Exception as e:
            result.terminated_by = f"error: {str(e)}"
            result.total_iterations = len(result.trajectory)

        result.end_time = datetime.now(timezone.utc).isoformat()
        result.novelty_trajectory = self.novelty_metrics.trajectory

        return result
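
# Example: exhausting the novelty frontier instead of stopping at the first
# spike. A minimal sketch under the same local-Ollama assumption; the
# parameter values shown are the defaults, spelled out for clarity, and
# remember to await agent.close() in real code.
#
#     agent = ExhaustFrontierAgent(exhaustion_threshold=0.15, window_size=3, min_iterations=5)
#     result = asyncio.run(agent.run("Improve urban transportation"))
#     print(result.terminated_by)                      # e.g. "exhaustion (avg=0.112)"
#     print(len(result.trajectory), "tasks explored")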

class CoverageTargetAgent(NoveltyDrivenTaskAgent):
    """
    Alternative strategy: continue until N distinct clusters are covered.

    This ensures a diverse portfolio of ideas across different conceptual areas.
    """

    def __init__(
        self,
        target_clusters: int = 5,
        cluster_threshold: float = 0.7,
        **kwargs,
    ):
        """
        Args:
            target_clusters: Target number of distinct clusters to find
            cluster_threshold: Similarity threshold for cluster membership
            **kwargs: Passed to the parent class
        """
        super().__init__(**kwargs)
        self.target_clusters = target_clusters
        self.cluster_threshold = cluster_threshold

    def _count_clusters(self, embeddings: List[np.ndarray]) -> int:
        """Count distinct clusters using greedy clustering.

        Each embedding joins the first existing cluster whose representative is
        at least `cluster_threshold` similar; otherwise it starts a new cluster.
        """
        if not embeddings:
            return 0

        clusters = []
        for emb in embeddings:
            found_cluster = False
            for cluster_centroid in clusters:
                similarity = NoveltyMetrics.cosine_similarity(emb, cluster_centroid)
                if similarity >= self.cluster_threshold:
                    found_cluster = True
                    break

            if not found_cluster:
                clusters.append(emb)

        return len(clusters)

    async def run(self, seed_problem: str, **kwargs) -> TaskGenerationResult:
        """Override to use coverage-based termination."""
        self.novelty_metrics.reset()

        result = TaskGenerationResult(
            seed_problem=seed_problem,
            start_time=datetime.now(timezone.utc).isoformat(),
            config={
                "strategy": "coverage_target",
                "target_clusters": self.target_clusters,
                "cluster_threshold": self.cluster_threshold,
                "max_iterations": self.max_iterations,
            },
        )

        used_expert_names = set()
        previous_tasks: List[str] = []
        all_embeddings: List[np.ndarray] = []

        try:
            for iteration in range(self.max_iterations):
                # Sample expert (bounded retries, as in the other strategies)
                attempts = 0
                expert = self.expert_provider.get_random_expert()
                while expert["name"] in used_expert_names and attempts < 10:
                    expert = self.expert_provider.get_random_expert()
                    attempts += 1
                used_expert_names.add(expert["name"])

                task = await self._generate_task(seed_problem, expert, previous_tasks)
                previous_tasks.append(task)
                embedding = await self._get_embedding(task)
                all_embeddings.append(embedding)

                novelty = self.novelty_metrics.compute_novelty(embedding)
                self.novelty_metrics.add_embedding(embedding, novelty)

                generated_task = GeneratedTask(
                    task=task,
                    expert=expert["name"],
                    expert_domain=expert["domain"],
                    novelty_score=novelty.score,
                    iteration=iteration + 1,
                )
                result.trajectory.append(generated_task)

                if self.on_iteration:
                    self.on_iteration(generated_task)

                # Check coverage
                cluster_count = self._count_clusters(all_embeddings)
                if cluster_count >= self.target_clusters:
                    result.terminated_by = f"coverage ({cluster_count} clusters)"
                    result.total_iterations = iteration + 1
                    break

            else:
                final_clusters = self._count_clusters(all_embeddings)
                result.terminated_by = f"max_iterations ({final_clusters} clusters)"
                result.total_iterations = self.max_iterations

            # Find the most novel task
            if result.trajectory:
                best_task = max(result.trajectory, key=lambda t: t.novelty_score)
                best_task.is_breakthrough = True
                result.breakthrough_task = best_task

        except Exception as e:
            result.terminated_by = f"error: {str(e)}"
            result.total_iterations = len(result.trajectory)

        result.end_time = datetime.now(timezone.utc).isoformat()
        result.novelty_trajectory = self.novelty_metrics.trajectory

        return result
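
# Example: comparing the three termination strategies on one seed problem. A
# minimal sketch, assuming a local Ollama server; `compare` is an illustrative
# helper, not part of this module's API.
#
#     async def compare(seed: str) -> None:
#         for agent in (
#             NoveltyDrivenTaskAgent(novelty_threshold=0.4),
#             ExhaustFrontierAgent(exhaustion_threshold=0.15),
#             CoverageTargetAgent(target_clusters=5),
#         ):
#             try:
#                 result = await agent.run(seed)
#                 print(f"{type(agent).__name__}: {result.terminated_by} "
#                       f"after {result.total_iterations} iterations")
#             finally:
#                 await agent.close()
#
#     asyncio.run(compare("Improve urban transportation"))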