novelty-seeking/experiments/novelty_loop/agent.py
gbanyan 43c025e060 feat: Add experiments framework and novelty-driven agent loop
- Add complete experiments directory with pilot study infrastructure
  - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective)
  - Human assessment tool with React frontend and FastAPI backend
  - AUT flexibility analysis with jump signal detection
  - Result visualization and metrics computation

- Add novelty-driven agent loop module (experiments/novelty_loop/)
  - NoveltyDrivenTaskAgent with expert perspective perturbation
  - Three termination strategies: breakthrough, exhaust, coverage
  - Interactive CLI demo with colored output
  - Embedding-based novelty scoring

- Add DDC knowledge domain classification data (en/zh)
- Add CLAUDE.md project documentation
- Update research report with experiment findings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 10:16:21 +08:00

726 lines · 27 KiB · Python
"""
Novelty-Driven Task Agent - An autonomous agent that generates tasks using novelty as termination condition.
This agent operates in a while loop, generating tasks from diverse expert perspectives,
and terminates when it finds a task that exceeds the novelty threshold (a "breakthrough").
The core innovation is using novelty assessment to help the agent "jump out" of its
trained data distribution (semantic gravity), finding truly novel ideas.
Architecture:
Seed Problem → Expert Sample → Task Generation → Novelty Assessment → Continue/Stop
Termination Strategy: "Seek Breakthrough"
- Continue until novelty > threshold
- Find the first truly novel task and stop
Research Foundation:
- Novelty Search (Lehman & Stanley): Reward novelty, not objectives
- Curiosity-driven Exploration (Pathak et al.): Intrinsic motivation via prediction error
- Quality-Diversity (MAP-Elites): Maintain diverse high-quality solutions
"""
import asyncio
import json
import logging
import random
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, List, Optional

import httpx
import numpy as np

from .novelty_metrics import NoveltyMetrics, NoveltyScore, NoveltyTrajectory

logger = logging.getLogger(__name__)

# ============================================================================
# Data Classes
# ============================================================================
@dataclass
class GeneratedTask:
    """A single generated task with metadata."""
    task: str
    expert: str
    expert_domain: str
    novelty_score: float
    iteration: int
    is_breakthrough: bool = False
    embedding: Optional[np.ndarray] = None


@dataclass
class TaskGenerationResult:
    """Result of a complete novelty-driven task generation session."""
    seed_problem: str
    breakthrough_task: Optional[GeneratedTask] = None
    trajectory: List[GeneratedTask] = field(default_factory=list)
    total_iterations: int = 0
    terminated_by: str = "unknown"  # "breakthrough", "max_iterations", "error"
    novelty_trajectory: Optional[NoveltyTrajectory] = None
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    config: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Convert to dictionary for JSON serialization."""
        return {
            "seed_problem": self.seed_problem,
            "breakthrough_task": {
                "task": self.breakthrough_task.task,
                "expert": self.breakthrough_task.expert,
                "expert_domain": self.breakthrough_task.expert_domain,
                "novelty_score": self.breakthrough_task.novelty_score,
                "iteration": self.breakthrough_task.iteration
            } if self.breakthrough_task else None,
            "trajectory": [
                {
                    "task": t.task,
                    "expert": t.expert,
                    "expert_domain": t.expert_domain,
                    "novelty_score": t.novelty_score,
                    "iteration": t.iteration,
                    "is_breakthrough": t.is_breakthrough
                }
                for t in self.trajectory
            ],
            "total_iterations": self.total_iterations,
            "terminated_by": self.terminated_by,
            "novelty_stats": {
                "mean_novelty": self.novelty_trajectory.mean_novelty if self.novelty_trajectory else 0,
                "max_novelty": self.novelty_trajectory.max_novelty if self.novelty_trajectory else 0,
                "jump_ratio": self.novelty_trajectory.jump_ratio if self.novelty_trajectory else 0,
                "cumulative_novelty": self.novelty_trajectory.final_cumulative_novelty if self.novelty_trajectory else 0
            },
            "start_time": self.start_time,
            "end_time": self.end_time,
            "config": self.config
        }

# ============================================================================
# Expert/Domain Providers
# ============================================================================
class ExpertProvider:
    """Provides random experts from curated occupation lists."""

    def __init__(self, data_dir: Optional[Path] = None, language: str = "en"):
        """
        Args:
            data_dir: Path to data directory containing occupation JSON files
            language: Language code ("en" or "zh")
        """
        if data_dir is None:
            # Default to backend data directory
            data_dir = Path(__file__).parent.parent.parent / "backend" / "app" / "data"
        self.data_dir = data_dir
        self.language = language
        self._occupations: List[dict] = []
        self._load_occupations()

    def _load_occupations(self):
        """Load occupations from JSON file."""
        file_path = self.data_dir / f"curated_occupations_{self.language}.json"
        if not file_path.exists():
            logger.warning(f"Occupation file not found: {file_path}")
            # Fallback to some default experts
            self._occupations = [
                {"name": "Marine Biologist", "domain": "Science"},
                {"name": "Choreographer", "domain": "Arts"},
                {"name": "Urban Planner", "domain": "Architecture"},
                {"name": "Chef", "domain": "Culinary"},
                {"name": "Astronomer", "domain": "Science"},
            ]
            return
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            self._occupations = data.get("occupations", [])
            logger.info(f"Loaded {len(self._occupations)} occupations from {file_path.name}")
        except Exception as e:
            logger.error(f"Error loading occupations: {e}")
            self._occupations = []

    def get_random_expert(self) -> dict:
        """Get a random expert with name and domain."""
        if not self._occupations:
            return {"name": "Expert", "domain": "General"}
        return random.choice(self._occupations)

    def get_random_experts(self, count: int) -> List[dict]:
        """Get multiple random experts without replacement."""
        if len(self._occupations) <= count:
            return self._occupations.copy()
        return random.sample(self._occupations, count)

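# Expected shape of curated_occupations_{language}.json, inferred from the
# loader above (only "name" and "domain" are read; the example entries are
# illustrative):
#
#     {
#       "occupations": [
#         {"name": "Marine Biologist", "domain": "Science"},
#         {"name": "Choreographer", "domain": "Arts"}
#       ]
#     }
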
class DomainProvider:
    """Provides random knowledge domains from DDC classification."""

    def __init__(self, data_dir: Optional[Path] = None, language: str = "en"):
        if data_dir is None:
            data_dir = Path(__file__).parent.parent.parent / "backend" / "app" / "data"
        self.data_dir = data_dir
        self.language = language
        self._domains: List[dict] = []
        self._load_domains()

    def _load_domains(self):
        """Load domains from JSON file."""
        file_path = self.data_dir / f"ddc_domains_{self.language}.json"
        if not file_path.exists():
            logger.warning(f"Domain file not found: {file_path}")
            self._domains = []
            return
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            self._domains = data.get("domains", [])
            logger.info(f"Loaded {len(self._domains)} domains from {file_path.name}")
        except Exception as e:
            logger.error(f"Error loading domains: {e}")

    def get_random_domain(self, level: Optional[str] = None) -> dict:
        """Get a random domain, optionally filtered by level."""
        domains = self._domains
        if level:
            domains = [d for d in domains if d.get("level") == level]
        if not domains:
            return {"name": "General Knowledge", "code": "000"}
        return random.choice(domains)

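# Expected shape of ddc_domains_{language}.json, inferred from the loader and
# get_random_domain above ("level" is optional and only used for filtering;
# the field values shown are hypothetical):
#
#     {
#       "domains": [
#         {"name": "Natural sciences", "code": "500", "level": "main"}
#       ]
#     }
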
# ============================================================================
# Novelty-Driven Task Agent
# ============================================================================
class NoveltyDrivenTaskAgent:
    """
    An autonomous agent that generates tasks using novelty as the termination condition.

    The agent operates in a loop:
    1. Sample a random expert perspective
    2. Generate a task from that expert's viewpoint
    3. Compute the task's novelty (distance from centroid of previous tasks)
    4. If novelty > threshold → STOP (found breakthrough!)
    5. Otherwise → Continue with next expert

    Example:
        agent = NoveltyDrivenTaskAgent(novelty_threshold=0.4)
        result = await agent.run("Improve urban transportation")
        # result.breakthrough_task contains the novel task found
        # result.trajectory shows the exploration path
    """

    def __init__(
        self,
        novelty_threshold: float = 0.4,
        max_iterations: int = 20,
        ollama_base_url: str = "http://localhost:11435",
        llm_model: str = "qwen3:8b",
        embedding_model: str = "qwen3-embedding:4b",
        language: str = "en",
        data_dir: Optional[Path] = None,
        on_iteration: Optional[Callable[[GeneratedTask], None]] = None,
        temperature: float = 0.7
    ):
        """
        Args:
            novelty_threshold: Novelty score threshold for breakthrough (0.0-1.0)
            max_iterations: Maximum iterations before stopping
            ollama_base_url: Ollama API endpoint
            llm_model: Model for task generation
            embedding_model: Model for embeddings
            language: Language for prompts and experts ("en" or "zh")
            data_dir: Path to data directory for expert/domain files
            on_iteration: Callback function called after each iteration
            temperature: LLM temperature for generation
        """
        self.novelty_threshold = novelty_threshold
        self.max_iterations = max_iterations
        self.ollama_base_url = ollama_base_url
        self.llm_model = llm_model
        self.embedding_model = embedding_model
        self.language = language
        self.temperature = temperature
        self.on_iteration = on_iteration

        # Initialize providers
        self.expert_provider = ExpertProvider(data_dir, language)
        self.domain_provider = DomainProvider(data_dir, language)

        # Initialize novelty metrics
        self.novelty_metrics = NoveltyMetrics(
            similarity_threshold=0.7,
            jump_detection_enabled=True
        )

        # HTTP client
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create HTTP client."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=120.0)
        return self._client

    async def close(self):
        """Close HTTP client."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None
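
    # Usage sketch: stream progress through the on_iteration callback
    # (the seed problem and parameter values here are illustrative):
    #
    #     def show(task: GeneratedTask) -> None:
    #         print(f"[{task.iteration}] {task.expert}: {task.novelty_score:.3f}")
    #
    #     agent = NoveltyDrivenTaskAgent(novelty_threshold=0.4, on_iteration=show)
    #     result = await agent.run("Improve urban transportation")
    #     await agent.close()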
    async def _generate_text(self, prompt: str) -> str:
        """Generate text using Ollama LLM."""
        client = await self._get_client()
        url = f"{self.ollama_base_url}/api/generate"

        # Add /no_think prefix for qwen models to disable thinking
        if self.llm_model.lower().startswith("qwen"):
            prompt = f"/no_think\n{prompt}"

        try:
            response = await client.post(url, json={
                "model": self.llm_model,
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": self.temperature
                }
            })
            response.raise_for_status()
            result = response.json()
            return result.get("response", "").strip()
        except Exception as e:
            logger.error(f"LLM generation error: {e}")
            raise

    async def _get_embedding(self, text: str) -> np.ndarray:
        """Get embedding vector for text."""
        client = await self._get_client()
        url = f"{self.ollama_base_url}/api/embed"
        try:
            response = await client.post(url, json={
                "model": self.embedding_model,
                "input": text
            })
            response.raise_for_status()
            result = response.json()
            return np.array(result["embeddings"][0])
        except Exception as e:
            logger.error(f"Embedding error: {e}")
            raise
    def _build_task_prompt(
        self,
        seed_problem: str,
        expert: dict,
        previous_tasks: List[str]
    ) -> str:
        """Build the prompt for task generation."""
        expert_name = expert.get("name", "Expert")
        expert_domain = expert.get("domain", "General")

        # Build context from previous tasks (if any)
        context = ""
        if previous_tasks:
            recent = previous_tasks[-3:]  # Last 3 tasks
            context = "\n\nPrevious suggestions (generate something DIFFERENT):\n"
            for t in recent:
                context += f"- {t}\n"

        if self.language == "zh":
            prompt = f"""你是一位 {expert_name}({expert_domain})。
给定问题:{seed_problem}
请从你的专业角度出发,提出一个独特的改进任务或探索方向。
这个任务应该结合你的专业知识,提供一个非传统但有价值的视角。
{context}
请直接给出任务描述,不要添加解释。任务应该具体、可行、且与众不同。
任务:"""
        else:
            prompt = f"""You are a {expert_name} ({expert_domain}).
Given problem: {seed_problem}
From your professional perspective, propose a unique task or exploration direction to improve or innovate on this problem.
The task should leverage your domain expertise to provide an unconventional but valuable angle.
{context}
Provide just the task description without explanation. The task should be specific, actionable, and distinctive.
Task:"""
        return prompt

    async def _generate_task(
        self,
        seed_problem: str,
        expert: dict,
        previous_tasks: List[str]
    ) -> str:
        """Generate a task from an expert's perspective."""
        prompt = self._build_task_prompt(seed_problem, expert, previous_tasks)
        task = await self._generate_text(prompt)

        # Clean up the response
        task = task.strip()
        # Remove common prefixes
        for prefix in ["Task:", "任务:", "Here's", "I suggest", "Based on"]:
            if task.lower().startswith(prefix.lower()):
                task = task[len(prefix):].strip()
        return task
    async def run(
        self,
        seed_problem: str,
        used_experts: Optional[List[dict]] = None
    ) -> TaskGenerationResult:
        """
        Run the novelty-driven task generation loop.

        Args:
            seed_problem: The initial problem/challenge to explore
            used_experts: Optional list of experts to avoid (for multi-run scenarios)

        Returns:
            TaskGenerationResult with breakthrough task (if found) and full trajectory
        """
        # Reset state
        self.novelty_metrics.reset()
        result = TaskGenerationResult(
            seed_problem=seed_problem,
            start_time=datetime.now(timezone.utc).isoformat(),
            config={
                "novelty_threshold": self.novelty_threshold,
                "max_iterations": self.max_iterations,
                "llm_model": self.llm_model,
                "embedding_model": self.embedding_model,
                "language": self.language
            }
        )

        used_expert_names = set()
        if used_experts:
            used_expert_names = {e["name"] for e in used_experts}
        previous_tasks: List[str] = []

        logger.info(f"Starting novelty loop: '{seed_problem}' (threshold={self.novelty_threshold})")

        try:
            for iteration in range(self.max_iterations):
                # 1. Sample a random expert (avoid duplicates)
                attempts = 0
                expert = self.expert_provider.get_random_expert()
                while expert["name"] in used_expert_names and attempts < 10:
                    expert = self.expert_provider.get_random_expert()
                    attempts += 1
                used_expert_names.add(expert["name"])

                logger.info(f"Iteration {iteration + 1}: Expert = {expert['name']} ({expert['domain']})")

                # 2. Generate task
                task = await self._generate_task(seed_problem, expert, previous_tasks)
                previous_tasks.append(task)

                # 3. Get embedding
                embedding = await self._get_embedding(task)

                # 4. Compute novelty
                novelty = self.novelty_metrics.compute_novelty(embedding)
                self.novelty_metrics.add_embedding(embedding, novelty)

                # 5. Create task record
                generated_task = GeneratedTask(
                    task=task,
                    expert=expert["name"],
                    expert_domain=expert["domain"],
                    novelty_score=novelty.score,
                    iteration=iteration + 1,
                    is_breakthrough=novelty.score > self.novelty_threshold,
                    embedding=embedding
                )
                result.trajectory.append(generated_task)

                logger.info(f" Task: {task[:80]}...")
                logger.info(f" Novelty: {novelty.score:.4f} (threshold: {self.novelty_threshold})")

                # Callback
                if self.on_iteration:
                    self.on_iteration(generated_task)

                # 6. Check for breakthrough
                if novelty.score > self.novelty_threshold:
                    result.breakthrough_task = generated_task
                    result.terminated_by = "breakthrough"
                    result.total_iterations = iteration + 1
                    logger.info(f" BREAKTHROUGH! Stopping after {iteration + 1} iterations")
                    break
            else:
                # Max iterations reached without breakthrough
                result.terminated_by = "max_iterations"
                result.total_iterations = self.max_iterations
                logger.info(f"Max iterations ({self.max_iterations}) reached without breakthrough")
                # Find the most novel task as a fallback
                if result.trajectory:
                    best_task = max(result.trajectory, key=lambda t: t.novelty_score)
                    best_task.is_breakthrough = True  # Mark as best found
                    result.breakthrough_task = best_task
        except Exception as e:
            logger.error(f"Error during generation: {e}")
            result.terminated_by = f"error: {str(e)}"
            result.total_iterations = len(result.trajectory)

        # Finalize
        result.end_time = datetime.now(timezone.utc).isoformat()
        result.novelty_trajectory = self.novelty_metrics.trajectory
        return result

# ============================================================================
# Alternative Termination Strategies
# ============================================================================
class ExhaustFrontierAgent(NoveltyDrivenTaskAgent):
    """
    Alternative strategy: Continue while novelty is high, stop when it drops.

    This explores the "novelty frontier" more thoroughly, finding multiple novel
    ideas before stopping when exploration becomes repetitive.
    """

    def __init__(
        self,
        exhaustion_threshold: float = 0.15,
        window_size: int = 3,
        min_iterations: int = 5,
        **kwargs
    ):
        """
        Args:
            exhaustion_threshold: Stop when recent average novelty drops below this
            window_size: Number of recent iterations to average
            min_iterations: Minimum iterations before checking exhaustion
            **kwargs: Passed to parent class
        """
        super().__init__(**kwargs)
        self.exhaustion_threshold = exhaustion_threshold
        self.window_size = window_size
        self.min_iterations = min_iterations
    async def run(self, seed_problem: str, **kwargs) -> TaskGenerationResult:
        """Override to use exhaustion-based termination."""
        # Reset state
        self.novelty_metrics.reset()
        result = TaskGenerationResult(
            seed_problem=seed_problem,
            start_time=datetime.now(timezone.utc).isoformat(),
            config={
                "strategy": "exhaust_frontier",
                "exhaustion_threshold": self.exhaustion_threshold,
                "window_size": self.window_size,
                "min_iterations": self.min_iterations,
                "max_iterations": self.max_iterations,
                "llm_model": self.llm_model
            }
        )

        used_expert_names = set()
        previous_tasks: List[str] = []
        novelty_history: List[float] = []

        try:
            for iteration in range(self.max_iterations):
                # Sample expert (bounded retries, so a small expert pool
                # cannot spin this loop forever once every name is used)
                attempts = 0
                expert = self.expert_provider.get_random_expert()
                while expert["name"] in used_expert_names and attempts < 10:
                    expert = self.expert_provider.get_random_expert()
                    attempts += 1
                used_expert_names.add(expert["name"])

                # Generate and evaluate
                task = await self._generate_task(seed_problem, expert, previous_tasks)
                previous_tasks.append(task)
                embedding = await self._get_embedding(task)
                novelty = self.novelty_metrics.compute_novelty(embedding)
                self.novelty_metrics.add_embedding(embedding, novelty)
                novelty_history.append(novelty.score)

                generated_task = GeneratedTask(
                    task=task,
                    expert=expert["name"],
                    expert_domain=expert["domain"],
                    novelty_score=novelty.score,
                    iteration=iteration + 1
                )
                result.trajectory.append(generated_task)

                if self.on_iteration:
                    self.on_iteration(generated_task)

                # Check exhaustion condition
                if iteration >= self.min_iterations:
                    recent_avg = np.mean(novelty_history[-self.window_size:])
                    if recent_avg < self.exhaustion_threshold:
                        result.terminated_by = f"exhaustion (avg={recent_avg:.3f})"
                        result.total_iterations = iteration + 1
                        break
            else:
                result.terminated_by = "max_iterations"
                result.total_iterations = self.max_iterations

            # Find all "novel" tasks
            novel_tasks = [t for t in result.trajectory if t.novelty_score > self.exhaustion_threshold]
            if novel_tasks:
                result.breakthrough_task = max(novel_tasks, key=lambda t: t.novelty_score)
                result.breakthrough_task.is_breakthrough = True
        except Exception as e:
            result.terminated_by = f"error: {str(e)}"
            result.total_iterations = len(result.trajectory)

        result.end_time = datetime.now(timezone.utc).isoformat()
        result.novelty_trajectory = self.novelty_metrics.trajectory
        return result

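# Usage sketch for the exhaustion strategy (parameter values are illustrative;
# extra kwargs such as novelty_threshold are forwarded to the parent class):
#
#     agent = ExhaustFrontierAgent(exhaustion_threshold=0.15, window_size=3,
#                                  min_iterations=5, max_iterations=30)
#     result = await agent.run("Improve urban transportation")
#     frontier = [t for t in result.trajectory if t.novelty_score > 0.15]
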
class CoverageTargetAgent(NoveltyDrivenTaskAgent):
    """
    Alternative strategy: Continue until N distinct clusters are covered.

    This ensures a diverse portfolio of ideas across different conceptual areas.
    """

    def __init__(
        self,
        target_clusters: int = 5,
        cluster_threshold: float = 0.7,
        **kwargs
    ):
        """
        Args:
            target_clusters: Target number of distinct clusters to find
            cluster_threshold: Similarity threshold for cluster membership
            **kwargs: Passed to parent class
        """
        super().__init__(**kwargs)
        self.target_clusters = target_clusters
        self.cluster_threshold = cluster_threshold

    def _count_clusters(self, embeddings: List[np.ndarray]) -> int:
        """Count distinct clusters using greedy clustering."""
        if not embeddings:
            return 0
        clusters = []
        for emb in embeddings:
            found_cluster = False
            for cluster_centroid in clusters:
                similarity = NoveltyMetrics.cosine_similarity(emb, cluster_centroid)
                if similarity >= self.cluster_threshold:
                    found_cluster = True
                    break
            if not found_cluster:
                clusters.append(emb)
        return len(clusters)
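
    # Worked example (hypothetical similarities, cluster_threshold = 0.7):
    # embeddings arrive as e1, e2, e3. sim(e2, e1) = 0.9 >= 0.7, so e2 joins
    # e1's cluster; sim(e3, e1) = 0.4 < 0.7, so e3 starts a new cluster,
    # giving 2 clusters. Note the first member of each cluster serves as its
    # fixed "centroid"; centroids are not updated as members join.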
    async def run(self, seed_problem: str, **kwargs) -> TaskGenerationResult:
        """Override to use coverage-based termination."""
        self.novelty_metrics.reset()
        result = TaskGenerationResult(
            seed_problem=seed_problem,
            start_time=datetime.now(timezone.utc).isoformat(),
            config={
                "strategy": "coverage_target",
                "target_clusters": self.target_clusters,
                "cluster_threshold": self.cluster_threshold,
                "max_iterations": self.max_iterations
            }
        )

        used_expert_names = set()
        previous_tasks: List[str] = []
        all_embeddings: List[np.ndarray] = []

        try:
            for iteration in range(self.max_iterations):
                # Sample expert (bounded retries, so a small expert pool
                # cannot spin this loop forever once every name is used)
                attempts = 0
                expert = self.expert_provider.get_random_expert()
                while expert["name"] in used_expert_names and attempts < 10:
                    expert = self.expert_provider.get_random_expert()
                    attempts += 1
                used_expert_names.add(expert["name"])

                task = await self._generate_task(seed_problem, expert, previous_tasks)
                previous_tasks.append(task)
                embedding = await self._get_embedding(task)
                all_embeddings.append(embedding)
                novelty = self.novelty_metrics.compute_novelty(embedding)
                self.novelty_metrics.add_embedding(embedding, novelty)

                generated_task = GeneratedTask(
                    task=task,
                    expert=expert["name"],
                    expert_domain=expert["domain"],
                    novelty_score=novelty.score,
                    iteration=iteration + 1
                )
                result.trajectory.append(generated_task)

                if self.on_iteration:
                    self.on_iteration(generated_task)

                # Check coverage
                cluster_count = self._count_clusters(all_embeddings)
                if cluster_count >= self.target_clusters:
                    result.terminated_by = f"coverage ({cluster_count} clusters)"
                    result.total_iterations = iteration + 1
                    break
            else:
                final_clusters = self._count_clusters(all_embeddings)
                result.terminated_by = f"max_iterations ({final_clusters} clusters)"
                result.total_iterations = self.max_iterations

            # Find most novel task
            if result.trajectory:
                best_task = max(result.trajectory, key=lambda t: t.novelty_score)
                best_task.is_breakthrough = True
                result.breakthrough_task = best_task
        except Exception as e:
            result.terminated_by = f"error: {str(e)}"
            result.total_iterations = len(result.trajectory)

        result.end_time = datetime.now(timezone.utc).isoformat()
        result.novelty_trajectory = self.novelty_metrics.trajectory
        return result
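
# ============================================================================
# Minimal Demo
# ============================================================================
# A minimal usage sketch, not part of the module API. It assumes a local
# Ollama server is reachable at the default base URL configured above, with
# the default LLM and embedding models already pulled; the seed problem and
# parameter values are illustrative.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    async def _demo() -> None:
        agent = NoveltyDrivenTaskAgent(novelty_threshold=0.4, max_iterations=10)
        try:
            result = await agent.run("Improve urban transportation")
        finally:
            await agent.close()
        print(json.dumps(result.to_dict(), ensure_ascii=False, indent=2))

    asyncio.run(_demo())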