Files
novelty-seeking/experiments/conditions/c4_full_pipeline.py
gbanyan 43c025e060 feat: Add experiments framework and novelty-driven agent loop
- Add complete experiments directory with pilot study infrastructure
  - 5 experimental conditions (direct, expert-only, attribute-only, full-pipeline, random-perspective)
  - Human assessment tool with React frontend and FastAPI backend
  - AUT flexibility analysis with jump signal detection
  - Result visualization and metrics computation

- Add novelty-driven agent loop module (experiments/novelty_loop/)
  - NoveltyDrivenTaskAgent with expert perspective perturbation
  - Three termination strategies: breakthrough, exhaust, coverage
  - Interactive CLI demo with colored output
  - Embedding-based novelty scoring

- Add DDC knowledge domain classification data (en/zh)
- Add CLAUDE.md project documentation
- Update research report with experiment findings

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-20 10:16:21 +08:00

215 lines
7.0 KiB
Python

"""
Condition 4: Full Pipeline (Attributes + Experts)
The complete novelty-seeking system:
1. Attribute decomposition into categories
2. Expert team generation
3. Expert keyword generation for each attribute
4. Description generation for each keyword
"""
import sys
from pathlib import Path
# Add backend to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "backend"))
from typing import List, Dict, Any
from app.services.llm_service import ollama_provider, extract_json_from_response
from app.services.expert_source_service import expert_source_service
from app.prompts.attribute_prompt import get_step1_dynamic_attributes_prompt
from app.prompts.expert_transformation_prompt import (
get_expert_keyword_generation_prompt,
get_single_description_prompt
)
from experiments.config import (
MODEL, TEMPERATURE, FIXED_CATEGORIES, EXPERT_COUNT,
EXPERT_SOURCE, KEYWORDS_PER_EXPERT, PROMPT_LANGUAGE
)
async def generate_ideas(
    query: str,
    model: str = None,
    temperature: float = None,
    categories: List[str] = None,
    expert_count: int = None,
    expert_source: str = None,
    keywords_per_expert: int = None,
    lang: str = None
) -> Dict[str, Any]:
    """
    Generate ideas using the full pipeline (C4): attribute decomposition,
    expert team lookup, per-attribute expert keyword generation, and a
    description per keyword.

    Args:
        query: The object/concept to generate ideas for.
        model: LLM model to use (defaults to config MODEL).
        temperature: Generation temperature (defaults to config TEMPERATURE).
        categories: Categories for attribute decomposition
            (defaults to config FIXED_CATEGORIES).
        expert_count: Number of experts (defaults to config EXPERT_COUNT).
        expert_source: Source of experts (defaults to config EXPERT_SOURCE).
        keywords_per_expert: Keywords each expert generates per attribute
            (defaults to config KEYWORDS_PER_EXPERT).
        lang: Language for prompts (defaults to config PROMPT_LANGUAGE).

    Returns:
        Dict with the generated ideas, per-idea provenance
        ("ideas_with_source"), and run metadata.
    """
    # Resolve defaults with explicit `is None` checks. The previous
    # `x = x or DEFAULT` pattern silently discarded falsy-but-valid
    # arguments such as temperature=0.0.
    model = MODEL if model is None else model
    temperature = TEMPERATURE if temperature is None else temperature
    categories = FIXED_CATEGORIES if categories is None else categories
    expert_count = EXPERT_COUNT if expert_count is None else expert_count
    expert_source = EXPERT_SOURCE if expert_source is None else expert_source
    keywords_per_expert = (
        KEYWORDS_PER_EXPERT if keywords_per_expert is None else keywords_per_expert
    )
    lang = PROMPT_LANGUAGE if lang is None else lang

    # Step 0: Get experts from curated source.
    experts_data, actual_source = expert_source_service.get_experts(
        source=expert_source,
        count=expert_count,
        language=lang
    )
    # Convert to the shape expected by the prompt builders below.
    experts = [
        {
            "id": f"expert-{i}",
            "name": exp.get("name", "Expert"),
            "domain": exp.get("domain", ""),
            "perspective": exp.get("perspective", "")
        }
        for i, exp in enumerate(experts_data)
    ]

    # Step 1: Generate attributes for each category.
    category_defs = [
        {"name": cat, "description": f"Related {cat.lower()} of the object", "order": i}
        for i, cat in enumerate(categories)
    ]
    attr_prompt = get_step1_dynamic_attributes_prompt(
        query=query,
        categories=category_defs,
        lang=lang
    )
    attr_response = await ollama_provider.generate(
        prompt=attr_prompt,
        model=model,
        temperature=temperature
    )
    # Expected shape: {category_name: [attribute, ...]} — produced by the
    # prompt; missing categories are treated as empty below.
    attributes_by_category = extract_json_from_response(attr_response)

    # Step 2: Expert keyword generation for each category/attribute.
    all_keywords = []
    for category in categories:
        attrs = attributes_by_category.get(category, [])
        for attr in attrs:
            # One prompt covers all experts for this attribute.
            keyword_prompt = get_expert_keyword_generation_prompt(
                category=category,
                attribute=attr,
                experts=experts,
                keywords_per_expert=keywords_per_expert,
                lang=lang
            )
            keyword_response = await ollama_provider.generate(
                prompt=keyword_prompt,
                model=model,
                temperature=temperature
            )
            keyword_result = extract_json_from_response(keyword_response)
            keywords = keyword_result.get("keywords", [])
            for kw in keywords:
                all_keywords.append({
                    "category": category,
                    "attribute": attr,
                    "keyword": kw.get("keyword", ""),
                    "expert_id": kw.get("expert_id", ""),
                    "expert_name": kw.get("expert_name", "")
                })

    # Step 3: Generate a description (the actual "idea") for each keyword.
    all_ideas = []
    for kw_info in all_keywords:
        # Resolve the full expert record; fall back to a minimal stub when
        # the LLM returned an expert_id we did not issue.
        expert = next(
            (e for e in experts if e["id"] == kw_info["expert_id"]),
            {"name": kw_info["expert_name"], "domain": "", "id": kw_info["expert_id"]}
        )
        desc_prompt = get_single_description_prompt(
            query=query,
            keyword=kw_info["keyword"],
            expert_id=expert["id"],
            expert_name=expert["name"],
            expert_domain=expert.get("domain", ""),
            lang=lang
        )
        desc_response = await ollama_provider.generate(
            prompt=desc_prompt,
            model=model,
            temperature=temperature
        )
        desc_result = extract_json_from_response(desc_response)
        description = desc_result.get("description", "")
        all_ideas.append({
            "idea": description,
            "keyword": kw_info["keyword"],
            "category": kw_info["category"],
            "attribute": kw_info["attribute"],
            "expert_name": expert["name"],
            "expert_domain": expert.get("domain", "")
        })

    return {
        "condition": "c4_full_pipeline",
        "query": query,
        "ideas": [item["idea"] for item in all_ideas],
        "ideas_with_source": all_ideas,
        "idea_count": len(all_ideas),
        "metadata": {
            "model": model,
            "temperature": temperature,
            "prompt_language": lang,
            "categories": categories,
            "attributes_by_category": attributes_by_category,
            "attribute_count": sum(len(v) for v in attributes_by_category.values()),
            "expert_count": expert_count,
            "expert_source": actual_source,
            "keywords_per_expert": keywords_per_expert,
            "total_keywords": len(all_keywords),
            "experts": [{"name": e["name"], "domain": e["domain"]} for e in experts],
            "mechanism": "full_pipeline_attributes_plus_experts"
        }
    }
# For testing
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        """Run the full pipeline on a sample query and print a summary."""
        outcome = await generate_ideas("Chair")
        meta = outcome["metadata"]
        print(f"Generated {outcome['idea_count']} ideas using full pipeline:")
        print(f" Attributes: {meta['attribute_count']}")
        print(f" Experts: {meta['expert_count']}")
        print(f" Keywords: {meta['total_keywords']}")
        print("\nExperts used:")
        for expert in meta["experts"]:
            print(f" - {expert['name']} ({expert['domain']})")
        print("\nSample ideas:")
        for idx, entry in enumerate(outcome["ideas_with_source"][:5], 1):
            print(f" {idx}. [{entry['expert_name']}] {entry['keyword']}: {entry['idea']}")

    asyncio.run(_demo())