chore: save local changes

2026-01-05 22:32:08 +08:00
parent bc281b8e0a
commit ec48709755
42 changed files with 5576 additions and 254 deletions

View File

@@ -3,10 +3,11 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .routers import attributes, transformation, expert_transformation, deduplication
from .routers import attributes, transformation, expert_transformation, deduplication, patent_search
from .services.llm_service import ollama_provider
from .services.embedding_service import embedding_service
from .services.llm_deduplication_service import llm_deduplication_service
from .services.patent_search_service import patent_search_service
@asynccontextmanager
@@ -15,6 +16,7 @@ async def lifespan(app: FastAPI):
await ollama_provider.close()
await embedding_service.close()
await llm_deduplication_service.close()
await patent_search_service.close()
app = FastAPI(
@@ -36,6 +38,7 @@ app.include_router(attributes.router)
app.include_router(transformation.router)
app.include_router(expert_transformation.router)
app.include_router(deduplication.router)
app.include_router(patent_search.router)
@app.get("/")
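A minimal runnable sketch of the lifespan pattern this hunk extends, with a stub standing in for `patent_search_service` (the real service owns an httpx client): every service opened for the app's lifetime gets its `close()` awaited on shutdown, and this commit simply appends the patent search service to that teardown list.

```python
from contextlib import asynccontextmanager
from fastapi import FastAPI

class StubService:
    """Stands in for patent_search_service; the real one owns an httpx client."""
    async def close(self) -> None:
        print("client closed")

patent_search_service = StubService()

@asynccontextmanager
async def lifespan(app: FastAPI):
    yield  # startup work (none here) happens before the yield
    # Shutdown: every service-owned HTTP client is closed here; this commit
    # appends patent_search_service to that teardown list.
    await patent_search_service.close()

app = FastAPI(lifespan=lifespan)
```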

View File

@@ -1,7 +1,10 @@
from pydantic import BaseModel
from typing import Optional, List, Dict
from typing import Optional, List, Dict, Literal
from enum import Enum
# Language type for prompts
LanguageType = Literal["zh", "en"]
class AttributeNode(BaseModel):
name: str
@@ -47,16 +50,19 @@ class CausalChain(BaseModel):
class StreamAnalyzeRequest(BaseModel):
"""多步驟分析請求(更新為支持動態類別)"""
"""Multi-step analysis request (updated to support dynamic categories)"""
query: str
model: Optional[str] = None
temperature: Optional[float] = 0.7
chain_count: int = 5 # 用戶可設定要生成多少條因果鏈
chain_count: int = 5 # User can set how many causal chains to generate
# 新增:動態類別支持
category_mode: Optional[str] = "dynamic_auto" # CategoryMode enum
# Dynamic category support
category_mode: Optional[str] = "dynamic_auto" # CategoryMode enum value
custom_categories: Optional[List[str]] = None
suggested_category_count: int = 3 # 建議 LLM 生成的類別數量
suggested_category_count: int = 3 # Suggested number of categories for the LLM to generate
# Language setting
lang: LanguageType = "zh"
class StreamAnalyzeResponse(BaseModel):
@@ -136,13 +142,14 @@ class DAGRelationship(BaseModel):
# ===== Transformation Agent schemas =====
class TransformationRequest(BaseModel):
"""Transformation Agent 請求"""
query: str # 原始查詢 (e.g., "腳踏車")
category: str # 類別名稱 (e.g., "功能")
attributes: List[str] # 該類別的屬性列表
"""Transformation Agent request"""
query: str # Original query (e.g., "bicycle")
category: str # Category name (e.g., "Functions")
attributes: List[str] # Attribute list for this category
model: Optional[str] = None
temperature: Optional[float] = 0.7
keyword_count: int = 3 # 要生成的新關鍵字數量
keyword_count: int = 3 # Number of new keywords to generate
lang: LanguageType = "zh" # Language for prompts
class TransformationDescription(BaseModel):
@@ -215,24 +222,27 @@ class ExpertSource(str, Enum):
class ExpertTransformationRequest(BaseModel):
"""Expert Transformation Agent 請求"""
"""Expert Transformation Agent request"""
query: str
category: str
attributes: List[str]
# Expert parameters
expert_count: int = 3 # 專家數量 (2-8)
keywords_per_expert: int = 1 # 每個專家為每個屬性生成幾個關鍵字 (1-3)
custom_experts: Optional[List[str]] = None # 用戶指定專家 ["藥師", "工程師"]
expert_count: int = 3 # Number of experts (2-8)
keywords_per_expert: int = 1 # Keywords per expert per attribute (1-3)
custom_experts: Optional[List[str]] = None # User-specified experts
# Expert source parameters
expert_source: ExpertSource = ExpertSource.LLM # 專家來源
expert_language: str = "en" # 外部來源的語言 (目前只有英文資料)
expert_source: ExpertSource = ExpertSource.LLM # Expert source
expert_language: str = "en" # Language for external sources
# LLM parameters
model: Optional[str] = None
temperature: Optional[float] = 0.7
# Prompt language
lang: LanguageType = "zh"
# ===== Deduplication Agent schemas =====
@@ -243,11 +253,12 @@ class DeduplicationMethod(str, Enum):
class DeduplicationRequest(BaseModel):
"""去重請求"""
"""Deduplication request"""
descriptions: List[ExpertTransformationDescription]
method: DeduplicationMethod = DeduplicationMethod.EMBEDDING # 去重方法
similarity_threshold: float = 0.85 # 餘弦相似度閾值 (0.0-1.0),僅 Embedding 使用
model: Optional[str] = None # Embedding/LLM 模型
method: DeduplicationMethod = DeduplicationMethod.EMBEDDING # Deduplication method
similarity_threshold: float = 0.85 # Cosine similarity threshold (0.0-1.0), only for Embedding
model: Optional[str] = None # Embedding/LLM model
lang: LanguageType = "zh" # Prompt language (for LLM method)
class DescriptionGroup(BaseModel):
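A hedged sketch of what the new `lang` field buys at the schema level, assuming Pydantic: `Literal["zh", "en"]` makes any other language code fail validation instead of silently producing a prompt in the wrong language. The model below is an abbreviated copy of `TransformationRequest`; the rejection check is illustrative.

```python
from typing import List, Literal, Optional
from pydantic import BaseModel, ValidationError

LanguageType = Literal["zh", "en"]

class TransformationRequestSketch(BaseModel):
    """Abbreviated copy of TransformationRequest for illustration."""
    query: str
    category: str
    attributes: List[str]
    model: Optional[str] = None
    temperature: Optional[float] = 0.7
    keyword_count: int = 3
    lang: LanguageType = "zh"  # new field: defaults to Chinese prompts

req = TransformationRequestSketch(
    query="bicycle", category="Functions",
    attributes=["transport", "exercise"], lang="en",
)
print(req.lang)  # en

try:
    TransformationRequestSketch(query="x", category="y", attributes=[], lang="fr")
except ValidationError as err:
    print("rejected field:", err.errors()[0]["loc"])  # ('lang',)
```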

View File

@@ -1,21 +1,37 @@
from typing import List, Optional, Dict
import json
DEFAULT_CATEGORIES = ["材料", "功能", "用途", "使用族群", "特性"]
CATEGORY_DESCRIPTIONS = {
"材料": "物件由什麼材料組成",
"功能": "物件能做什麼",
"用途": "物件在什麼場景使用",
"使用族群": "誰會使用這個物件",
"特性": "物件有什麼特徵",
}
from .language_config import (
LanguageType,
DEFAULT_CATEGORIES,
CATEGORY_DESCRIPTIONS,
)
def get_attribute_prompt(query: str, categories: Optional[List[str]] = None) -> str:
def get_default_categories(lang: LanguageType = "zh") -> List[str]:
return DEFAULT_CATEGORIES.get(lang, DEFAULT_CATEGORIES["zh"])
def get_category_descriptions(lang: LanguageType = "zh") -> Dict[str, str]:
return CATEGORY_DESCRIPTIONS.get(lang, CATEGORY_DESCRIPTIONS["zh"])
def get_attribute_prompt(
query: str,
categories: Optional[List[str]] = None,
lang: LanguageType = "zh"
) -> str:
"""Generate prompt with causal chain structure."""
if lang == "en":
prompt = f"""Analyze the attributes of "{query}" in a causal chain format: Materials→Functions→Usages→User Groups.
prompt = f"""分析「{query}」的屬性,以因果鏈方式呈現:材料→功能→用途→使用族群。
List 3-5 types of materials, each extending into a complete causal chain.
JSON format:
{{"name": "{query}", "children": [{{"name": "Material Name", "category": "Materials", "children": [{{"name": "Function Name", "category": "Functions", "children": [{{"name": "Usage Name", "category": "Usages", "children": [{{"name": "User Group Name", "category": "User Groups"}}]}}]}}]}}]}}
Return JSON only."""
else:
prompt = f"""分析「{query}」的屬性,以因果鏈方式呈現:材料→功能→用途→使用族群。
請列出 3-5 種材料,每種材料延伸出完整因果鏈。
@@ -27,9 +43,18 @@ JSON 格式:
return prompt
def get_step1_attributes_prompt(query: str) -> str:
"""Step 1: 生成各類別的屬性列表(平行結構)"""
return f"""/no_think
def get_step1_attributes_prompt(query: str, lang: LanguageType = "zh") -> str:
"""Step 1: Generate attribute list for each category (parallel structure)"""
if lang == "en":
return f"""/no_think
Analyze "{query}" and list attributes for the following four categories. List 3-5 common attributes for each category.
Return JSON only, in the following format:
{{"materials": ["material1", "material2", "material3"], "functions": ["function1", "function2", "function3"], "usages": ["usage1", "usage2", "usage3"], "users": ["user group1", "user group2", "user group3"]}}
Object: {query}"""
else:
return f"""/no_think
分析「{query}」,列出以下四個類別的屬性。每個類別列出 3-5 個常見屬性。
只回傳 JSON,格式如下:
@@ -45,21 +70,48 @@ def get_step2_causal_chain_prompt(
usages: List[str],
users: List[str],
existing_chains: List[dict],
chain_index: int
chain_index: int,
lang: LanguageType = "zh"
) -> str:
"""Step 2: 生成單條因果鏈"""
"""Step 2: Generate a single causal chain"""
existing_chains_text = ""
if existing_chains:
chains_list = [
f"- {c['material']}{c['function']}{c['usage']}{c['user']}"
for c in existing_chains
]
existing_chains_text = f"""
if lang == "en":
if existing_chains:
chains_list = [
f"- {c['material']}{c['function']}{c['usage']}{c['user']}"
for c in existing_chains
]
existing_chains_text = f"""
[Already generated causal chains, do not repeat]
{chr(10).join(chains_list)}
"""
return f"""/no_think
Generate causal chain #{chain_index} for "{query}".
[Available Materials] {', '.join(materials)}
[Available Functions] {', '.join(functions)}
[Available Usages] {', '.join(usages)}
[Available User Groups] {', '.join(users)}
{existing_chains_text}
[Rules]
1. Select one attribute from each category to form a logical causal chain
2. The causal relationship must be logical (materials determine functions, functions determine usages, usages determine user groups)
3. Do not repeat existing causal chains
Return JSON only:
{{"material": "selected material", "function": "selected function", "usage": "selected usage", "user": "selected user group"}}"""
else:
if existing_chains:
chains_list = [
f"- {c['material']}{c['function']}{c['usage']}{c['user']}"
for c in existing_chains
]
existing_chains_text = f"""
【已生成的因果鏈,請勿重複】
{chr(10).join(chains_list)}
"""
return f"""/no_think
return f"""/no_think
為「{query}」生成第 {chain_index} 條因果鏈。
【可選材料】{', '.join(materials)}
@@ -76,19 +128,52 @@ def get_step2_causal_chain_prompt(
{{"material": "選擇的材料", "function": "選擇的功能", "usage": "選擇的用途", "user": "選擇的族群"}}"""
def get_flat_attribute_prompt(query: str, categories: Optional[List[str]] = None) -> str:
def get_flat_attribute_prompt(
query: str,
categories: Optional[List[str]] = None,
lang: LanguageType = "zh"
) -> str:
"""Generate prompt with flat/parallel categories (original design)."""
cats = categories if categories else DEFAULT_CATEGORIES
cats = categories if categories else get_default_categories(lang)
cat_descs = get_category_descriptions(lang)
# Build category list
category_lines = []
for cat in cats:
desc = CATEGORY_DESCRIPTIONS.get(cat, f"{cat}的相關屬性")
category_lines.append(f"- {cat}:{desc}")
desc = cat_descs.get(cat, f"Related attributes of {cat}" if lang == "en" else f"{cat}的相關屬性")
category_lines.append(f"- {cat}: {desc}")
categories_text = "\n".join(category_lines)
prompt = f"""/no_think
if lang == "en":
prompt = f"""/no_think
You are an object attribute analysis expert. Please break down the user's input object into the following attribute categories.
[Required Categories]
{categories_text}
[Important] The return format must be valid JSON, and each node must have a "name" field:
```json
{{
"name": "Object Name",
"children": [
{{
"name": "Category Name",
"children": [
{{"name": "Attribute 1"}},
{{"name": "Attribute 2"}}
]
}}
]
}}
```
Return JSON only, no other text.
User input: {query}"""
else:
prompt = f"""/no_think
你是一個物件屬性分析專家。請將用戶輸入的物件拆解成以下屬性類別。
【必須包含的類別】
@@ -123,14 +208,42 @@ def get_flat_attribute_prompt(query: str, categories: Optional[List[str]] = None
def get_step0_category_analysis_prompt(
query: str,
suggested_count: int = 3,
exclude_categories: List[str] | None = None
exclude_categories: List[str] | None = None,
lang: LanguageType = "zh"
) -> str:
"""Step 0: LLM 分析建議類別"""
exclude_text = ""
if exclude_categories:
exclude_text = f"\n【禁止使用的類別】{', '.join(exclude_categories)}(這些已經是固定類別,不要重複建議)\n"
"""Step 0: LLM analyzes and suggests categories"""
return f"""/no_think
if lang == "en":
exclude_text = ""
if exclude_categories:
exclude_text = f"\n[Forbidden Categories] {', '.join(exclude_categories)} (These are already fixed categories, do not suggest duplicates)\n"
return f"""/no_think
Analyze "{query}" and suggest {suggested_count} most suitable attribute categories to describe it.
[Common Category References] Characteristics, Shape, Color, Size, Brand, Price Range, Weight, Style, Occasion, Season, Technical Specifications
{exclude_text}
[Important]
1. Choose categories that best describe the essence of this object
2. Categories should have logical relationships
3. Do not choose overly abstract or duplicate categories
4. Must suggest creative categories different from the reference list
Return JSON only:
{{
"categories": [
{{"name": "Category1", "description": "Description1", "order": 0}},
{{"name": "Category2", "description": "Description2", "order": 1}}
]
}}
Object: {query}"""
else:
exclude_text = ""
if exclude_categories:
exclude_text = f"\n【禁止使用的類別】{', '.join(exclude_categories)}(這些已經是固定類別,不要重複建議)\n"
return f"""/no_think
分析「{query}」,建議 {suggested_count} 個最適合的屬性類別來描述它。
【常見類別參考】特性、形狀、顏色、尺寸、品牌、價格區間、重量、風格、場合、季節、技術規格
@@ -154,21 +267,35 @@ def get_step0_category_analysis_prompt(
def get_step1_dynamic_attributes_prompt(
query: str,
categories: List # List[CategoryDefinition]
categories: List, # List[CategoryDefinition]
lang: LanguageType = "zh"
) -> str:
"""動態 Step 1 - 根據類別列表生成屬性"""
# 按 order 排序並構建描述
"""Dynamic Step 1 - Generate attributes based on category list"""
# Sort by order and build description
sorted_cats = sorted(categories, key=lambda x: x.order if hasattr(x, 'order') else x.get('order', 0))
category_desc = "\n".join([
f"- {cat.name if hasattr(cat, 'name') else cat['name']}: {cat.description if hasattr(cat, 'description') else cat.get('description', '相關屬性')}"
f"- {cat.name if hasattr(cat, 'name') else cat['name']}: {cat.description if hasattr(cat, 'description') else cat.get('description', 'Related attributes' if lang == 'en' else '相關屬性')}"
for cat in sorted_cats
])
category_keys = [cat.name if hasattr(cat, 'name') else cat['name'] for cat in sorted_cats]
json_template = {cat: ["屬性1", "屬性2", "屬性3"] for cat in category_keys}
return f"""/no_think
if lang == "en":
json_template = {cat: ["attribute1", "attribute2", "attribute3"] for cat in category_keys}
return f"""/no_think
Analyze "{query}" and list attributes for the following categories. List 3-5 common attributes for each category.
[Category List]
{category_desc}
Return JSON only:
{json.dumps(json_template, ensure_ascii=False, indent=2)}
Object: {query}"""
else:
json_template = {cat: ["屬性1", "屬性2", "屬性3"] for cat in category_keys}
return f"""/no_think
分析「{query}」,列出以下類別的屬性。每個類別列出 3-5 個常見屬性。
【類別列表】
@@ -185,30 +312,59 @@ def get_step2_dynamic_causal_chain_prompt(
categories: List, # List[CategoryDefinition]
attributes_by_category: Dict[str, List[str]],
existing_chains: List[Dict[str, str]],
chain_index: int
chain_index: int,
lang: LanguageType = "zh"
) -> str:
"""動態 Step 2 - 生成動態類別的因果鏈"""
"""Dynamic Step 2 - Generate causal chains for dynamic categories"""
sorted_cats = sorted(categories, key=lambda x: x.order if hasattr(x, 'order') else x.get('order', 0))
# 構建可選屬性
# Build available attributes
available_attrs = "\n".join([
f"{cat.name if hasattr(cat, 'name') else cat['name']}{', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}"
f"[{cat.name if hasattr(cat, 'name') else cat['name']}] {', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}"
for cat in sorted_cats
])
# 已生成的因果鏈
existing_text = ""
if existing_chains:
chains_list = [
"".join([chain.get(cat.name if hasattr(cat, 'name') else cat['name'], '?') for cat in sorted_cats])
for chain in existing_chains
]
existing_text = f"\n【已生成,請勿重複】\n" + "\n".join([f"- {c}" for c in chains_list])
if lang == "en":
# Already generated causal chains
existing_text = ""
if existing_chains:
chains_list = [
"".join([chain.get(cat.name if hasattr(cat, 'name') else cat['name'], '?') for cat in sorted_cats])
for chain in existing_chains
]
existing_text = "\n[Already generated, do not repeat]\n" + "\n".join([f"- {c}" for c in chains_list])
# JSON 模板
json_template = {cat.name if hasattr(cat, 'name') else cat['name']: f"選擇的{cat.name if hasattr(cat, 'name') else cat['name']}" for cat in sorted_cats}
# JSON template
json_template = {cat.name if hasattr(cat, 'name') else cat['name']: f"selected {cat.name if hasattr(cat, 'name') else cat['name']}" for cat in sorted_cats}
return f"""/no_think
return f"""/no_think
Generate causal chain #{chain_index} for "{query}".
[Available Attributes]
{available_attrs}
{existing_text}
[Rules]
1. Select one attribute from each category
2. Causal relationships must be logical
3. Do not repeat
Return JSON only:
{json.dumps(json_template, ensure_ascii=False, indent=2)}"""
else:
# 已生成的因果鏈
existing_text = ""
if existing_chains:
chains_list = [
"".join([chain.get(cat.name if hasattr(cat, 'name') else cat['name'], '?') for cat in sorted_cats])
for chain in existing_chains
]
existing_text = "\n【已生成,請勿重複】\n" + "\n".join([f"- {c}" for c in chains_list])
# JSON 模板
json_template = {cat.name if hasattr(cat, 'name') else cat['name']: f"選擇的{cat.name if hasattr(cat, 'name') else cat['name']}" for cat in sorted_cats}
return f"""/no_think
為「{query}」生成第 {chain_index} 條因果鏈。
【可選屬性】
@@ -230,20 +386,46 @@ def get_step2_dag_relationships_prompt(
query: str,
categories: List, # List[CategoryDefinition]
attributes_by_category: Dict[str, List[str]],
lang: LanguageType = "zh"
) -> str:
"""生成相鄰類別之間的自然關係"""
"""Generate natural relationships between adjacent categories"""
sorted_cats = sorted(categories, key=lambda x: x.order if hasattr(x, 'order') else x.get('order', 0))
# Build attribute listing
attr_listing = "\n".join([
f"{cat.name if hasattr(cat, 'name') else cat['name']}{', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}"
f"[{cat.name if hasattr(cat, 'name') else cat['name']}] {', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}"
for cat in sorted_cats
])
# Build direction hints
direction_hints = "".join([cat.name if hasattr(cat, 'name') else cat['name'] for cat in sorted_cats])
return f"""/no_think
if lang == "en":
return f"""/no_think
Analyze the attribute relationships of "{query}".
{attr_listing}
[Relationship Direction] {direction_hints}
[Rules]
1. Only establish relationships between adjacent categories (e.g., Materials→Functions, Functions→Usages)
2. Only output pairs that have true causal or associative relationships
3. An attribute can connect to multiple downstream attributes, or none at all
4. Not every attribute needs to have connections
5. Relationships should be reasonable and meaningful
Return JSON:
{{
"relationships": [
{{"source_category": "CategoryA", "source": "attribute name", "target_category": "CategoryB", "target": "attribute name"}},
...
]
}}
Return JSON only."""
else:
return f"""/no_think
分析「{query}」的屬性關係。
{attr_listing}
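For reference, a small sketch (dicts abbreviated) of the fallback behaviour of the `get_default_categories` / `get_category_descriptions` helpers added above: `dict.get` with a default means any unrecognised language code degrades to the Chinese tables rather than raising.

```python
from typing import Dict, List

DEFAULT_CATEGORIES: Dict[str, List[str]] = {
    "zh": ["材料", "功能"],           # abbreviated
    "en": ["Materials", "Functions"],
}

def get_default_categories(lang: str = "zh") -> List[str]:
    # Unknown codes fall back to the Chinese defaults instead of raising.
    return DEFAULT_CATEGORIES.get(lang, DEFAULT_CATEGORIES["zh"])

print(get_default_categories("en"))  # ['Materials', 'Functions']
print(get_default_categories("de"))  # ['材料', '功能'] (fallback)
```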

View File

@@ -1,34 +1,68 @@
"""Expert Transformation Agent 提示詞模組"""
"""Expert Transformation Agent prompts module - Bilingual support"""
from typing import List, Optional
from .language_config import LanguageType
def get_expert_generation_prompt(
query: str,
categories: List[str],
expert_count: int,
custom_experts: Optional[List[str]] = None
custom_experts: Optional[List[str]] = None,
lang: LanguageType = "zh"
) -> str:
"""Step 0: 生成專家團隊(不依賴主題,純隨機多元)"""
"""Step 0: Generate expert team (not dependent on topic, purely random and diverse)"""
import time
import random
custom_text = ""
if custom_experts and len(custom_experts) > 0:
custom_text = f"(已指定:{', '.join(custom_experts[:expert_count])}"
# 加入時間戳和隨機數來增加多樣性
# Add timestamp and random number for diversity
seed = int(time.time() * 1000) % 10000
diversity_hints = [
"冷門、非主流、跨領域",
"罕見職業、新興領域、邊緣學科",
"非傳統、創新、小眾專業",
"未來趨向、實驗性、非常規",
"跨文化、混合領域、獨特視角"
]
hint = random.choice(diversity_hints)
return f"""/no_think
if lang == "en":
custom_text = ""
if custom_experts and len(custom_experts) > 0:
custom_text = f" (Specified: {', '.join(custom_experts[:expert_count])})"
diversity_hints = [
"obscure, non-mainstream, cross-disciplinary",
"rare occupations, emerging fields, fringe disciplines",
"unconventional, innovative, niche specialties",
"future-oriented, experimental, non-traditional",
"cross-cultural, hybrid fields, unique perspectives"
]
hint = random.choice(diversity_hints)
return f"""/no_think
Randomly assemble a team of {expert_count} experts from completely different fields{custom_text}.
[Innovation Requirements] (Random seed: {seed})
- Prioritize {hint} experts
- Avoid common professions (such as doctors, engineers, teachers, lawyers, etc.)
- Each expert must be from a completely unrelated field
- The rarer and more innovative, the better
Return JSON:
{{"experts": [{{"id": "expert-0", "name": "profession", "domain": "field", "perspective": "viewpoint"}}, ...]}}
Rules:
- id should be expert-0 to expert-{expert_count - 1}
- name is the profession name (not a person's name), 2-5 words
- domain should be specific and unique, no duplicate types"""
else:
custom_text = ""
if custom_experts and len(custom_experts) > 0:
custom_text = f"(已指定:{', '.join(custom_experts[:expert_count])}"
diversity_hints = [
"冷門、非主流、跨領域",
"罕見職業、新興領域、邊緣學科",
"非傳統、創新、小眾專業",
"未來趨向、實驗性、非常規",
"跨文化、混合領域、獨特視角"
]
hint = random.choice(diversity_hints)
return f"""/no_think
隨機組建 {expert_count} 個來自完全不同領域的專家團隊{custom_text}
【創新要求】(隨機種子:{seed})
@@ -50,13 +84,39 @@ def get_expert_keyword_generation_prompt(
category: str,
attribute: str,
experts: List[dict], # List[ExpertProfile]
keywords_per_expert: int = 1
keywords_per_expert: int = 1,
lang: LanguageType = "zh"
) -> str:
"""Step 1: 專家視角關鍵字生成"""
# 建立專家列表,格式更清晰
"""Step 1: Expert perspective keyword generation"""
# Build expert list in clearer format
experts_list = "\n".join([f"- {exp['id']}: {exp['name']}" for exp in experts])
return f"""/no_think
if lang == "en":
return f"""/no_think
You need to play the role of the following experts to generate innovative keywords for an attribute:
[Expert List]
{experts_list}
[Task]
Attribute: "{attribute}" (Category: {category})
For each expert, please:
1. First understand the professional background, knowledge domain, and work content of that profession
2. Think about "{attribute}" from that profession's unique perspective
3. Generate {keywords_per_expert} innovative keyword(s) related to that specialty (2-6 words)
Keywords must reflect that expert's professional thinking style, for example:
- Accountant viewing "movement" → "cash flow", "cost-benefit"
- Architect viewing "movement" → "circulation design", "spatial flow"
- Psychologist viewing "movement" → "behavioral motivation", "emotional transition"
Return JSON:
{{"keywords": [{{"keyword": "term", "expert_id": "expert-X", "expert_name": "name"}}, ...]}}
Total of {len(experts) * keywords_per_expert} keywords needed, each keyword must be clearly related to the corresponding expert's professional field."""
else:
return f"""/no_think
你需要扮演以下專家,為屬性生成創新關鍵字:
【專家名單】
@@ -86,13 +146,29 @@ def get_single_description_prompt(
keyword: str,
expert_id: str,
expert_name: str,
expert_domain: str
expert_domain: str,
lang: LanguageType = "zh"
) -> str:
"""Step 2: 為單一關鍵字生成描述"""
# 如果 domain 是通用的,就只用職業名稱
domain_text = f"{expert_domain}領域)" if expert_domain and expert_domain != "Professional Field" else ""
"""Step 2: Generate description for a single keyword"""
if lang == "en":
# If domain is generic, just use profession name
domain_text = f" ({expert_domain} field)" if expert_domain and expert_domain != "Professional Field" else ""
return f"""/no_think
return f"""/no_think
You are a {expert_name}{domain_text}.
Task: Generate an innovative application description for "{query}".
Keyword: {keyword}
From your professional perspective, explain how to apply the concept of "{keyword}" to "{query}". The description should be specific, creative, 15-30 words.
Return JSON only, no other text:
{{"description": "your innovative application description"}}"""
else:
# 如果 domain 是通用的,就只用職業名稱
domain_text = f"{expert_domain}領域)" if expert_domain and expert_domain != "Professional Field" else ""
return f"""/no_think
你是一位{expert_name}{domain_text}
任務:為「{query}」生成一段創新應用描述。
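All of the builders in this module follow the same shape: one `if lang == "en":` branch per function, each returning a complete template. A compressed, runnable stand-in for `get_single_description_prompt` (templates shortened, not the real ones):

```python
from typing import Literal

LanguageType = Literal["zh", "en"]

def get_single_description_prompt_sketch(
    query: str, keyword: str, expert_name: str, lang: LanguageType = "zh"
) -> str:
    """Compressed stand-in for the real builder; actual templates are longer."""
    if lang == "en":
        return f"""/no_think
You are a {expert_name}.
Task: generate an innovative application description for "{query}" using "{keyword}".
Return JSON only: {{"description": "..."}}"""
    # Default branch keeps the original Chinese template.
    return f"""/no_think
你是一位{expert_name}。
任務:用「{keyword}」為「{query}」生成一段創新應用描述。"""

print(get_single_description_prompt_sketch("umbrella", "cash flow", "accountant", lang="en"))
```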

View File

@@ -0,0 +1,51 @@
"""Language configuration for prompts"""
from enum import Enum
from typing import Literal
class Language(str, Enum):
CHINESE = "zh"
ENGLISH = "en"
LanguageType = Literal["zh", "en"]
# Default categories for each language
DEFAULT_CATEGORIES = {
"zh": ["材料", "功能", "用途", "使用族群", "特性"],
"en": ["Materials", "Functions", "Usages", "User Groups", "Characteristics"],
}
CATEGORY_DESCRIPTIONS = {
"zh": {
"材料": "物件由什麼材料組成",
"功能": "物件能做什麼",
"用途": "物件在什麼場景使用",
"使用族群": "誰會使用這個物件",
"特性": "物件有什麼特徵",
},
"en": {
"Materials": "What materials the object is made of",
"Functions": "What the object can do",
"Usages": "In what scenarios the object is used",
"User Groups": "Who uses this object",
"Characteristics": "What features the object has",
},
}
# Category name mappings between languages
CATEGORY_MAPPING = {
"zh_to_en": {
"材料": "Materials",
"功能": "Functions",
"用途": "Usages",
"使用族群": "User Groups",
"特性": "Characteristics",
},
"en_to_zh": {
"Materials": "材料",
"Functions": "功能",
"Usages": "用途",
"User Groups": "使用族群",
"Characteristics": "特性",
},
}
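One hedged example of what `CATEGORY_MAPPING` enables: translating category keys when a tree built in one language is reused in the other. The `translate_categories` helper below is illustrative, not part of the commit.

```python
from typing import List

CATEGORY_MAPPING = {
    "zh_to_en": {"材料": "Materials", "功能": "Functions"},  # abbreviated
    "en_to_zh": {"Materials": "材料", "Functions": "功能"},
}

def translate_categories(names: List[str], direction: str) -> List[str]:
    """Hypothetical helper: map category names, passing unknown ones through."""
    table = CATEGORY_MAPPING[direction]
    return [table.get(name, name) for name in names]

print(translate_categories(["材料", "功能"], "zh_to_en"))     # ['Materials', 'Functions']
print(translate_categories(["Materials", "風格"], "en_to_zh"))  # ['材料', '風格']
```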

View File

@@ -1,22 +1,43 @@
"""Transformation Agent 提示詞模組"""
"""Transformation Agent prompts module - Bilingual support"""
from typing import List
from .language_config import LanguageType
def get_keyword_generation_prompt(
category: str,
attributes: List[str],
keyword_count: int = 3
keyword_count: int = 3,
lang: LanguageType = "zh"
) -> str:
"""
Step 1: 生成新關鍵字
Step 1: Generate new keywords
給定類別和現有屬性,生成全新的、有創意的關鍵字。
不考慮原始查詢,只專注於類別本身可能的延伸。
Given a category and existing attributes, generate new, creative keywords.
Ignore the original query; focus only on possible extensions of the category itself.
"""
attrs_text = "".join(attributes)
attrs_text = ", ".join(attributes) if lang == "en" else "".join(attributes)
return f"""/no_think
if lang == "en":
return f"""/no_think
You are a creative brainstorming expert. Given a category and its existing attributes, please generate new, creative keywords or descriptive phrases.
[Category] {category}
[Existing Attributes] {attrs_text}
[Important Rules]
1. Generate {keyword_count} completely new keywords
2. Keywords must fit within the scope of "{category}" category
3. Keywords should be creative and not duplicate or be too similar to existing attributes
4. Don't consider any specific object, focus only on possible extensions of this category
5. Each keyword should be 2-6 words
Return JSON only:
{{
"keywords": ["keyword1", "keyword2", "keyword3"]
}}"""
else:
return f"""/no_think
你是一個創意發想專家。給定一個類別和該類別下的現有屬性,請生成全新的、有創意的關鍵字或描述片段。
【類別】{category}
@@ -38,14 +59,36 @@ def get_keyword_generation_prompt(
def get_description_generation_prompt(
query: str,
category: str,
keyword: str
keyword: str,
lang: LanguageType = "zh"
) -> str:
"""
Step 2: 結合原始查詢生成描述
Step 2: Combine with original query to generate description
用新關鍵字創造一個與原始查詢相關的創新應用描述。
Use the new keyword to create an innovative application description related to the original query.
"""
return f"""/no_think
if lang == "en":
return f"""/no_think
You are an innovation application expert. Please apply a new keyword concept to a specific object to create an innovative application description.
[Object] {query}
[Category] {category}
[New Keyword] {keyword}
[Task]
Using the concept of "{keyword}", create an innovative application description for "{query}".
The description should be a complete sentence or phrase explaining how to apply this new concept to the object.
[Example Format]
- If the object is "bicycle" and keyword is "monitor", you could generate "bicycle monitors the rider's health status"
- If the object is "umbrella" and keyword is "generate power", you could generate "umbrella generates electricity using raindrop impacts"
Return JSON only:
{{
"description": "innovative application description"
}}"""
else:
return f"""/no_think
你是一個創新應用專家。請將一個新的關鍵字概念應用到特定物件上,創造出創新的應用描述。
【物件】{query}
@@ -69,15 +112,35 @@ def get_description_generation_prompt(
def get_batch_description_prompt(
query: str,
category: str,
keywords: List[str]
keywords: List[str],
lang: LanguageType = "zh"
) -> str:
"""
批次生成描述(可選的優化版本,一次處理多個關鍵字)
Batch description generation (optional optimized version, process multiple keywords at once)
"""
keywords_text = "".join(keywords)
keywords_json = ", ".join([f'"{k}"' for k in keywords])
keywords_text = ", ".join(keywords) if lang == "en" else "".join(keywords)
return f"""/no_think
if lang == "en":
return f"""/no_think
You are an innovation application expert. Please apply multiple new keyword concepts to a specific object, creating an innovative application description for each keyword.
[Object] {query}
[Category] {category}
[New Keywords] {keywords_text}
[Task]
Create an innovative application description related to "{query}" for each keyword.
Each description should be a complete sentence or phrase.
Return JSON only:
{{
"descriptions": [
{{"keyword": "keyword1", "description": "description1"}},
{{"keyword": "keyword2", "description": "description2"}}
]
}}"""
else:
return f"""/no_think
你是一個創新應用專家。請將多個新的關鍵字概念應用到特定物件上,為每個關鍵字創造創新的應用描述。
【物件】{query}
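To make the two-step flow concrete: Step 1 brainstorms keywords from the category alone, Step 2 reattaches them to the original query. A runnable sketch with a canned stand-in for `ollama_provider.generate` (the JSON payloads mirror the formats the prompts ask for):

```python
import json

def fake_generate(step: int) -> str:
    """Canned stand-in for ollama_provider.generate(); shapes match the prompts."""
    if step == 1:
        return json.dumps({"keywords": ["monitor", "generate power", "fold flat"]})
    return json.dumps({"descriptions": [
        {"keyword": "monitor",
         "description": "bicycle monitors the rider's health status"},
    ]})

# Step 1: brainstorm keywords from category + attributes only (query is withheld)
keywords = json.loads(fake_generate(1))["keywords"]

# Step 2: reattach the original query to each keyword to form descriptions
descriptions = json.loads(fake_generate(2))["descriptions"]
print(keywords)
print(descriptions[0]["description"])
```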

View File

@@ -58,7 +58,8 @@ async def execute_step0(
prompt = get_step0_category_analysis_prompt(
request.query,
request.suggested_category_count,
exclude_categories=exclude_categories
exclude_categories=exclude_categories,
lang=request.lang
)
temperature = request.temperature if request.temperature is not None else 0.7
response = await ollama_provider.generate(
@@ -310,7 +311,7 @@ async def generate_sse_events(request: StreamAnalyzeRequest) -> AsyncGenerator[s
# ========== Step 1: Generate Attributes (Dynamic) ==========
yield f"event: step1_start\ndata: {json.dumps({'message': '生成屬性...'}, ensure_ascii=False)}\n\n"
step1_prompt = get_step1_dynamic_attributes_prompt(request.query, final_categories)
step1_prompt = get_step1_dynamic_attributes_prompt(request.query, final_categories, lang=request.lang)
logger.info(f"Step 1 prompt: {step1_prompt[:200]}")
step1_response = await ollama_provider.generate(
@@ -330,6 +331,7 @@ async def generate_sse_events(request: StreamAnalyzeRequest) -> AsyncGenerator[s
query=request.query,
categories=final_categories,
attributes_by_category=step1_result.attributes,
lang=request.lang
)
logger.info(f"Step 2 (relationships) prompt: {step2_prompt[:300]}")

View File

@@ -63,7 +63,8 @@ async def deduplicate_descriptions(request: DeduplicationRequest) -> Deduplicati
# 使用 LLM 成對比較去重
result = await llm_deduplication_service.deduplicate(
descriptions=request.descriptions,
model=request.model
model=request.model,
lang=request.lang
)
return result
except ValueError as e:

View File

@@ -68,7 +68,8 @@ async def generate_expert_transformation_events(
query=request.query,
categories=all_categories,
expert_count=request.expert_count,
custom_experts=actual_custom_experts if actual_custom_experts else None
custom_experts=actual_custom_experts if actual_custom_experts else None,
lang=request.lang
)
logger.info(f"Expert prompt: {expert_prompt[:200]}")
@@ -119,7 +120,8 @@ async def generate_expert_transformation_events(
query=request.query,
categories=all_categories,
expert_count=request.expert_count,
custom_experts=actual_custom_experts if actual_custom_experts else None
custom_experts=actual_custom_experts if actual_custom_experts else None,
lang=request.lang
)
expert_response = await ollama_provider.generate(
@@ -160,7 +162,8 @@ async def generate_expert_transformation_events(
category=request.category,
attribute=attribute,
experts=[e.model_dump() for e in experts],
keywords_per_expert=request.keywords_per_expert
keywords_per_expert=request.keywords_per_expert,
lang=request.lang
)
logger.info(f"Keyword prompt for '{attribute}': {kw_prompt[:300]}")
@@ -214,7 +217,8 @@ async def generate_expert_transformation_events(
keyword=kw.keyword,
expert_id=kw.expert_id,
expert_name=kw.expert_name,
expert_domain=expert_domain
expert_domain=expert_domain,
lang=request.lang
)
desc_response = await ollama_provider.generate(

View File

@@ -0,0 +1,133 @@
"""Patent Search Router - Search for similar patents"""
import logging
from typing import Optional, List
from fastapi import APIRouter
from pydantic import BaseModel
from ..services.patent_search_service import patent_search_service
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/patent", tags=["patent"])
# ===== Request/Response Models =====
class PatentSearchRequest(BaseModel):
"""Patent search request"""
query: str # Search query (description or keywords)
max_results: int = 10 # Maximum results to return (1-20)
class PatentResult(BaseModel):
"""Single patent result"""
publication_number: str
title: str
snippet: str
publication_date: Optional[str] = None
assignee: Optional[str] = None
inventor: Optional[str] = None
status: str # ACTIVE, NOT_ACTIVE, UNKNOWN
pdf_url: Optional[str] = None
thumbnail_url: Optional[str] = None
class PatentSearchResponse(BaseModel):
"""Patent search response"""
query: str
total_results: int
patents: List[PatentResult]
error: Optional[str] = None
class BatchPatentSearchRequest(BaseModel):
"""Batch patent search request - search multiple descriptions"""
queries: List[str] # List of descriptions to search
max_results_per_query: int = 5 # Max results per query
class BatchPatentSearchResult(BaseModel):
"""Results for a single query in batch search"""
query: str
total_results: int
patents: List[PatentResult]
error: Optional[str] = None
class BatchPatentSearchResponse(BaseModel):
"""Batch patent search response"""
results: List[BatchPatentSearchResult]
total_queries: int
# ===== Endpoints =====
@router.post("/search", response_model=PatentSearchResponse)
async def search_patents(request: PatentSearchRequest):
"""
Search for patents similar to the given description/query.
Uses Google Patents to find related patents based on keywords.
"""
logger.info(f"Patent search request: {request.query[:100]}...")
# Limit max_results to reasonable range
max_results = min(max(1, request.max_results), 20)
result = await patent_search_service.search(
query=request.query,
max_results=max_results,
)
return PatentSearchResponse(
query=request.query,
total_results=result.get("total_results", 0),
patents=[PatentResult(**p) for p in result.get("patents", [])],
error=result.get("error"),
)
@router.post("/search/batch", response_model=BatchPatentSearchResponse)
async def batch_search_patents(request: BatchPatentSearchRequest):
"""
Search for patents for multiple descriptions at once.
Useful for checking multiple creative descriptions against patents.
"""
logger.info(f"Batch patent search: {len(request.queries)} queries")
# Limit results per query
max_per_query = min(max(1, request.max_results_per_query), 10)
results: List[BatchPatentSearchResult] = []
for query in request.queries:
result = await patent_search_service.search(
query=query,
max_results=max_per_query,
)
results.append(BatchPatentSearchResult(
query=query,
total_results=result.get("total_results", 0),
patents=[PatentResult(**p) for p in result.get("patents", [])],
error=result.get("error"),
))
return BatchPatentSearchResponse(
results=results,
total_queries=len(request.queries),
)
@router.get("/health")
async def patent_search_health():
"""Check if patent search service is working"""
# Do a simple test search
result = await patent_search_service.search("test", max_results=1)
if result.get("error"):
return {"status": "unhealthy", "error": result["error"]}
return {"status": "healthy"}

View File

@@ -36,7 +36,8 @@ async def generate_transformation_events(
keyword_prompt = get_keyword_generation_prompt(
category=request.category,
attributes=request.attributes,
keyword_count=request.keyword_count
keyword_count=request.keyword_count,
lang=request.lang
)
logger.info(f"Keyword prompt: {keyword_prompt[:200]}")
@@ -61,7 +62,8 @@ async def generate_transformation_events(
desc_prompt = get_batch_description_prompt(
query=request.query,
category=request.category,
keywords=new_keywords
keywords=new_keywords,
lang=request.lang
)
logger.info(f"Description prompt: {desc_prompt[:300]}")

View File

@@ -1,12 +1,12 @@
"""
LLM Deduplication Service - 使用 LLM 成對比較進行去重
LLM Deduplication Service - Using LLM pairwise comparison for deduplication
LLM 判斷兩個描述是否語意重複,透過並行處理加速。
Let an LLM determine whether two descriptions are semantic duplicates, accelerated by parallel processing.
"""
import asyncio
import logging
from typing import List, Tuple, Optional
from typing import List, Tuple, Optional, Literal
import httpx
import numpy as np
@@ -18,6 +18,7 @@ from ..models.schemas import (
DeduplicationMethod,
DescriptionGroup,
)
from ..prompts.language_config import LanguageType
logger = logging.getLogger(__name__)
@@ -31,27 +32,20 @@ class LLMDeduplicationService:
self.client = httpx.AsyncClient(timeout=60.0)
self.max_concurrent = 5 # 最大並行數,避免 Ollama 過載
async def compare_pair(
self,
desc1: str,
desc2: str,
model: str,
semaphore: asyncio.Semaphore
) -> bool:
"""
讓 LLM 判斷兩個描述是否語意重複
def _get_comparison_prompt(self, desc1: str, desc2: str, lang: LanguageType = "zh") -> str:
"""Get comparison prompt in the specified language"""
if lang == "en":
return f"""Determine whether the following two innovative descriptions express the same or very similar concepts:
Args:
desc1: 第一個描述
desc2: 第二個描述
model: LLM 模型名稱
semaphore: 並行控制信號量
Description 1: {desc1}
Returns:
bool: 是否為重複描述
"""
async with semaphore: # 控制並行數
prompt = f"""判斷以下兩個創新描述是否表達相同或非常相似的概念:
Description 2: {desc2}
If both descriptions essentially express the same or very similar innovative concept, answer "YES"
If the two descriptions express different innovative concepts, answer "NO"
Only answer YES or NO, no other text"""
else:
return f"""判斷以下兩個創新描述是否表達相同或非常相似的概念:
描述1: {desc1}
@@ -61,6 +55,30 @@ class LLMDeduplicationService:
如果兩者描述不同的創新概念,回答 "NO"
只回答 YES 或 NO,不要其他文字"""
async def compare_pair(
self,
desc1: str,
desc2: str,
model: str,
semaphore: asyncio.Semaphore,
lang: LanguageType = "zh"
) -> bool:
"""
Let an LLM determine whether two descriptions are semantic duplicates
Args:
desc1: First description
desc2: Second description
model: LLM model name
semaphore: Concurrency control semaphore
lang: Language for the prompt
Returns:
bool: Whether the descriptions are duplicates
"""
async with semaphore: # Control concurrency
prompt = self._get_comparison_prompt(desc1, desc2, lang)
try:
response = await self.client.post(
f"{self.base_url}/api/generate",
@@ -86,26 +104,28 @@ class LLMDeduplicationService:
async def compare_batch(
self,
pairs: List[Tuple[int, int, str, str]],
model: str
model: str,
lang: LanguageType = "zh"
) -> List[Tuple[int, int, bool]]:
"""
並行批次比較多個描述對
Parallel batch comparison of multiple description pairs
Args:
pairs: 待比較的配對列表 [(i, j, desc1, desc2), ...]
model: LLM 模型名稱
pairs: List of pairs to compare [(i, j, desc1, desc2), ...]
model: LLM model name
lang: Language for the prompt
Returns:
比較結果列表 [(i, j, is_similar), ...]
List of comparison results [(i, j, is_similar), ...]
"""
semaphore = asyncio.Semaphore(self.max_concurrent)
async def compare_one(pair: Tuple[int, int, str, str]) -> Tuple[int, int, bool]:
i, j, desc1, desc2 = pair
is_similar = await self.compare_pair(desc1, desc2, model, semaphore)
is_similar = await self.compare_pair(desc1, desc2, model, semaphore, lang)
return (i, j, is_similar)
# 使用 asyncio.gather 並行執行所有比較
# Use asyncio.gather to execute all comparisons in parallel
results = await asyncio.gather(*[compare_one(p) for p in pairs])
return results
@@ -144,17 +164,19 @@ class LLMDeduplicationService:
async def deduplicate(
self,
descriptions: List[ExpertTransformationDescription],
model: Optional[str] = None
model: Optional[str] = None,
lang: LanguageType = "zh"
) -> DeduplicationResult:
"""
使用 LLM 成對比較進行去重
Use LLM pairwise comparison for deduplication
Args:
descriptions: 要去重的描述列表
model: LLM 模型名稱
descriptions: List of descriptions to deduplicate
model: LLM model name
lang: Language for the prompt
Returns:
DeduplicationResult: 去重結果
DeduplicationResult: Deduplication result
"""
model = model or self.default_model
@@ -188,10 +210,10 @@ class LLMDeduplicationService:
))
total_pairs = len(pairs)
logger.info(f"LLM deduplication: {total_pairs} pairs to compare (parallel={self.max_concurrent}, model={model})")
logger.info(f"LLM deduplication: {total_pairs} pairs to compare (parallel={self.max_concurrent}, model={model}, lang={lang})")
# 並行批次比較
results = await self.compare_batch(pairs, model)
# Parallel batch comparison
results = await self.compare_batch(pairs, model, lang)
# 填入相似度矩陣
for i, j, is_similar in results:
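The concurrency pattern behind `compare_batch`, reduced to a runnable sketch: `asyncio.gather` schedules every pair at once while a shared semaphore caps in-flight LLM calls at `max_concurrent`; the `sleep` stands in for the Ollama round-trip and string equality for the YES/NO parse.

```python
import asyncio
from typing import List, Tuple

MAX_CONCURRENT = 5  # mirrors self.max_concurrent in the service

async def compare_pair(d1: str, d2: str, sem: asyncio.Semaphore) -> bool:
    async with sem:               # at most MAX_CONCURRENT calls in flight
        await asyncio.sleep(0.1)  # stands in for the Ollama HTTP round-trip
        return d1 == d2           # stands in for parsing the YES/NO answer

async def compare_batch(pairs: List[Tuple[int, int, str, str]]) -> List[Tuple[int, int, bool]]:
    sem = asyncio.Semaphore(MAX_CONCURRENT)

    async def one(pair: Tuple[int, int, str, str]) -> Tuple[int, int, bool]:
        i, j, d1, d2 = pair
        return (i, j, await compare_pair(d1, d2, sem))

    # gather fires all comparisons; the semaphore throttles them
    return await asyncio.gather(*[one(p) for p in pairs])

pairs = [(0, 1, "a", "a"), (0, 2, "a", "b"), (1, 2, "a", "b")]
print(asyncio.run(compare_batch(pairs)))  # [(0, 1, True), (0, 2, False), (1, 2, False)]
```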

View File

@@ -0,0 +1,195 @@
"""Patent Search Service using Google Patents XHR API"""
import httpx
import logging
from typing import List, Optional
from urllib.parse import quote_plus
logger = logging.getLogger(__name__)
class PatentSearchResult:
"""Single patent search result"""
def __init__(
self,
publication_number: str,
title: str,
snippet: str,
publication_date: Optional[str],
assignee: Optional[str],
inventor: Optional[str],
status: str,
pdf_url: Optional[str] = None,
thumbnail_url: Optional[str] = None,
):
self.publication_number = publication_number
self.title = title
self.snippet = snippet
self.publication_date = publication_date
self.assignee = assignee
self.inventor = inventor
self.status = status
self.pdf_url = pdf_url
self.thumbnail_url = thumbnail_url
def to_dict(self):
return {
"publication_number": self.publication_number,
"title": self.title,
"snippet": self.snippet,
"publication_date": self.publication_date,
"assignee": self.assignee,
"inventor": self.inventor,
"status": self.status,
"pdf_url": self.pdf_url,
"thumbnail_url": self.thumbnail_url,
}
class PatentSearchService:
"""Service for searching patents using Google Patents"""
GOOGLE_PATENTS_XHR_URL = "https://patents.google.com/xhr/query"
GOOGLE_PATENTS_PDF_BASE = "https://patentimages.storage.googleapis.com/"
def __init__(self):
self._client: Optional[httpx.AsyncClient] = None
# Browser-like headers to avoid being blocked
DEFAULT_HEADERS = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Accept": "application/json, text/plain, */*",
"Accept-Language": "en-US,en;q=0.9",
"Referer": "https://patents.google.com/",
"Origin": "https://patents.google.com",
}
async def _get_client(self) -> httpx.AsyncClient:
if self._client is None or self._client.is_closed:
self._client = httpx.AsyncClient(
timeout=30.0,
headers=self.DEFAULT_HEADERS,
follow_redirects=True,
)
return self._client
async def close(self):
if self._client and not self._client.is_closed:
await self._client.aclose()
async def search(
self,
query: str,
max_results: int = 10,
) -> dict:
"""
Search Google Patents for relevant patents
Args:
query: Search query (can be a description or keywords)
max_results: Maximum number of results to return
Returns:
Dict with total_results count and list of patent results
"""
try:
client = await self._get_client()
# URL encode the query
encoded_query = quote_plus(query)
url = f"{self.GOOGLE_PATENTS_XHR_URL}?url=q%3D{encoded_query}&exp=&tags="
logger.info(f"Searching patents with query: {query[:100]}...")
response = await client.get(url)
if response.status_code != 200:
logger.error(f"Google Patents API returned status {response.status_code}")
return {
"total_results": 0,
"patents": [],
"error": f"API returned status {response.status_code}"
}
data = response.json()
# Parse results
results = data.get("results", {})
total_num = results.get("total_num_results", 0)
clusters = results.get("cluster", [])
patents: List[PatentSearchResult] = []
if clusters and len(clusters) > 0:
patent_results = clusters[0].get("result", [])
for item in patent_results[:max_results]:
patent_data = item.get("patent", {})
family_meta = patent_data.get("family_metadata", {})
aggregated = family_meta.get("aggregated", {})
country_status = aggregated.get("country_status", [])
status = "UNKNOWN"
if country_status and len(country_status) > 0:
best_stage = country_status[0].get("best_patent_stage", {})
status = best_stage.get("state", "UNKNOWN")
# Build PDF URL if available
pdf_path = patent_data.get("pdf", "")
pdf_url = f"{self.GOOGLE_PATENTS_PDF_BASE}{pdf_path}" if pdf_path else None
# Build thumbnail URL
thumbnail = patent_data.get("thumbnail", "")
thumbnail_url = f"{self.GOOGLE_PATENTS_PDF_BASE}{thumbnail}" if thumbnail else None
patent = PatentSearchResult(
publication_number=patent_data.get("publication_number", ""),
title=self._clean_html(patent_data.get("title", "")),
snippet=self._clean_html(patent_data.get("snippet", "")),
publication_date=patent_data.get("publication_date"),
assignee=patent_data.get("assignee"),
inventor=patent_data.get("inventor"),
status=status,
pdf_url=pdf_url,
thumbnail_url=thumbnail_url,
)
patents.append(patent)
logger.info(f"Found {total_num} total patents, returning {len(patents)}")
return {
"total_results": total_num,
"patents": [p.to_dict() for p in patents],
}
except httpx.HTTPError as e:
logger.error(f"HTTP error searching patents: {e}")
return {
"total_results": 0,
"patents": [],
"error": str(e)
}
except Exception as e:
logger.error(f"Error searching patents: {e}")
return {
"total_results": 0,
"patents": [],
"error": str(e)
}
def _clean_html(self, text: str) -> str:
"""Remove HTML entities and tags from text"""
if not text:
return ""
# Replace common HTML entities
text = text.replace("…", "...")
text = text.replace("&", "&")
text = text.replace("&lt;", "<")
text = text.replace("&gt;", ">")
text = text.replace("&quot;", '"')
text = text.replace("&#39;", "'")
return text.strip()
# Singleton instance
patent_search_service = PatentSearchService()
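Finally, a hedged driver for the singleton above (import path assumed): run it inside asyncio and close the shared client when done. The XHR endpoint is unofficial, so an `error` key in the result is an expected outcome.

```python
import asyncio

# Import path assumed; adjust to the actual package layout.
from app.services.patent_search_service import patent_search_service

async def main() -> None:
    result = await patent_search_service.search("foldable solar umbrella", max_results=3)
    if result.get("error"):
        print("search failed:", result["error"])  # unofficial API; may be blocked
    else:
        for p in result["patents"]:
            print(p["publication_number"], "-", p["title"])
    await patent_search_service.close()  # release the shared httpx client

if __name__ == "__main__":
    asyncio.run(main())
```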