chore: save local changes
This commit is contained in:
@@ -3,10 +3,11 @@ from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from .routers import attributes, transformation, expert_transformation, deduplication
|
||||
from .routers import attributes, transformation, expert_transformation, deduplication, patent_search
|
||||
from .services.llm_service import ollama_provider
|
||||
from .services.embedding_service import embedding_service
|
||||
from .services.llm_deduplication_service import llm_deduplication_service
|
||||
from .services.patent_search_service import patent_search_service
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
@@ -15,6 +16,7 @@ async def lifespan(app: FastAPI):
|
||||
await ollama_provider.close()
|
||||
await embedding_service.close()
|
||||
await llm_deduplication_service.close()
|
||||
await patent_search_service.close()
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
@@ -36,6 +38,7 @@ app.include_router(attributes.router)
|
||||
app.include_router(transformation.router)
|
||||
app.include_router(expert_transformation.router)
|
||||
app.include_router(deduplication.router)
|
||||
app.include_router(patent_search.router)
|
||||
|
||||
|
||||
@app.get("/")
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
from pydantic import BaseModel
|
||||
from typing import Optional, List, Dict
|
||||
from typing import Optional, List, Dict, Literal
|
||||
from enum import Enum
|
||||
|
||||
# Language type for prompts
|
||||
LanguageType = Literal["zh", "en"]
|
||||
|
||||
|
||||
class AttributeNode(BaseModel):
|
||||
name: str
|
||||
@@ -47,16 +50,19 @@ class CausalChain(BaseModel):
|
||||
|
||||
|
||||
class StreamAnalyzeRequest(BaseModel):
    """Multi-step analysis request (updated to support dynamic categories)"""

    # Each field is declared exactly once; the superseded duplicate
    # declarations (and the dead second docstring literal) were removed.
    query: str
    model: Optional[str] = None
    temperature: Optional[float] = 0.7
    chain_count: int = 5  # User can set how many causal chains to generate

    # Dynamic category support
    category_mode: Optional[str] = "dynamic_auto"  # CategoryMode enum value
    custom_categories: Optional[List[str]] = None
    suggested_category_count: int = 3  # Suggest LLM to generate this many categories

    # Language setting
    lang: LanguageType = "zh"
|
||||
|
||||
|
||||
class StreamAnalyzeResponse(BaseModel):
|
||||
@@ -136,13 +142,14 @@ class DAGRelationship(BaseModel):
|
||||
# ===== Transformation Agent schemas =====
|
||||
|
||||
class TransformationRequest(BaseModel):
    """Transformation Agent request"""

    # Each field is declared exactly once; the superseded duplicate
    # declarations (and the dead second docstring literal) were removed.
    query: str  # Original query (e.g., "bicycle")
    category: str  # Category name (e.g., "Functions")
    attributes: List[str]  # Attribute list for this category
    model: Optional[str] = None
    temperature: Optional[float] = 0.7
    keyword_count: int = 3  # Number of new keywords to generate
    lang: LanguageType = "zh"  # Language for prompts
|
||||
|
||||
|
||||
class TransformationDescription(BaseModel):
|
||||
@@ -215,24 +222,27 @@ class ExpertSource(str, Enum):
|
||||
|
||||
|
||||
class ExpertTransformationRequest(BaseModel):
    """Expert Transformation Agent request"""

    # Each field is declared exactly once; the superseded duplicate
    # declarations (and the dead second docstring literal) were removed.
    query: str
    category: str
    attributes: List[str]

    # Expert parameters
    # NOTE(review): the ranges quoted in the comments below are documented
    # intent only; nothing in this model validates them.
    expert_count: int = 3  # Number of experts (2-8)
    keywords_per_expert: int = 1  # Keywords per expert per attribute (1-3)
    custom_experts: Optional[List[str]] = None  # User-specified experts

    # Expert source parameters
    expert_source: ExpertSource = ExpertSource.LLM  # Expert source
    expert_language: str = "en"  # Language for external sources

    # LLM parameters
    model: Optional[str] = None
    temperature: Optional[float] = 0.7

    # Prompt language
    lang: LanguageType = "zh"
|
||||
|
||||
|
||||
# ===== Deduplication Agent schemas =====
|
||||
|
||||
@@ -243,11 +253,12 @@ class DeduplicationMethod(str, Enum):
|
||||
|
||||
|
||||
class DeduplicationRequest(BaseModel):
    """Deduplication request"""

    # Each field is declared exactly once; the superseded duplicate
    # declarations (and the dead second docstring literal) were removed.
    descriptions: List[ExpertTransformationDescription]
    method: DeduplicationMethod = DeduplicationMethod.EMBEDDING  # Deduplication method
    similarity_threshold: float = 0.85  # Cosine similarity threshold (0.0-1.0), only for Embedding
    model: Optional[str] = None  # Embedding/LLM model
    lang: LanguageType = "zh"  # Prompt language (for LLM method)
|
||||
|
||||
|
||||
class DescriptionGroup(BaseModel):
|
||||
|
||||
@@ -1,21 +1,37 @@
|
||||
from typing import List, Optional, Dict
|
||||
import json
|
||||
|
||||
DEFAULT_CATEGORIES = ["材料", "功能", "用途", "使用族群", "特性"]
|
||||
|
||||
CATEGORY_DESCRIPTIONS = {
|
||||
"材料": "物件由什麼材料組成",
|
||||
"功能": "物件能做什麼",
|
||||
"用途": "物件在什麼場景使用",
|
||||
"使用族群": "誰會使用這個物件",
|
||||
"特性": "物件有什麼特徵",
|
||||
}
|
||||
from .language_config import (
|
||||
LanguageType,
|
||||
DEFAULT_CATEGORIES,
|
||||
CATEGORY_DESCRIPTIONS,
|
||||
)
|
||||
|
||||
|
||||
def get_attribute_prompt(query: str, categories: Optional[List[str]] = None) -> str:
|
||||
def get_default_categories(lang: LanguageType = "zh") -> List[str]:
    """Return the default category names for ``lang``.

    Looks ``lang`` up in ``DEFAULT_CATEGORIES`` and falls back to the "zh"
    entry when there is no entry for the requested language.
    """
    zh_defaults = DEFAULT_CATEGORIES["zh"]
    return DEFAULT_CATEGORIES.get(lang, zh_defaults)
|
||||
|
||||
|
||||
def get_category_descriptions(lang: LanguageType = "zh") -> Dict[str, str]:
    """Return the category-name -> description mapping for ``lang``.

    Looks ``lang`` up in ``CATEGORY_DESCRIPTIONS`` and falls back to the "zh"
    entry when there is no entry for the requested language.
    """
    zh_descriptions = CATEGORY_DESCRIPTIONS["zh"]
    return CATEGORY_DESCRIPTIONS.get(lang, zh_descriptions)
|
||||
|
||||
|
||||
def get_attribute_prompt(
|
||||
query: str,
|
||||
categories: Optional[List[str]] = None,
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""Generate prompt with causal chain structure."""
|
||||
if lang == "en":
|
||||
prompt = f"""Analyze the attributes of "{query}" in a causal chain format: Materials→Functions→Usages→User Groups.
|
||||
|
||||
prompt = f"""分析「{query}」的屬性,以因果鏈方式呈現:材料→功能→用途→使用族群。
|
||||
List 3-5 types of materials, each extending into a complete causal chain.
|
||||
|
||||
JSON format:
|
||||
{{"name": "{query}", "children": [{{"name": "Material Name", "category": "Materials", "children": [{{"name": "Function Name", "category": "Functions", "children": [{{"name": "Usage Name", "category": "Usages", "children": [{{"name": "User Group Name", "category": "User Groups"}}]}}]}}]}}]}}
|
||||
|
||||
Return JSON only."""
|
||||
else:
|
||||
prompt = f"""分析「{query}」的屬性,以因果鏈方式呈現:材料→功能→用途→使用族群。
|
||||
|
||||
請列出 3-5 種材料,每種材料延伸出完整因果鏈。
|
||||
|
||||
@@ -27,9 +43,18 @@ JSON 格式:
|
||||
return prompt
|
||||
|
||||
|
||||
def get_step1_attributes_prompt(query: str) -> str:
|
||||
"""Step 1: 生成各類別的屬性列表(平行結構)"""
|
||||
return f"""/no_think
|
||||
def get_step1_attributes_prompt(query: str, lang: LanguageType = "zh") -> str:
|
||||
"""Step 1: Generate attribute list for each category (parallel structure)"""
|
||||
if lang == "en":
|
||||
return f"""/no_think
|
||||
Analyze "{query}" and list attributes for the following four categories. List 3-5 common attributes for each category.
|
||||
|
||||
Return JSON only, in the following format:
|
||||
{{"materials": ["material1", "material2", "material3"], "functions": ["function1", "function2", "function3"], "usages": ["usage1", "usage2", "usage3"], "users": ["user group1", "user group2", "user group3"]}}
|
||||
|
||||
Object: {query}"""
|
||||
else:
|
||||
return f"""/no_think
|
||||
分析「{query}」,列出以下四個類別的屬性。每個類別列出 3-5 個常見屬性。
|
||||
|
||||
只回傳 JSON,格式如下:
|
||||
@@ -45,21 +70,48 @@ def get_step2_causal_chain_prompt(
|
||||
usages: List[str],
|
||||
users: List[str],
|
||||
existing_chains: List[dict],
|
||||
chain_index: int
|
||||
chain_index: int,
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""Step 2: 生成單條因果鏈"""
|
||||
"""Step 2: Generate a single causal chain"""
|
||||
existing_chains_text = ""
|
||||
if existing_chains:
|
||||
chains_list = [
|
||||
f"- {c['material']} → {c['function']} → {c['usage']} → {c['user']}"
|
||||
for c in existing_chains
|
||||
]
|
||||
existing_chains_text = f"""
|
||||
|
||||
if lang == "en":
|
||||
if existing_chains:
|
||||
chains_list = [
|
||||
f"- {c['material']} → {c['function']} → {c['usage']} → {c['user']}"
|
||||
for c in existing_chains
|
||||
]
|
||||
existing_chains_text = f"""
|
||||
[Already generated causal chains, do not repeat]
|
||||
{chr(10).join(chains_list)}
|
||||
"""
|
||||
return f"""/no_think
|
||||
Generate causal chain #{chain_index} for "{query}".
|
||||
|
||||
[Available Materials] {', '.join(materials)}
|
||||
[Available Functions] {', '.join(functions)}
|
||||
[Available Usages] {', '.join(usages)}
|
||||
[Available User Groups] {', '.join(users)}
|
||||
{existing_chains_text}
|
||||
[Rules]
|
||||
1. Select one attribute from each category to form a logical causal chain
|
||||
2. The causal relationship must be logical (materials determine functions, functions determine usages, usages determine user groups)
|
||||
3. Do not repeat existing causal chains
|
||||
|
||||
Return JSON only:
|
||||
{{"material": "selected material", "function": "selected function", "usage": "selected usage", "user": "selected user group"}}"""
|
||||
else:
|
||||
if existing_chains:
|
||||
chains_list = [
|
||||
f"- {c['material']} → {c['function']} → {c['usage']} → {c['user']}"
|
||||
for c in existing_chains
|
||||
]
|
||||
existing_chains_text = f"""
|
||||
【已生成的因果鏈,請勿重複】
|
||||
{chr(10).join(chains_list)}
|
||||
"""
|
||||
|
||||
return f"""/no_think
|
||||
return f"""/no_think
|
||||
為「{query}」生成第 {chain_index} 條因果鏈。
|
||||
|
||||
【可選材料】{', '.join(materials)}
|
||||
@@ -76,19 +128,52 @@ def get_step2_causal_chain_prompt(
|
||||
{{"material": "選擇的材料", "function": "選擇的功能", "usage": "選擇的用途", "user": "選擇的族群"}}"""
|
||||
|
||||
|
||||
def get_flat_attribute_prompt(query: str, categories: Optional[List[str]] = None) -> str:
|
||||
def get_flat_attribute_prompt(
|
||||
query: str,
|
||||
categories: Optional[List[str]] = None,
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""Generate prompt with flat/parallel categories (original design)."""
|
||||
cats = categories if categories else DEFAULT_CATEGORIES
|
||||
cats = categories if categories else get_default_categories(lang)
|
||||
cat_descs = get_category_descriptions(lang)
|
||||
|
||||
# Build category list
|
||||
category_lines = []
|
||||
for cat in cats:
|
||||
desc = CATEGORY_DESCRIPTIONS.get(cat, f"{cat}的相關屬性")
|
||||
category_lines.append(f"- {cat}:{desc}")
|
||||
desc = cat_descs.get(cat, f"Related attributes of {cat}" if lang == "en" else f"{cat}的相關屬性")
|
||||
category_lines.append(f"- {cat}: {desc}")
|
||||
|
||||
categories_text = "\n".join(category_lines)
|
||||
|
||||
prompt = f"""/no_think
|
||||
if lang == "en":
|
||||
prompt = f"""/no_think
|
||||
You are an object attribute analysis expert. Please break down the user's input object into the following attribute categories.
|
||||
|
||||
[Required Categories]
|
||||
{categories_text}
|
||||
|
||||
[Important] The return format must be valid JSON, and each node must have a "name" field:
|
||||
|
||||
```json
|
||||
{{
|
||||
"name": "Object Name",
|
||||
"children": [
|
||||
{{
|
||||
"name": "Category Name",
|
||||
"children": [
|
||||
{{"name": "Attribute 1"}},
|
||||
{{"name": "Attribute 2"}}
|
||||
]
|
||||
}}
|
||||
]
|
||||
}}
|
||||
```
|
||||
|
||||
Return JSON only, no other text.
|
||||
|
||||
User input: {query}"""
|
||||
else:
|
||||
prompt = f"""/no_think
|
||||
你是一個物件屬性分析專家。請將用戶輸入的物件拆解成以下屬性類別。
|
||||
|
||||
【必須包含的類別】
|
||||
@@ -123,14 +208,42 @@ def get_flat_attribute_prompt(query: str, categories: Optional[List[str]] = None
|
||||
def get_step0_category_analysis_prompt(
|
||||
query: str,
|
||||
suggested_count: int = 3,
|
||||
exclude_categories: List[str] | None = None
|
||||
exclude_categories: List[str] | None = None,
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""Step 0: LLM 分析建議類別"""
|
||||
exclude_text = ""
|
||||
if exclude_categories:
|
||||
exclude_text = f"\n【禁止使用的類別】{', '.join(exclude_categories)}(這些已經是固定類別,不要重複建議)\n"
|
||||
"""Step 0: LLM analyzes and suggests categories"""
|
||||
|
||||
return f"""/no_think
|
||||
if lang == "en":
|
||||
exclude_text = ""
|
||||
if exclude_categories:
|
||||
exclude_text = f"\n[Forbidden Categories] {', '.join(exclude_categories)} (These are already fixed categories, do not suggest duplicates)\n"
|
||||
|
||||
return f"""/no_think
|
||||
Analyze "{query}" and suggest {suggested_count} most suitable attribute categories to describe it.
|
||||
|
||||
[Common Category References] Characteristics, Shape, Color, Size, Brand, Price Range, Weight, Style, Occasion, Season, Technical Specifications
|
||||
{exclude_text}
|
||||
[Important]
|
||||
1. Choose categories that best describe the essence of this object
|
||||
2. Categories should have logical relationships
|
||||
3. Do not choose overly abstract or duplicate categories
|
||||
4. Must suggest creative categories different from the reference list
|
||||
|
||||
Return JSON only:
|
||||
{{
|
||||
"categories": [
|
||||
{{"name": "Category1", "description": "Description1", "order": 0}},
|
||||
{{"name": "Category2", "description": "Description2", "order": 1}}
|
||||
]
|
||||
}}
|
||||
|
||||
Object: {query}"""
|
||||
else:
|
||||
exclude_text = ""
|
||||
if exclude_categories:
|
||||
exclude_text = f"\n【禁止使用的類別】{', '.join(exclude_categories)}(這些已經是固定類別,不要重複建議)\n"
|
||||
|
||||
return f"""/no_think
|
||||
分析「{query}」,建議 {suggested_count} 個最適合的屬性類別來描述它。
|
||||
|
||||
【常見類別參考】特性、形狀、顏色、尺寸、品牌、價格區間、重量、風格、場合、季節、技術規格
|
||||
@@ -154,21 +267,35 @@ def get_step0_category_analysis_prompt(
|
||||
|
||||
def get_step1_dynamic_attributes_prompt(
|
||||
query: str,
|
||||
categories: List # List[CategoryDefinition]
|
||||
categories: List, # List[CategoryDefinition]
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""動態 Step 1 - 根據類別列表生成屬性"""
|
||||
# 按 order 排序並構建描述
|
||||
"""Dynamic Step 1 - Generate attributes based on category list"""
|
||||
# Sort by order and build description
|
||||
sorted_cats = sorted(categories, key=lambda x: x.order if hasattr(x, 'order') else x.get('order', 0))
|
||||
|
||||
category_desc = "\n".join([
|
||||
f"- {cat.name if hasattr(cat, 'name') else cat['name']}: {cat.description if hasattr(cat, 'description') else cat.get('description', '相關屬性')}"
|
||||
f"- {cat.name if hasattr(cat, 'name') else cat['name']}: {cat.description if hasattr(cat, 'description') else cat.get('description', 'Related attributes' if lang == 'en' else '相關屬性')}"
|
||||
for cat in sorted_cats
|
||||
])
|
||||
|
||||
category_keys = [cat.name if hasattr(cat, 'name') else cat['name'] for cat in sorted_cats]
|
||||
json_template = {cat: ["屬性1", "屬性2", "屬性3"] for cat in category_keys}
|
||||
|
||||
return f"""/no_think
|
||||
if lang == "en":
|
||||
json_template = {cat: ["attribute1", "attribute2", "attribute3"] for cat in category_keys}
|
||||
return f"""/no_think
|
||||
Analyze "{query}" and list attributes for the following categories. List 3-5 common attributes for each category.
|
||||
|
||||
[Category List]
|
||||
{category_desc}
|
||||
|
||||
Return JSON only:
|
||||
{json.dumps(json_template, ensure_ascii=False, indent=2)}
|
||||
|
||||
Object: {query}"""
|
||||
else:
|
||||
json_template = {cat: ["屬性1", "屬性2", "屬性3"] for cat in category_keys}
|
||||
return f"""/no_think
|
||||
分析「{query}」,列出以下類別的屬性。每個類別列出 3-5 個常見屬性。
|
||||
|
||||
【類別列表】
|
||||
@@ -185,30 +312,59 @@ def get_step2_dynamic_causal_chain_prompt(
|
||||
categories: List, # List[CategoryDefinition]
|
||||
attributes_by_category: Dict[str, List[str]],
|
||||
existing_chains: List[Dict[str, str]],
|
||||
chain_index: int
|
||||
chain_index: int,
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""動態 Step 2 - 生成動態類別的因果鏈"""
|
||||
"""Dynamic Step 2 - Generate causal chains for dynamic categories"""
|
||||
sorted_cats = sorted(categories, key=lambda x: x.order if hasattr(x, 'order') else x.get('order', 0))
|
||||
|
||||
# 構建可選屬性
|
||||
# Build available attributes
|
||||
available_attrs = "\n".join([
|
||||
f"【{cat.name if hasattr(cat, 'name') else cat['name']}】{', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}"
|
||||
f"[{cat.name if hasattr(cat, 'name') else cat['name']}] {', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}"
|
||||
for cat in sorted_cats
|
||||
])
|
||||
|
||||
# 已生成的因果鏈
|
||||
existing_text = ""
|
||||
if existing_chains:
|
||||
chains_list = [
|
||||
" → ".join([chain.get(cat.name if hasattr(cat, 'name') else cat['name'], '?') for cat in sorted_cats])
|
||||
for chain in existing_chains
|
||||
]
|
||||
existing_text = f"\n【已生成,請勿重複】\n" + "\n".join([f"- {c}" for c in chains_list])
|
||||
if lang == "en":
|
||||
# Already generated causal chains
|
||||
existing_text = ""
|
||||
if existing_chains:
|
||||
chains_list = [
|
||||
" → ".join([chain.get(cat.name if hasattr(cat, 'name') else cat['name'], '?') for cat in sorted_cats])
|
||||
for chain in existing_chains
|
||||
]
|
||||
existing_text = "\n[Already generated, do not repeat]\n" + "\n".join([f"- {c}" for c in chains_list])
|
||||
|
||||
# JSON 模板
|
||||
json_template = {cat.name if hasattr(cat, 'name') else cat['name']: f"選擇的{cat.name if hasattr(cat, 'name') else cat['name']}" for cat in sorted_cats}
|
||||
# JSON template
|
||||
json_template = {cat.name if hasattr(cat, 'name') else cat['name']: f"selected {cat.name if hasattr(cat, 'name') else cat['name']}" for cat in sorted_cats}
|
||||
|
||||
return f"""/no_think
|
||||
return f"""/no_think
|
||||
Generate causal chain #{chain_index} for "{query}".
|
||||
|
||||
[Available Attributes]
|
||||
{available_attrs}
|
||||
{existing_text}
|
||||
|
||||
[Rules]
|
||||
1. Select one attribute from each category
|
||||
2. Causal relationships must be logical
|
||||
3. Do not repeat
|
||||
|
||||
Return JSON only:
|
||||
{json.dumps(json_template, ensure_ascii=False, indent=2)}"""
|
||||
else:
|
||||
# 已生成的因果鏈
|
||||
existing_text = ""
|
||||
if existing_chains:
|
||||
chains_list = [
|
||||
" → ".join([chain.get(cat.name if hasattr(cat, 'name') else cat['name'], '?') for cat in sorted_cats])
|
||||
for chain in existing_chains
|
||||
]
|
||||
existing_text = "\n【已生成,請勿重複】\n" + "\n".join([f"- {c}" for c in chains_list])
|
||||
|
||||
# JSON 模板
|
||||
json_template = {cat.name if hasattr(cat, 'name') else cat['name']: f"選擇的{cat.name if hasattr(cat, 'name') else cat['name']}" for cat in sorted_cats}
|
||||
|
||||
return f"""/no_think
|
||||
為「{query}」生成第 {chain_index} 條因果鏈。
|
||||
|
||||
【可選屬性】
|
||||
@@ -230,20 +386,46 @@ def get_step2_dag_relationships_prompt(
|
||||
query: str,
|
||||
categories: List, # List[CategoryDefinition]
|
||||
attributes_by_category: Dict[str, List[str]],
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""生成相鄰類別之間的自然關係"""
|
||||
"""Generate natural relationships between adjacent categories"""
|
||||
sorted_cats = sorted(categories, key=lambda x: x.order if hasattr(x, 'order') else x.get('order', 0))
|
||||
|
||||
# Build attribute listing
|
||||
attr_listing = "\n".join([
|
||||
f"【{cat.name if hasattr(cat, 'name') else cat['name']}】{', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}"
|
||||
f"[{cat.name if hasattr(cat, 'name') else cat['name']}] {', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}"
|
||||
for cat in sorted_cats
|
||||
])
|
||||
|
||||
# Build direction hints
|
||||
direction_hints = " → ".join([cat.name if hasattr(cat, 'name') else cat['name'] for cat in sorted_cats])
|
||||
|
||||
return f"""/no_think
|
||||
if lang == "en":
|
||||
return f"""/no_think
|
||||
Analyze the attribute relationships of "{query}".
|
||||
|
||||
{attr_listing}
|
||||
|
||||
[Relationship Direction] {direction_hints}
|
||||
|
||||
[Rules]
|
||||
1. Only establish relationships between adjacent categories (e.g., Materials→Functions, Functions→Usages)
|
||||
2. Only output pairs that have true causal or associative relationships
|
||||
3. An attribute can connect to multiple downstream attributes, or none at all
|
||||
4. Not every attribute needs to have connections
|
||||
5. Relationships should be reasonable and meaningful
|
||||
|
||||
Return JSON:
|
||||
{{
|
||||
"relationships": [
|
||||
{{"source_category": "CategoryA", "source": "attribute name", "target_category": "CategoryB", "target": "attribute name"}},
|
||||
...
|
||||
]
|
||||
}}
|
||||
|
||||
Return JSON only."""
|
||||
else:
|
||||
return f"""/no_think
|
||||
分析「{query}」的屬性關係。
|
||||
|
||||
{attr_listing}
|
||||
|
||||
@@ -1,34 +1,68 @@
|
||||
"""Expert Transformation Agent 提示詞模組"""
|
||||
"""Expert Transformation Agent prompts module - Bilingual support"""
|
||||
|
||||
from typing import List, Optional
|
||||
from .language_config import LanguageType
|
||||
|
||||
|
||||
def get_expert_generation_prompt(
|
||||
query: str,
|
||||
categories: List[str],
|
||||
expert_count: int,
|
||||
custom_experts: Optional[List[str]] = None
|
||||
custom_experts: Optional[List[str]] = None,
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""Step 0: 生成專家團隊(不依賴主題,純隨機多元)"""
|
||||
"""Step 0: Generate expert team (not dependent on topic, purely random and diverse)"""
|
||||
import time
|
||||
import random
|
||||
|
||||
custom_text = ""
|
||||
if custom_experts and len(custom_experts) > 0:
|
||||
custom_text = f"(已指定:{', '.join(custom_experts[:expert_count])})"
|
||||
|
||||
# 加入時間戳和隨機數來增加多樣性
|
||||
# Add timestamp and random number for diversity
|
||||
seed = int(time.time() * 1000) % 10000
|
||||
diversity_hints = [
|
||||
"冷門、非主流、跨領域",
|
||||
"罕見職業、新興領域、邊緣學科",
|
||||
"非傳統、創新、小眾專業",
|
||||
"未來趨向、實驗性、非常規",
|
||||
"跨文化、混合領域、獨特視角"
|
||||
]
|
||||
hint = random.choice(diversity_hints)
|
||||
|
||||
return f"""/no_think
|
||||
if lang == "en":
|
||||
custom_text = ""
|
||||
if custom_experts and len(custom_experts) > 0:
|
||||
custom_text = f" (Specified: {', '.join(custom_experts[:expert_count])})"
|
||||
|
||||
diversity_hints = [
|
||||
"obscure, non-mainstream, cross-disciplinary",
|
||||
"rare occupations, emerging fields, fringe disciplines",
|
||||
"unconventional, innovative, niche specialties",
|
||||
"future-oriented, experimental, non-traditional",
|
||||
"cross-cultural, hybrid fields, unique perspectives"
|
||||
]
|
||||
hint = random.choice(diversity_hints)
|
||||
|
||||
return f"""/no_think
|
||||
Randomly assemble a team of {expert_count} experts from completely different fields{custom_text}.
|
||||
|
||||
[Innovation Requirements] (Random seed: {seed})
|
||||
- Prioritize {hint} experts
|
||||
- Avoid common professions (such as doctors, engineers, teachers, lawyers, etc.)
|
||||
- Each expert must be from a completely unrelated field
|
||||
- The rarer and more innovative, the better
|
||||
|
||||
Return JSON:
|
||||
{{"experts": [{{"id": "expert-0", "name": "profession", "domain": "field", "perspective": "viewpoint"}}, ...]}}
|
||||
|
||||
Rules:
|
||||
- id should be expert-0 to expert-{expert_count - 1}
|
||||
- name is the profession name (not a person's name), 2-5 words
|
||||
- domain should be specific and unique, no duplicate types"""
|
||||
else:
|
||||
custom_text = ""
|
||||
if custom_experts and len(custom_experts) > 0:
|
||||
custom_text = f"(已指定:{', '.join(custom_experts[:expert_count])})"
|
||||
|
||||
diversity_hints = [
|
||||
"冷門、非主流、跨領域",
|
||||
"罕見職業、新興領域、邊緣學科",
|
||||
"非傳統、創新、小眾專業",
|
||||
"未來趨向、實驗性、非常規",
|
||||
"跨文化、混合領域、獨特視角"
|
||||
]
|
||||
hint = random.choice(diversity_hints)
|
||||
|
||||
return f"""/no_think
|
||||
隨機組建 {expert_count} 個來自完全不同領域的專家團隊{custom_text}。
|
||||
|
||||
【創新要求】(隨機種子:{seed})
|
||||
@@ -50,13 +84,39 @@ def get_expert_keyword_generation_prompt(
|
||||
category: str,
|
||||
attribute: str,
|
||||
experts: List[dict], # List[ExpertProfile]
|
||||
keywords_per_expert: int = 1
|
||||
keywords_per_expert: int = 1,
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""Step 1: 專家視角關鍵字生成"""
|
||||
# 建立專家列表,格式更清晰
|
||||
"""Step 1: Expert perspective keyword generation"""
|
||||
# Build expert list in clearer format
|
||||
experts_list = "\n".join([f"- {exp['id']}: {exp['name']}" for exp in experts])
|
||||
|
||||
return f"""/no_think
|
||||
if lang == "en":
|
||||
return f"""/no_think
|
||||
You need to play the role of the following experts to generate innovative keywords for an attribute:
|
||||
|
||||
[Expert List]
|
||||
{experts_list}
|
||||
|
||||
[Task]
|
||||
Attribute: "{attribute}" (Category: {category})
|
||||
|
||||
For each expert, please:
|
||||
1. First understand the professional background, knowledge domain, and work content of that profession
|
||||
2. Think about "{attribute}" from that profession's unique perspective
|
||||
3. Generate {keywords_per_expert} innovative keyword(s) related to that specialty (2-6 words)
|
||||
|
||||
Keywords must reflect that expert's professional thinking style, for example:
|
||||
- Accountant viewing "movement" → "cash flow", "cost-benefit"
|
||||
- Architect viewing "movement" → "circulation design", "spatial flow"
|
||||
- Psychologist viewing "movement" → "behavioral motivation", "emotional transition"
|
||||
|
||||
Return JSON:
|
||||
{{"keywords": [{{"keyword": "term", "expert_id": "expert-X", "expert_name": "name"}}, ...]}}
|
||||
|
||||
Total of {len(experts) * keywords_per_expert} keywords needed, each keyword must be clearly related to the corresponding expert's professional field."""
|
||||
else:
|
||||
return f"""/no_think
|
||||
你需要扮演以下專家,為屬性生成創新關鍵字:
|
||||
|
||||
【專家名單】
|
||||
@@ -86,13 +146,29 @@ def get_single_description_prompt(
|
||||
keyword: str,
|
||||
expert_id: str,
|
||||
expert_name: str,
|
||||
expert_domain: str
|
||||
expert_domain: str,
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""Step 2: 為單一關鍵字生成描述"""
|
||||
# 如果 domain 是通用的,就只用職業名稱
|
||||
domain_text = f"({expert_domain}領域)" if expert_domain and expert_domain != "Professional Field" else ""
|
||||
"""Step 2: Generate description for a single keyword"""
|
||||
if lang == "en":
|
||||
# If domain is generic, just use profession name
|
||||
domain_text = f" ({expert_domain} field)" if expert_domain and expert_domain != "Professional Field" else ""
|
||||
|
||||
return f"""/no_think
|
||||
return f"""/no_think
|
||||
You are a {expert_name}{domain_text}.
|
||||
|
||||
Task: Generate an innovative application description for "{query}".
|
||||
Keyword: {keyword}
|
||||
|
||||
From your professional perspective, explain how to apply the concept of "{keyword}" to "{query}". The description should be specific, creative, 15-30 words.
|
||||
|
||||
Return JSON only, no other text:
|
||||
{{"description": "your innovative application description"}}"""
|
||||
else:
|
||||
# 如果 domain 是通用的,就只用職業名稱
|
||||
domain_text = f"({expert_domain}領域)" if expert_domain and expert_domain != "Professional Field" else ""
|
||||
|
||||
return f"""/no_think
|
||||
你是一位{expert_name}{domain_text}。
|
||||
|
||||
任務:為「{query}」生成一段創新應用描述。
|
||||
|
||||
51
backend/app/prompts/language_config.py
Normal file
51
backend/app/prompts/language_config.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""Language configuration for prompts"""
|
||||
|
||||
from enum import Enum
|
||||
from typing import Literal
|
||||
|
||||
class Language(str, Enum):
    """Language codes available for prompts.

    The ``str`` mixin makes every member compare equal to its plain string
    code, so ``Language.CHINESE == "zh"`` holds.
    """

    CHINESE = "zh"  # code "zh"
    ENGLISH = "en"  # code "en"
|
||||
|
||||
# Closed set of language codes accepted by the prompt builders.
LanguageType = Literal["zh", "en"]

# Default categories for each language. The two lists are index-aligned:
# position i in "zh" and position i in "en" name the same category.
DEFAULT_CATEGORIES = {
    "zh": ["材料", "功能", "用途", "使用族群", "特性"],
    "en": ["Materials", "Functions", "Usages", "User Groups", "Characteristics"],
}

# One-line description of every default category, keyed by language code and
# then by the category name in that language.
CATEGORY_DESCRIPTIONS = {
    "zh": {
        "材料": "物件由什麼材料組成",
        "功能": "物件能做什麼",
        "用途": "物件在什麼場景使用",
        "使用族群": "誰會使用這個物件",
        "特性": "物件有什麼特徵",
    },
    "en": {
        "Materials": "What materials the object is made of",
        "Functions": "What the object can do",
        "Usages": "In what scenarios the object is used",
        "User Groups": "Who uses this object",
        "Characteristics": "What features the object has",
    },
}

# Category name mappings between languages, built by pairing the index-aligned
# default lists so the translation tables cannot drift from DEFAULT_CATEGORIES.
CATEGORY_MAPPING = {
    "zh_to_en": dict(zip(DEFAULT_CATEGORIES["zh"], DEFAULT_CATEGORIES["en"])),
    "en_to_zh": dict(zip(DEFAULT_CATEGORIES["en"], DEFAULT_CATEGORIES["zh"])),
}
|
||||
@@ -1,22 +1,43 @@
|
||||
"""Transformation Agent 提示詞模組"""
|
||||
"""Transformation Agent prompts module - Bilingual support"""
|
||||
|
||||
from typing import List
|
||||
from .language_config import LanguageType
|
||||
|
||||
|
||||
def get_keyword_generation_prompt(
|
||||
category: str,
|
||||
attributes: List[str],
|
||||
keyword_count: int = 3
|
||||
keyword_count: int = 3,
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""
|
||||
Step 1: 生成新關鍵字
|
||||
Step 1: Generate new keywords
|
||||
|
||||
給定類別和現有屬性,生成全新的、有創意的關鍵字。
|
||||
不考慮原始查詢,只專注於類別本身可能的延伸。
|
||||
Given a category and existing attributes, generate new, creative keywords.
|
||||
Don't consider the original query, focus only on possible extensions of the category itself.
|
||||
"""
|
||||
attrs_text = "、".join(attributes)
|
||||
attrs_text = ", ".join(attributes) if lang == "en" else "、".join(attributes)
|
||||
|
||||
return f"""/no_think
|
||||
if lang == "en":
|
||||
return f"""/no_think
|
||||
You are a creative brainstorming expert. Given a category and its existing attributes, please generate new, creative keywords or descriptive phrases.
|
||||
|
||||
[Category] {category}
|
||||
[Existing Attributes] {attrs_text}
|
||||
|
||||
[Important Rules]
|
||||
1. Generate {keyword_count} completely new keywords
|
||||
2. Keywords must fit within the scope of "{category}" category
|
||||
3. Keywords should be creative and not duplicate or be too similar to existing attributes
|
||||
4. Don't consider any specific object, focus only on possible extensions of this category
|
||||
5. Each keyword should be 2-6 words
|
||||
|
||||
Return JSON only:
|
||||
{{
|
||||
"keywords": ["keyword1", "keyword2", "keyword3"]
|
||||
}}"""
|
||||
else:
|
||||
return f"""/no_think
|
||||
你是一個創意發想專家。給定一個類別和該類別下的現有屬性,請生成全新的、有創意的關鍵字或描述片段。
|
||||
|
||||
【類別】{category}
|
||||
@@ -38,14 +59,36 @@ def get_keyword_generation_prompt(
|
||||
def get_description_generation_prompt(
|
||||
query: str,
|
||||
category: str,
|
||||
keyword: str
|
||||
keyword: str,
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""
|
||||
Step 2: 結合原始查詢生成描述
|
||||
Step 2: Combine with original query to generate description
|
||||
|
||||
用新關鍵字創造一個與原始查詢相關的創新應用描述。
|
||||
Use new keyword to create an innovative application description related to the original query.
|
||||
"""
|
||||
return f"""/no_think
|
||||
if lang == "en":
|
||||
return f"""/no_think
|
||||
You are an innovation application expert. Please apply a new keyword concept to a specific object to create an innovative application description.
|
||||
|
||||
[Object] {query}
|
||||
[Category] {category}
|
||||
[New Keyword] {keyword}
|
||||
|
||||
[Task]
|
||||
Using the concept of "{keyword}", create an innovative application description for "{query}".
|
||||
The description should be a complete sentence or phrase explaining how to apply this new concept to the object.
|
||||
|
||||
[Example Format]
|
||||
- If the object is "bicycle" and keyword is "monitor", you could generate "bicycle monitors the rider's health status"
|
||||
- If the object is "umbrella" and keyword is "generate power", you could generate "umbrella generates electricity using raindrop impacts"
|
||||
|
||||
Return JSON only:
|
||||
{{
|
||||
"description": "innovative application description"
|
||||
}}"""
|
||||
else:
|
||||
return f"""/no_think
|
||||
你是一個創新應用專家。請將一個新的關鍵字概念應用到特定物件上,創造出創新的應用描述。
|
||||
|
||||
【物件】{query}
|
||||
@@ -69,15 +112,35 @@ def get_description_generation_prompt(
|
||||
def get_batch_description_prompt(
|
||||
query: str,
|
||||
category: str,
|
||||
keywords: List[str]
|
||||
keywords: List[str],
|
||||
lang: LanguageType = "zh"
|
||||
) -> str:
|
||||
"""
|
||||
批次生成描述(可選的優化版本,一次處理多個關鍵字)
|
||||
Batch description generation (optional optimized version, process multiple keywords at once)
|
||||
"""
|
||||
keywords_text = "、".join(keywords)
|
||||
keywords_json = ", ".join([f'"{k}"' for k in keywords])
|
||||
keywords_text = ", ".join(keywords) if lang == "en" else "、".join(keywords)
|
||||
|
||||
return f"""/no_think
|
||||
if lang == "en":
|
||||
return f"""/no_think
|
||||
You are an innovation application expert. Please apply multiple new keyword concepts to a specific object, creating an innovative application description for each keyword.
|
||||
|
||||
[Object] {query}
|
||||
[Category] {category}
|
||||
[New Keywords] {keywords_text}
|
||||
|
||||
[Task]
|
||||
Create an innovative application description related to "{query}" for each keyword.
|
||||
Each description should be a complete sentence or phrase.
|
||||
|
||||
Return JSON only:
|
||||
{{
|
||||
"descriptions": [
|
||||
{{"keyword": "keyword1", "description": "description1"}},
|
||||
{{"keyword": "keyword2", "description": "description2"}}
|
||||
]
|
||||
}}"""
|
||||
else:
|
||||
return f"""/no_think
|
||||
你是一個創新應用專家。請將多個新的關鍵字概念應用到特定物件上,為每個關鍵字創造創新的應用描述。
|
||||
|
||||
【物件】{query}
|
||||
|
||||
@@ -58,7 +58,8 @@ async def execute_step0(
|
||||
prompt = get_step0_category_analysis_prompt(
|
||||
request.query,
|
||||
request.suggested_category_count,
|
||||
exclude_categories=exclude_categories
|
||||
exclude_categories=exclude_categories,
|
||||
lang=request.lang
|
||||
)
|
||||
temperature = request.temperature if request.temperature is not None else 0.7
|
||||
response = await ollama_provider.generate(
|
||||
@@ -310,7 +311,7 @@ async def generate_sse_events(request: StreamAnalyzeRequest) -> AsyncGenerator[s
|
||||
# ========== Step 1: Generate Attributes (Dynamic) ==========
|
||||
yield f"event: step1_start\ndata: {json.dumps({'message': '生成屬性...'}, ensure_ascii=False)}\n\n"
|
||||
|
||||
step1_prompt = get_step1_dynamic_attributes_prompt(request.query, final_categories)
|
||||
step1_prompt = get_step1_dynamic_attributes_prompt(request.query, final_categories, lang=request.lang)
|
||||
logger.info(f"Step 1 prompt: {step1_prompt[:200]}")
|
||||
|
||||
step1_response = await ollama_provider.generate(
|
||||
@@ -330,6 +331,7 @@ async def generate_sse_events(request: StreamAnalyzeRequest) -> AsyncGenerator[s
|
||||
query=request.query,
|
||||
categories=final_categories,
|
||||
attributes_by_category=step1_result.attributes,
|
||||
lang=request.lang
|
||||
)
|
||||
logger.info(f"Step 2 (relationships) prompt: {step2_prompt[:300]}")
|
||||
|
||||
|
||||
@@ -63,7 +63,8 @@ async def deduplicate_descriptions(request: DeduplicationRequest) -> Deduplicati
|
||||
# 使用 LLM 成對比較去重
|
||||
result = await llm_deduplication_service.deduplicate(
|
||||
descriptions=request.descriptions,
|
||||
model=request.model
|
||||
model=request.model,
|
||||
lang=request.lang
|
||||
)
|
||||
return result
|
||||
except ValueError as e:
|
||||
|
||||
@@ -68,7 +68,8 @@ async def generate_expert_transformation_events(
|
||||
query=request.query,
|
||||
categories=all_categories,
|
||||
expert_count=request.expert_count,
|
||||
custom_experts=actual_custom_experts if actual_custom_experts else None
|
||||
custom_experts=actual_custom_experts if actual_custom_experts else None,
|
||||
lang=request.lang
|
||||
)
|
||||
logger.info(f"Expert prompt: {expert_prompt[:200]}")
|
||||
|
||||
@@ -119,7 +120,8 @@ async def generate_expert_transformation_events(
|
||||
query=request.query,
|
||||
categories=all_categories,
|
||||
expert_count=request.expert_count,
|
||||
custom_experts=actual_custom_experts if actual_custom_experts else None
|
||||
custom_experts=actual_custom_experts if actual_custom_experts else None,
|
||||
lang=request.lang
|
||||
)
|
||||
|
||||
expert_response = await ollama_provider.generate(
|
||||
@@ -160,7 +162,8 @@ async def generate_expert_transformation_events(
|
||||
category=request.category,
|
||||
attribute=attribute,
|
||||
experts=[e.model_dump() for e in experts],
|
||||
keywords_per_expert=request.keywords_per_expert
|
||||
keywords_per_expert=request.keywords_per_expert,
|
||||
lang=request.lang
|
||||
)
|
||||
logger.info(f"Keyword prompt for '{attribute}': {kw_prompt[:300]}")
|
||||
|
||||
@@ -214,7 +217,8 @@ async def generate_expert_transformation_events(
|
||||
keyword=kw.keyword,
|
||||
expert_id=kw.expert_id,
|
||||
expert_name=kw.expert_name,
|
||||
expert_domain=expert_domain
|
||||
expert_domain=expert_domain,
|
||||
lang=request.lang
|
||||
)
|
||||
|
||||
desc_response = await ollama_provider.generate(
|
||||
|
||||
133
backend/app/routers/patent_search.py
Normal file
133
backend/app/routers/patent_search.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""Patent Search Router - Search for similar patents"""
|
||||
|
||||
import logging
|
||||
from typing import Optional, List
|
||||
|
||||
from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ..services.patent_search_service import patent_search_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/api/patent", tags=["patent"])
|
||||
|
||||
|
||||
# ===== Request/Response Models =====
|
||||
|
||||
class PatentSearchRequest(BaseModel):
    """Patent search request"""

    # Free-text search input: either a full description or a few keywords.
    query: str

    # Requested cap on the number of patents returned; the /search endpoint
    # clamps whatever is sent here into the 1-20 range.
    max_results: int = 10
|
||||
|
||||
|
||||
class PatentResult(BaseModel):
    """Single patent result"""

    # --- always present -------------------------------------------------
    publication_number: str
    title: str
    snippet: str

    # --- bibliographic details, omitted when the upstream has none -------
    publication_date: Optional[str] = None
    assignee: Optional[str] = None
    inventor: Optional[str] = None

    # Legal state reported upstream: ACTIVE, NOT_ACTIVE, UNKNOWN
    status: str

    # --- absolute links, None when the upstream supplies no path ---------
    pdf_url: Optional[str] = None
    thumbnail_url: Optional[str] = None
|
||||
|
||||
|
||||
class PatentSearchResponse(BaseModel):
    """Patent search response"""

    # Echo of the query string that was searched.
    query: str

    # Match count reported by the search service (not len(patents)).
    total_results: int

    # The patents actually returned, already capped by the endpoint.
    patents: List[PatentResult]

    # Failure message from the search service; None on success.
    error: Optional[str] = None
|
||||
|
||||
|
||||
class BatchPatentSearchRequest(BaseModel):
    """Batch patent search request - search multiple descriptions"""

    # Descriptions to look up; each one becomes its own search.
    queries: List[str]

    # Requested cap per description; the batch endpoint clamps it to 1-10.
    max_results_per_query: int = 5
|
||||
|
||||
|
||||
class BatchPatentSearchResult(BaseModel):
    """Results for a single query in batch search"""

    # The description this entry belongs to.
    query: str

    # Match count reported by the search service for this description.
    total_results: int

    # Patents returned for this description.
    patents: List[PatentResult]

    # Failure message for this description only; None on success.
    error: Optional[str] = None
|
||||
|
||||
|
||||
class BatchPatentSearchResponse(BaseModel):
    """Batch patent search response"""

    # One entry per submitted query, in submission order.
    results: List[BatchPatentSearchResult]

    # Number of queries that were submitted in the request.
    total_queries: int
|
||||
|
||||
|
||||
# ===== Endpoints =====
|
||||
|
||||
@router.post("/search", response_model=PatentSearchResponse)
async def search_patents(request: PatentSearchRequest):
    """
    Search for patents similar to the given description/query.

    Uses Google Patents to find related patents based on keywords.
    """
    logger.info(f"Patent search request: {request.query[:100]}...")

    # Clamp the caller-supplied limit into [1, 20] before calling the service.
    capped = max(1, request.max_results)
    if capped > 20:
        capped = 20

    outcome = await patent_search_service.search(
        query=request.query,
        max_results=capped,
    )

    # Missing keys fall back to an empty, error-free result.
    hits = [PatentResult(**entry) for entry in outcome.get("patents", [])]
    return PatentSearchResponse(
        query=request.query,
        total_results=outcome.get("total_results", 0),
        patents=hits,
        error=outcome.get("error"),
    )
|
||||
|
||||
|
||||
@router.post("/search/batch", response_model=BatchPatentSearchResponse)
async def batch_search_patents(request: BatchPatentSearchRequest):
    """
    Search for patents for multiple descriptions at once.

    Useful for checking multiple creative descriptions against patents.
    """
    queries = request.queries
    logger.info(f"Batch patent search: {len(queries)} queries")

    # Clamp the per-query limit into [1, 10].
    per_query_cap = max(1, min(request.max_results_per_query, 10))

    async def run_one(text: str) -> BatchPatentSearchResult:
        """Search a single description and wrap the service dict in a model."""
        found = await patent_search_service.search(
            query=text,
            max_results=per_query_cap,
        )
        return BatchPatentSearchResult(
            query=text,
            total_results=found.get("total_results", 0),
            patents=[PatentResult(**entry) for entry in found.get("patents", [])],
            error=found.get("error"),
        )

    # Each search is awaited before the next one starts, in request order.
    collected: List[BatchPatentSearchResult] = [await run_one(text) for text in queries]

    return BatchPatentSearchResponse(
        results=collected,
        total_queries=len(queries),
    )
|
||||
|
||||
|
||||
@router.get("/health")
async def patent_search_health():
    """Check if patent search service is working"""
    # Probe the upstream with the cheapest possible real search.
    probe = await patent_search_service.search("test", max_results=1)

    failure = probe.get("error")
    if not failure:
        return {"status": "healthy"}
    return {"status": "unhealthy", "error": failure}
|
||||
@@ -36,7 +36,8 @@ async def generate_transformation_events(
|
||||
keyword_prompt = get_keyword_generation_prompt(
|
||||
category=request.category,
|
||||
attributes=request.attributes,
|
||||
keyword_count=request.keyword_count
|
||||
keyword_count=request.keyword_count,
|
||||
lang=request.lang
|
||||
)
|
||||
logger.info(f"Keyword prompt: {keyword_prompt[:200]}")
|
||||
|
||||
@@ -61,7 +62,8 @@ async def generate_transformation_events(
|
||||
desc_prompt = get_batch_description_prompt(
|
||||
query=request.query,
|
||||
category=request.category,
|
||||
keywords=new_keywords
|
||||
keywords=new_keywords,
|
||||
lang=request.lang
|
||||
)
|
||||
logger.info(f"Description prompt: {desc_prompt[:300]}")
|
||||
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
"""
|
||||
LLM Deduplication Service - 使用 LLM 成對比較進行去重
|
||||
LLM Deduplication Service - Using LLM pairwise comparison for deduplication
|
||||
|
||||
讓 LLM 判斷兩個描述是否語意重複,透過並行處理加速。
|
||||
Let LLM determine whether two descriptions are semantically duplicate, accelerated by parallel processing.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import List, Tuple, Optional
|
||||
from typing import List, Tuple, Optional, Literal
|
||||
|
||||
import httpx
|
||||
import numpy as np
|
||||
@@ -18,6 +18,7 @@ from ..models.schemas import (
|
||||
DeduplicationMethod,
|
||||
DescriptionGroup,
|
||||
)
|
||||
from ..prompts.language_config import LanguageType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -31,27 +32,20 @@ class LLMDeduplicationService:
|
||||
self.client = httpx.AsyncClient(timeout=60.0)
|
||||
self.max_concurrent = 5 # 最大並行數,避免 Ollama 過載
|
||||
|
||||
async def compare_pair(
|
||||
self,
|
||||
desc1: str,
|
||||
desc2: str,
|
||||
model: str,
|
||||
semaphore: asyncio.Semaphore
|
||||
) -> bool:
|
||||
"""
|
||||
讓 LLM 判斷兩個描述是否語意重複
|
||||
def _get_comparison_prompt(self, desc1: str, desc2: str, lang: LanguageType = "zh") -> str:
|
||||
"""Get comparison prompt in the specified language"""
|
||||
if lang == "en":
|
||||
return f"""Determine whether the following two innovative descriptions express the same or very similar concepts:
|
||||
|
||||
Args:
|
||||
desc1: 第一個描述
|
||||
desc2: 第二個描述
|
||||
model: LLM 模型名稱
|
||||
semaphore: 並行控制信號量
|
||||
Description 1: {desc1}
|
||||
|
||||
Returns:
|
||||
bool: 是否為重複描述
|
||||
"""
|
||||
async with semaphore: # 控制並行數
|
||||
prompt = f"""判斷以下兩個創新描述是否表達相同或非常相似的概念:
|
||||
Description 2: {desc2}
|
||||
|
||||
If both descriptions essentially express the same or very similar innovative concept, answer "YES"
|
||||
If the two descriptions express different innovative concepts, answer "NO"
|
||||
Only answer YES or NO, no other text"""
|
||||
else:
|
||||
return f"""判斷以下兩個創新描述是否表達相同或非常相似的概念:
|
||||
|
||||
描述1: {desc1}
|
||||
|
||||
@@ -61,6 +55,30 @@ class LLMDeduplicationService:
|
||||
如果兩者描述不同的創新概念,回答 "NO"
|
||||
只回答 YES 或 NO,不要其他文字"""
|
||||
|
||||
async def compare_pair(
|
||||
self,
|
||||
desc1: str,
|
||||
desc2: str,
|
||||
model: str,
|
||||
semaphore: asyncio.Semaphore,
|
||||
lang: LanguageType = "zh"
|
||||
) -> bool:
|
||||
"""
|
||||
Let LLM determine whether two descriptions are semantically duplicate
|
||||
|
||||
Args:
|
||||
desc1: First description
|
||||
desc2: Second description
|
||||
model: LLM model name
|
||||
semaphore: Concurrency control semaphore
|
||||
lang: Language for the prompt
|
||||
|
||||
Returns:
|
||||
bool: Whether the descriptions are duplicates
|
||||
"""
|
||||
async with semaphore: # Control concurrency
|
||||
prompt = self._get_comparison_prompt(desc1, desc2, lang)
|
||||
|
||||
try:
|
||||
response = await self.client.post(
|
||||
f"{self.base_url}/api/generate",
|
||||
@@ -86,26 +104,28 @@ class LLMDeduplicationService:
|
||||
async def compare_batch(
    self,
    pairs: List[Tuple[int, int, str, str]],
    model: str,
    lang: LanguageType = "zh"
) -> List[Tuple[int, int, bool]]:
    """
    Compare many description pairs concurrently.

    Every pair is handed to ``compare_pair`` together with one shared
    semaphore sized by ``self.max_concurrent``.

    Args:
        pairs: Pairs to compare, as [(i, j, desc1, desc2), ...]
        model: LLM model name
        lang: Language for the prompt

    Returns:
        Comparison results [(i, j, is_similar), ...], in the order of ``pairs``
    """
    limiter = asyncio.Semaphore(self.max_concurrent)

    async def judge(entry: Tuple[int, int, str, str]) -> Tuple[int, int, bool]:
        left_idx, right_idx, left_text, right_text = entry
        verdict = await self.compare_pair(left_text, right_text, model, limiter, lang)
        return (left_idx, right_idx, verdict)

    # Schedule every comparison at once; gather keeps the input order.
    outcomes = await asyncio.gather(*(judge(entry) for entry in pairs))
    return outcomes
|
||||
|
||||
@@ -144,17 +164,19 @@ class LLMDeduplicationService:
|
||||
async def deduplicate(
|
||||
self,
|
||||
descriptions: List[ExpertTransformationDescription],
|
||||
model: Optional[str] = None
|
||||
model: Optional[str] = None,
|
||||
lang: LanguageType = "zh"
|
||||
) -> DeduplicationResult:
|
||||
"""
|
||||
使用 LLM 成對比較進行去重
|
||||
Use LLM pairwise comparison for deduplication
|
||||
|
||||
Args:
|
||||
descriptions: 要去重的描述列表
|
||||
model: LLM 模型名稱
|
||||
descriptions: List of descriptions to deduplicate
|
||||
model: LLM model name
|
||||
lang: Language for the prompt
|
||||
|
||||
Returns:
|
||||
DeduplicationResult: 去重結果
|
||||
DeduplicationResult: Deduplication result
|
||||
"""
|
||||
model = model or self.default_model
|
||||
|
||||
@@ -188,10 +210,10 @@ class LLMDeduplicationService:
|
||||
))
|
||||
|
||||
total_pairs = len(pairs)
|
||||
logger.info(f"LLM deduplication: {total_pairs} pairs to compare (parallel={self.max_concurrent}, model={model})")
|
||||
logger.info(f"LLM deduplication: {total_pairs} pairs to compare (parallel={self.max_concurrent}, model={model}, lang={lang})")
|
||||
|
||||
# 並行批次比較
|
||||
results = await self.compare_batch(pairs, model)
|
||||
# Parallel batch comparison
|
||||
results = await self.compare_batch(pairs, model, lang)
|
||||
|
||||
# 填入相似度矩陣
|
||||
for i, j, is_similar in results:
|
||||
|
||||
195
backend/app/services/patent_search_service.py
Normal file
195
backend/app/services/patent_search_service.py
Normal file
@@ -0,0 +1,195 @@
|
||||
"""Patent Search Service using Google Patents XHR API"""
|
||||
|
||||
import httpx
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
from urllib.parse import quote_plus
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PatentSearchResult:
    """Single patent search result"""

    # Attribute names in the exact order to_dict() emits them.
    _FIELD_NAMES = (
        "publication_number",
        "title",
        "snippet",
        "publication_date",
        "assignee",
        "inventor",
        "status",
        "pdf_url",
        "thumbnail_url",
    )

    def __init__(
        self,
        publication_number: str,
        title: str,
        snippet: str,
        publication_date: Optional[str],
        assignee: Optional[str],
        inventor: Optional[str],
        status: str,
        pdf_url: Optional[str] = None,
        thumbnail_url: Optional[str] = None,
    ):
        # Identification and text shown to the user.
        self.publication_number, self.title, self.snippet = publication_number, title, snippet
        # Bibliographic details; any of them may be None.
        self.publication_date, self.assignee, self.inventor = publication_date, assignee, inventor
        self.status = status
        # Optional absolute links.
        self.pdf_url, self.thumbnail_url = pdf_url, thumbnail_url

    def to_dict(self):
        """Return the result as a plain dict keyed by attribute name."""
        return {name: getattr(self, name) for name in self._FIELD_NAMES}
|
||||
|
||||
|
||||
class PatentSearchService:
    """Service for searching patents using Google Patents.

    Sends queries to the Google Patents XHR endpoint through a lazily
    created ``httpx.AsyncClient`` and converts the JSON payload into plain
    dicts. ``search`` reports failures through an ``"error"`` key instead of
    raising.
    """

    GOOGLE_PATENTS_XHR_URL = "https://patents.google.com/xhr/query"
    GOOGLE_PATENTS_PDF_BASE = "https://patentimages.storage.googleapis.com/"

    # Browser-like headers to avoid being blocked
    DEFAULT_HEADERS = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": "https://patents.google.com/",
        "Origin": "https://patents.google.com",
    }

    def __init__(self):
        # Created on first use by _get_client(); constructing the service
        # itself opens no connection.
        self._client: Optional["httpx.AsyncClient"] = None

    async def _get_client(self) -> "httpx.AsyncClient":
        """Return the shared HTTP client, (re)creating it if absent or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=30.0,
                headers=self.DEFAULT_HEADERS,
                follow_redirects=True,
            )
        return self._client

    async def close(self):
        """Close the shared HTTP client if one is currently open."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()

    @staticmethod
    def _error_result(message: str) -> dict:
        """Build the empty result dict returned on any failure."""
        return {
            "total_results": 0,
            "patents": [],
            "error": message,
        }

    def _parse_patent(self, patent_data: dict) -> "PatentSearchResult":
        """Convert one ``patent`` object of the upstream payload into a result.

        Null or missing fields fall back to safe defaults so a single
        incomplete entry cannot abort the whole search.
        """
        family_meta = patent_data.get("family_metadata") or {}
        aggregated = family_meta.get("aggregated") or {}
        country_status = aggregated.get("country_status") or []

        # Only the first country entry is consulted for the legal state.
        status = "UNKNOWN"
        if country_status:
            best_stage = country_status[0].get("best_patent_stage") or {}
            status = best_stage.get("state") or "UNKNOWN"

        # Both paths are relative to the same storage base URL.
        pdf_path = patent_data.get("pdf")
        thumbnail = patent_data.get("thumbnail")

        return PatentSearchResult(
            publication_number=patent_data.get("publication_number") or "",
            title=self._clean_html(patent_data.get("title")),
            snippet=self._clean_html(patent_data.get("snippet")),
            publication_date=patent_data.get("publication_date"),
            assignee=patent_data.get("assignee"),
            inventor=patent_data.get("inventor"),
            status=status,
            pdf_url=f"{self.GOOGLE_PATENTS_PDF_BASE}{pdf_path}" if pdf_path else None,
            thumbnail_url=f"{self.GOOGLE_PATENTS_PDF_BASE}{thumbnail}" if thumbnail else None,
        )

    async def search(
        self,
        query: str,
        max_results: int = 10,
    ) -> dict:
        """
        Search Google Patents for relevant patents

        Args:
            query: Search query (can be a description or keywords)
            max_results: Maximum number of results to return

        Returns:
            Dict with ``total_results`` (count reported upstream) and
            ``patents`` (list of result dicts). On any failure the dict has
            ``total_results`` 0, an empty ``patents`` list and an ``error``
            message; ordinary exceptions are not propagated.
        """
        try:
            client = await self._get_client()

            # URL encode the query
            # NOTE(review): the query is encoded once although it is nested
            # inside the outer ``url`` parameter - confirm that reserved
            # characters such as '&' reach the upstream intact.
            encoded_query = quote_plus(query)
            url = f"{self.GOOGLE_PATENTS_XHR_URL}?url=q%3D{encoded_query}&exp=&tags="

            logger.info(f"Searching patents with query: {query[:100]}...")

            response = await client.get(url)

            if response.status_code != 200:
                logger.error(f"Google Patents API returned status {response.status_code}")
                return self._error_result(f"API returned status {response.status_code}")

            data = response.json()

            # Parse results; only the first cluster is read.
            results = data.get("results") or {}
            total_num = results.get("total_num_results") or 0
            clusters = results.get("cluster") or []
            items = (clusters[0].get("result") or []) if clusters else []

            patents = [
                self._parse_patent(item.get("patent") or {})
                for item in items[:max_results]
            ]

            logger.info(f"Found {total_num} total patents, returning {len(patents)}")

            return {
                "total_results": total_num,
                "patents": [p.to_dict() for p in patents],
            }

        except httpx.HTTPError as e:
            logger.error(f"HTTP error searching patents: {e}")
            return self._error_result(str(e))
        except Exception as e:
            # Boundary handler: keep the traceback, still answer with a dict.
            logger.exception(f"Error searching patents: {e}")
            return self._error_result(str(e))

    def _clean_html(self, text: str) -> str:
        """Decode HTML entities in ``text`` and trim surrounding whitespace.

        Entities are decoded exactly once, so ``&amp;lt;`` becomes ``&lt;``.
        The ellipsis is rendered as three ASCII dots. Markup tags are left
        untouched. Falsy input yields an empty string.
        """
        if not text:
            return ""
        # Local import: keeps this block self-contained without touching the
        # module's import section.
        from html import unescape

        decoded = unescape(text)
        return decoded.replace("\u2026", "...").strip()
|
||||
|
||||
|
||||
# Singleton instance
|
||||
patent_search_service = PatentSearchService()
|
||||
Reference in New Issue
Block a user