diff --git a/backend/app/main.py b/backend/app/main.py index a607f36..c948367 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -3,10 +3,11 @@ from contextlib import asynccontextmanager from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware -from .routers import attributes, transformation, expert_transformation, deduplication +from .routers import attributes, transformation, expert_transformation, deduplication, patent_search from .services.llm_service import ollama_provider from .services.embedding_service import embedding_service from .services.llm_deduplication_service import llm_deduplication_service +from .services.patent_search_service import patent_search_service @asynccontextmanager @@ -15,6 +16,7 @@ async def lifespan(app: FastAPI): await ollama_provider.close() await embedding_service.close() await llm_deduplication_service.close() + await patent_search_service.close() app = FastAPI( @@ -36,6 +38,7 @@ app.include_router(attributes.router) app.include_router(transformation.router) app.include_router(expert_transformation.router) app.include_router(deduplication.router) +app.include_router(patent_search.router) @app.get("/") diff --git a/backend/app/models/schemas.py b/backend/app/models/schemas.py index d277fae..8dfcdb0 100644 --- a/backend/app/models/schemas.py +++ b/backend/app/models/schemas.py @@ -1,7 +1,10 @@ from pydantic import BaseModel -from typing import Optional, List, Dict +from typing import Optional, List, Dict, Literal from enum import Enum +# Language type for prompts +LanguageType = Literal["zh", "en"] + class AttributeNode(BaseModel): name: str @@ -47,16 +50,19 @@ class CausalChain(BaseModel): class StreamAnalyzeRequest(BaseModel): - """多步驟分析請求(更新為支持動態類別)""" + """Multi-step analysis request (updated to support dynamic categories)""" query: str model: Optional[str] = None temperature: Optional[float] = 0.7 - chain_count: int = 5 # 用戶可設定要生成多少條因果鏈 + chain_count: int = 5 # User can set how many causal chains to generate - # 新增:動態類別支持 - category_mode: Optional[str] = "dynamic_auto" # CategoryMode enum 值 + # Dynamic category support + category_mode: Optional[str] = "dynamic_auto" # CategoryMode enum value custom_categories: Optional[List[str]] = None - suggested_category_count: int = 3 # 建議 LLM 生成的類別數量 + suggested_category_count: int = 3 # Suggest LLM to generate this many categories + + # Language setting + lang: LanguageType = "zh" class StreamAnalyzeResponse(BaseModel): @@ -136,13 +142,14 @@ class DAGRelationship(BaseModel): # ===== Transformation Agent schemas ===== class TransformationRequest(BaseModel): - """Transformation Agent 請求""" - query: str # 原始查詢 (e.g., "腳踏車") - category: str # 類別名稱 (e.g., "功能") - attributes: List[str] # 該類別的屬性列表 + """Transformation Agent request""" + query: str # Original query (e.g., "bicycle") + category: str # Category name (e.g., "Functions") + attributes: List[str] # Attribute list for this category model: Optional[str] = None temperature: Optional[float] = 0.7 - keyword_count: int = 3 # 要生成的新關鍵字數量 + keyword_count: int = 3 # Number of new keywords to generate + lang: LanguageType = "zh" # Language for prompts class TransformationDescription(BaseModel): @@ -215,24 +222,27 @@ class ExpertSource(str, Enum): class ExpertTransformationRequest(BaseModel): - """Expert Transformation Agent 請求""" + """Expert Transformation Agent request""" query: str category: str attributes: List[str] # Expert parameters - expert_count: int = 3 # 專家數量 (2-8) - keywords_per_expert: int = 1 # 每個專家為每個屬性生成幾個關鍵字 (1-3) - 
custom_experts: Optional[List[str]] = None # 用戶指定專家 ["藥師", "工程師"] + expert_count: int = 3 # Number of experts (2-8) + keywords_per_expert: int = 1 # Keywords per expert per attribute (1-3) + custom_experts: Optional[List[str]] = None # User-specified experts # Expert source parameters - expert_source: ExpertSource = ExpertSource.LLM # 專家來源 - expert_language: str = "en" # 外部來源的語言 (目前只有英文資料) + expert_source: ExpertSource = ExpertSource.LLM # Expert source + expert_language: str = "en" # Language for external sources # LLM parameters model: Optional[str] = None temperature: Optional[float] = 0.7 + # Prompt language + lang: LanguageType = "zh" + # ===== Deduplication Agent schemas ===== @@ -243,11 +253,12 @@ class DeduplicationMethod(str, Enum): class DeduplicationRequest(BaseModel): - """去重請求""" + """Deduplication request""" descriptions: List[ExpertTransformationDescription] - method: DeduplicationMethod = DeduplicationMethod.EMBEDDING # 去重方法 - similarity_threshold: float = 0.85 # 餘弦相似度閾值 (0.0-1.0),僅 Embedding 使用 - model: Optional[str] = None # Embedding/LLM 模型 + method: DeduplicationMethod = DeduplicationMethod.EMBEDDING # Deduplication method + similarity_threshold: float = 0.85 # Cosine similarity threshold (0.0-1.0), only for Embedding + model: Optional[str] = None # Embedding/LLM model + lang: LanguageType = "zh" # Prompt language (for LLM method) class DescriptionGroup(BaseModel): diff --git a/backend/app/prompts/attribute_prompt.py b/backend/app/prompts/attribute_prompt.py index a7c0e5a..5a551db 100644 --- a/backend/app/prompts/attribute_prompt.py +++ b/backend/app/prompts/attribute_prompt.py @@ -1,21 +1,37 @@ from typing import List, Optional, Dict import json - -DEFAULT_CATEGORIES = ["材料", "功能", "用途", "使用族群", "特性"] - -CATEGORY_DESCRIPTIONS = { - "材料": "物件由什麼材料組成", - "功能": "物件能做什麼", - "用途": "物件在什麼場景使用", - "使用族群": "誰會使用這個物件", - "特性": "物件有什麼特徵", -} +from .language_config import ( + LanguageType, + DEFAULT_CATEGORIES, + CATEGORY_DESCRIPTIONS, +) -def get_attribute_prompt(query: str, categories: Optional[List[str]] = None) -> str: +def get_default_categories(lang: LanguageType = "zh") -> List[str]: + return DEFAULT_CATEGORIES.get(lang, DEFAULT_CATEGORIES["zh"]) + + +def get_category_descriptions(lang: LanguageType = "zh") -> Dict[str, str]: + return CATEGORY_DESCRIPTIONS.get(lang, CATEGORY_DESCRIPTIONS["zh"]) + + +def get_attribute_prompt( + query: str, + categories: Optional[List[str]] = None, + lang: LanguageType = "zh" +) -> str: """Generate prompt with causal chain structure.""" + if lang == "en": + prompt = f"""Analyze the attributes of "{query}" in a causal chain format: Materials→Functions→Usages→User Groups. - prompt = f"""分析「{query}」的屬性,以因果鏈方式呈現:材料→功能→用途→使用族群。 +List 3-5 types of materials, each extending into a complete causal chain. 
+ +JSON format: +{{"name": "{query}", "children": [{{"name": "Material Name", "category": "Materials", "children": [{{"name": "Function Name", "category": "Functions", "children": [{{"name": "Usage Name", "category": "Usages", "children": [{{"name": "User Group Name", "category": "User Groups"}}]}}]}}]}}]}} + +Return JSON only.""" + else: + prompt = f"""分析「{query}」的屬性,以因果鏈方式呈現:材料→功能→用途→使用族群。 請列出 3-5 種材料,每種材料延伸出完整因果鏈。 @@ -27,9 +43,18 @@ JSON 格式: return prompt -def get_step1_attributes_prompt(query: str) -> str: - """Step 1: 生成各類別的屬性列表(平行結構)""" - return f"""/no_think +def get_step1_attributes_prompt(query: str, lang: LanguageType = "zh") -> str: + """Step 1: Generate attribute list for each category (parallel structure)""" + if lang == "en": + return f"""/no_think +Analyze "{query}" and list attributes for the following four categories. List 3-5 common attributes for each category. + +Return JSON only, in the following format: +{{"materials": ["material1", "material2", "material3"], "functions": ["function1", "function2", "function3"], "usages": ["usage1", "usage2", "usage3"], "users": ["user group1", "user group2", "user group3"]}} + +Object: {query}""" + else: + return f"""/no_think 分析「{query}」,列出以下四個類別的屬性。每個類別列出 3-5 個常見屬性。 只回傳 JSON,格式如下: @@ -45,21 +70,48 @@ def get_step2_causal_chain_prompt( usages: List[str], users: List[str], existing_chains: List[dict], - chain_index: int + chain_index: int, + lang: LanguageType = "zh" ) -> str: - """Step 2: 生成單條因果鏈""" + """Step 2: Generate a single causal chain""" existing_chains_text = "" - if existing_chains: - chains_list = [ - f"- {c['material']} → {c['function']} → {c['usage']} → {c['user']}" - for c in existing_chains - ] - existing_chains_text = f""" + + if lang == "en": + if existing_chains: + chains_list = [ + f"- {c['material']} → {c['function']} → {c['usage']} → {c['user']}" + for c in existing_chains + ] + existing_chains_text = f""" +[Already generated causal chains, do not repeat] +{chr(10).join(chains_list)} +""" + return f"""/no_think +Generate causal chain #{chain_index} for "{query}". + +[Available Materials] {', '.join(materials)} +[Available Functions] {', '.join(functions)} +[Available Usages] {', '.join(usages)} +[Available User Groups] {', '.join(users)} +{existing_chains_text} +[Rules] +1. Select one attribute from each category to form a logical causal chain +2. The causal relationship must be logical (materials determine functions, functions determine usages, usages determine user groups) +3. 
Do not repeat existing causal chains + +Return JSON only: +{{"material": "selected material", "function": "selected function", "usage": "selected usage", "user": "selected user group"}}""" + else: + if existing_chains: + chains_list = [ + f"- {c['material']} → {c['function']} → {c['usage']} → {c['user']}" + for c in existing_chains + ] + existing_chains_text = f""" 【已生成的因果鏈,請勿重複】 {chr(10).join(chains_list)} """ - - return f"""/no_think + return f"""/no_think 為「{query}」生成第 {chain_index} 條因果鏈。 【可選材料】{', '.join(materials)} @@ -76,19 +128,52 @@ def get_step2_causal_chain_prompt( {{"material": "選擇的材料", "function": "選擇的功能", "usage": "選擇的用途", "user": "選擇的族群"}}""" -def get_flat_attribute_prompt(query: str, categories: Optional[List[str]] = None) -> str: +def get_flat_attribute_prompt( + query: str, + categories: Optional[List[str]] = None, + lang: LanguageType = "zh" +) -> str: """Generate prompt with flat/parallel categories (original design).""" - cats = categories if categories else DEFAULT_CATEGORIES + cats = categories if categories else get_default_categories(lang) + cat_descs = get_category_descriptions(lang) # Build category list category_lines = [] for cat in cats: - desc = CATEGORY_DESCRIPTIONS.get(cat, f"{cat}的相關屬性") - category_lines.append(f"- {cat}:{desc}") + desc = cat_descs.get(cat, f"Related attributes of {cat}" if lang == "en" else f"{cat}的相關屬性") + category_lines.append(f"- {cat}: {desc}") categories_text = "\n".join(category_lines) - prompt = f"""/no_think + if lang == "en": + prompt = f"""/no_think +You are an object attribute analysis expert. Please break down the user's input object into the following attribute categories. + +[Required Categories] +{categories_text} + +[Important] The return format must be valid JSON, and each node must have a "name" field: + +```json +{{ + "name": "Object Name", + "children": [ + {{ + "name": "Category Name", + "children": [ + {{"name": "Attribute 1"}}, + {{"name": "Attribute 2"}} + ] + }} + ] +}} +``` + +Return JSON only, no other text. + +User input: {query}""" + else: + prompt = f"""/no_think 你是一個物件屬性分析專家。請將用戶輸入的物件拆解成以下屬性類別。 【必須包含的類別】 @@ -123,14 +208,42 @@ def get_flat_attribute_prompt(query: str, categories: Optional[List[str]] = None def get_step0_category_analysis_prompt( query: str, suggested_count: int = 3, - exclude_categories: List[str] | None = None + exclude_categories: List[str] | None = None, + lang: LanguageType = "zh" ) -> str: - """Step 0: LLM 分析建議類別""" - exclude_text = "" - if exclude_categories: - exclude_text = f"\n【禁止使用的類別】{', '.join(exclude_categories)}(這些已經是固定類別,不要重複建議)\n" + """Step 0: LLM analyzes and suggests categories""" - return f"""/no_think + if lang == "en": + exclude_text = "" + if exclude_categories: + exclude_text = f"\n[Forbidden Categories] {', '.join(exclude_categories)} (These are already fixed categories, do not suggest duplicates)\n" + + return f"""/no_think +Analyze "{query}" and suggest {suggested_count} most suitable attribute categories to describe it. + +[Common Category References] Characteristics, Shape, Color, Size, Brand, Price Range, Weight, Style, Occasion, Season, Technical Specifications +{exclude_text} +[Important] +1. Choose categories that best describe the essence of this object +2. Categories should have logical relationships +3. Do not choose overly abstract or duplicate categories +4. 
Must suggest creative categories different from the reference list + +Return JSON only: +{{ + "categories": [ + {{"name": "Category1", "description": "Description1", "order": 0}}, + {{"name": "Category2", "description": "Description2", "order": 1}} + ] +}} + +Object: {query}""" + else: + exclude_text = "" + if exclude_categories: + exclude_text = f"\n【禁止使用的類別】{', '.join(exclude_categories)}(這些已經是固定類別,不要重複建議)\n" + + return f"""/no_think 分析「{query}」,建議 {suggested_count} 個最適合的屬性類別來描述它。 【常見類別參考】特性、形狀、顏色、尺寸、品牌、價格區間、重量、風格、場合、季節、技術規格 @@ -154,21 +267,35 @@ def get_step0_category_analysis_prompt( def get_step1_dynamic_attributes_prompt( query: str, - categories: List # List[CategoryDefinition] + categories: List, # List[CategoryDefinition] + lang: LanguageType = "zh" ) -> str: - """動態 Step 1 - 根據類別列表生成屬性""" - # 按 order 排序並構建描述 + """Dynamic Step 1 - Generate attributes based on category list""" + # Sort by order and build description sorted_cats = sorted(categories, key=lambda x: x.order if hasattr(x, 'order') else x.get('order', 0)) category_desc = "\n".join([ - f"- {cat.name if hasattr(cat, 'name') else cat['name']}: {cat.description if hasattr(cat, 'description') else cat.get('description', '相關屬性')}" + f"- {cat.name if hasattr(cat, 'name') else cat['name']}: {cat.description if hasattr(cat, 'description') else cat.get('description', 'Related attributes' if lang == 'en' else '相關屬性')}" for cat in sorted_cats ]) category_keys = [cat.name if hasattr(cat, 'name') else cat['name'] for cat in sorted_cats] - json_template = {cat: ["屬性1", "屬性2", "屬性3"] for cat in category_keys} - return f"""/no_think + if lang == "en": + json_template = {cat: ["attribute1", "attribute2", "attribute3"] for cat in category_keys} + return f"""/no_think +Analyze "{query}" and list attributes for the following categories. List 3-5 common attributes for each category. 
+ +[Category List] +{category_desc} + +Return JSON only: +{json.dumps(json_template, ensure_ascii=False, indent=2)} + +Object: {query}""" + else: + json_template = {cat: ["屬性1", "屬性2", "屬性3"] for cat in category_keys} + return f"""/no_think 分析「{query}」,列出以下類別的屬性。每個類別列出 3-5 個常見屬性。 【類別列表】 @@ -185,30 +312,59 @@ def get_step2_dynamic_causal_chain_prompt( categories: List, # List[CategoryDefinition] attributes_by_category: Dict[str, List[str]], existing_chains: List[Dict[str, str]], - chain_index: int + chain_index: int, + lang: LanguageType = "zh" ) -> str: - """動態 Step 2 - 生成動態類別的因果鏈""" + """Dynamic Step 2 - Generate causal chains for dynamic categories""" sorted_cats = sorted(categories, key=lambda x: x.order if hasattr(x, 'order') else x.get('order', 0)) - # 構建可選屬性 + # Build available attributes available_attrs = "\n".join([ - f"【{cat.name if hasattr(cat, 'name') else cat['name']}】{', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}" + f"[{cat.name if hasattr(cat, 'name') else cat['name']}] {', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}" for cat in sorted_cats ]) - # 已生成的因果鏈 - existing_text = "" - if existing_chains: - chains_list = [ - " → ".join([chain.get(cat.name if hasattr(cat, 'name') else cat['name'], '?') for cat in sorted_cats]) - for chain in existing_chains - ] - existing_text = f"\n【已生成,請勿重複】\n" + "\n".join([f"- {c}" for c in chains_list]) + if lang == "en": + # Already generated causal chains + existing_text = "" + if existing_chains: + chains_list = [ + " → ".join([chain.get(cat.name if hasattr(cat, 'name') else cat['name'], '?') for cat in sorted_cats]) + for chain in existing_chains + ] + existing_text = "\n[Already generated, do not repeat]\n" + "\n".join([f"- {c}" for c in chains_list]) - # JSON 模板 - json_template = {cat.name if hasattr(cat, 'name') else cat['name']: f"選擇的{cat.name if hasattr(cat, 'name') else cat['name']}" for cat in sorted_cats} + # JSON template + json_template = {cat.name if hasattr(cat, 'name') else cat['name']: f"selected {cat.name if hasattr(cat, 'name') else cat['name']}" for cat in sorted_cats} - return f"""/no_think + return f"""/no_think +Generate causal chain #{chain_index} for "{query}". + +[Available Attributes] +{available_attrs} +{existing_text} + +[Rules] +1. Select one attribute from each category +2. Causal relationships must be logical +3. 
Do not repeat + +Return JSON only: +{json.dumps(json_template, ensure_ascii=False, indent=2)}""" + else: + # 已生成的因果鏈 + existing_text = "" + if existing_chains: + chains_list = [ + " → ".join([chain.get(cat.name if hasattr(cat, 'name') else cat['name'], '?') for cat in sorted_cats]) + for chain in existing_chains + ] + existing_text = "\n【已生成,請勿重複】\n" + "\n".join([f"- {c}" for c in chains_list]) + + # JSON 模板 + json_template = {cat.name if hasattr(cat, 'name') else cat['name']: f"選擇的{cat.name if hasattr(cat, 'name') else cat['name']}" for cat in sorted_cats} + + return f"""/no_think 為「{query}」生成第 {chain_index} 條因果鏈。 【可選屬性】 @@ -230,20 +386,46 @@ def get_step2_dag_relationships_prompt( query: str, categories: List, # List[CategoryDefinition] attributes_by_category: Dict[str, List[str]], + lang: LanguageType = "zh" ) -> str: - """生成相鄰類別之間的自然關係""" + """Generate natural relationships between adjacent categories""" sorted_cats = sorted(categories, key=lambda x: x.order if hasattr(x, 'order') else x.get('order', 0)) # Build attribute listing attr_listing = "\n".join([ - f"【{cat.name if hasattr(cat, 'name') else cat['name']}】{', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}" + f"[{cat.name if hasattr(cat, 'name') else cat['name']}] {', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}" for cat in sorted_cats ]) # Build direction hints direction_hints = " → ".join([cat.name if hasattr(cat, 'name') else cat['name'] for cat in sorted_cats]) - return f"""/no_think + if lang == "en": + return f"""/no_think +Analyze the attribute relationships of "{query}". + +{attr_listing} + +[Relationship Direction] {direction_hints} + +[Rules] +1. Only establish relationships between adjacent categories (e.g., Materials→Functions, Functions→Usages) +2. Only output pairs that have true causal or associative relationships +3. An attribute can connect to multiple downstream attributes, or none at all +4. Not every attribute needs to have connections +5. Relationships should be reasonable and meaningful + +Return JSON: +{{ + "relationships": [ + {{"source_category": "CategoryA", "source": "attribute name", "target_category": "CategoryB", "target": "attribute name"}}, + ... 
+ ] +}} + +Return JSON only.""" + else: + return f"""/no_think 分析「{query}」的屬性關係。 {attr_listing} diff --git a/backend/app/prompts/expert_transformation_prompt.py b/backend/app/prompts/expert_transformation_prompt.py index f40481c..9fdf67c 100644 --- a/backend/app/prompts/expert_transformation_prompt.py +++ b/backend/app/prompts/expert_transformation_prompt.py @@ -1,34 +1,68 @@ -"""Expert Transformation Agent 提示詞模組""" +"""Expert Transformation Agent prompts module - Bilingual support""" from typing import List, Optional +from .language_config import LanguageType def get_expert_generation_prompt( query: str, categories: List[str], expert_count: int, - custom_experts: Optional[List[str]] = None + custom_experts: Optional[List[str]] = None, + lang: LanguageType = "zh" ) -> str: - """Step 0: 生成專家團隊(不依賴主題,純隨機多元)""" + """Step 0: Generate expert team (not dependent on topic, purely random and diverse)""" import time import random - custom_text = "" - if custom_experts and len(custom_experts) > 0: - custom_text = f"(已指定:{', '.join(custom_experts[:expert_count])})" - - # 加入時間戳和隨機數來增加多樣性 + # Add timestamp and random number for diversity seed = int(time.time() * 1000) % 10000 - diversity_hints = [ - "冷門、非主流、跨領域", - "罕見職業、新興領域、邊緣學科", - "非傳統、創新、小眾專業", - "未來趨向、實驗性、非常規", - "跨文化、混合領域、獨特視角" - ] - hint = random.choice(diversity_hints) - return f"""/no_think + if lang == "en": + custom_text = "" + if custom_experts and len(custom_experts) > 0: + custom_text = f" (Specified: {', '.join(custom_experts[:expert_count])})" + + diversity_hints = [ + "obscure, non-mainstream, cross-disciplinary", + "rare occupations, emerging fields, fringe disciplines", + "unconventional, innovative, niche specialties", + "future-oriented, experimental, non-traditional", + "cross-cultural, hybrid fields, unique perspectives" + ] + hint = random.choice(diversity_hints) + + return f"""/no_think +Randomly assemble a team of {expert_count} experts from completely different fields{custom_text}. + +[Innovation Requirements] (Random seed: {seed}) +- Prioritize {hint} experts +- Avoid common professions (such as doctors, engineers, teachers, lawyers, etc.) 
+- Each expert must be from a completely unrelated field +- The rarer and more innovative, the better + +Return JSON: +{{"experts": [{{"id": "expert-0", "name": "profession", "domain": "field", "perspective": "viewpoint"}}, ...]}} + +Rules: +- id should be expert-0 to expert-{expert_count - 1} +- name is the profession name (not a person's name), 2-5 words +- domain should be specific and unique, no duplicate types""" + else: + custom_text = "" + if custom_experts and len(custom_experts) > 0: + custom_text = f"(已指定:{', '.join(custom_experts[:expert_count])})" + + diversity_hints = [ + "冷門、非主流、跨領域", + "罕見職業、新興領域、邊緣學科", + "非傳統、創新、小眾專業", + "未來趨向、實驗性、非常規", + "跨文化、混合領域、獨特視角" + ] + hint = random.choice(diversity_hints) + + return f"""/no_think 隨機組建 {expert_count} 個來自完全不同領域的專家團隊{custom_text}。 【創新要求】(隨機種子:{seed}) @@ -50,13 +84,39 @@ def get_expert_keyword_generation_prompt( category: str, attribute: str, experts: List[dict], # List[ExpertProfile] - keywords_per_expert: int = 1 + keywords_per_expert: int = 1, + lang: LanguageType = "zh" ) -> str: - """Step 1: 專家視角關鍵字生成""" - # 建立專家列表,格式更清晰 + """Step 1: Expert perspective keyword generation""" + # Build expert list in clearer format experts_list = "\n".join([f"- {exp['id']}: {exp['name']}" for exp in experts]) - return f"""/no_think + if lang == "en": + return f"""/no_think +You need to play the role of the following experts to generate innovative keywords for an attribute: + +[Expert List] +{experts_list} + +[Task] +Attribute: "{attribute}" (Category: {category}) + +For each expert, please: +1. First understand the professional background, knowledge domain, and work content of that profession +2. Think about "{attribute}" from that profession's unique perspective +3. Generate {keywords_per_expert} innovative keyword(s) related to that specialty (2-6 words) + +Keywords must reflect that expert's professional thinking style, for example: +- Accountant viewing "movement" → "cash flow", "cost-benefit" +- Architect viewing "movement" → "circulation design", "spatial flow" +- Psychologist viewing "movement" → "behavioral motivation", "emotional transition" + +Return JSON: +{{"keywords": [{{"keyword": "term", "expert_id": "expert-X", "expert_name": "name"}}, ...]}} + +Total of {len(experts) * keywords_per_expert} keywords needed, each keyword must be clearly related to the corresponding expert's professional field.""" + else: + return f"""/no_think 你需要扮演以下專家,為屬性生成創新關鍵字: 【專家名單】 @@ -86,13 +146,29 @@ def get_single_description_prompt( keyword: str, expert_id: str, expert_name: str, - expert_domain: str + expert_domain: str, + lang: LanguageType = "zh" ) -> str: - """Step 2: 為單一關鍵字生成描述""" - # 如果 domain 是通用的,就只用職業名稱 - domain_text = f"({expert_domain}領域)" if expert_domain and expert_domain != "Professional Field" else "" + """Step 2: Generate description for a single keyword""" + if lang == "en": + # If domain is generic, just use profession name + domain_text = f" ({expert_domain} field)" if expert_domain and expert_domain != "Professional Field" else "" - return f"""/no_think + return f"""/no_think +You are a {expert_name}{domain_text}. + +Task: Generate an innovative application description for "{query}". +Keyword: {keyword} + +From your professional perspective, explain how to apply the concept of "{keyword}" to "{query}". The description should be specific, creative, 15-30 words. 
+ +Return JSON only, no other text: +{{"description": "your innovative application description"}}""" + else: + # 如果 domain 是通用的,就只用職業名稱 + domain_text = f"({expert_domain}領域)" if expert_domain and expert_domain != "Professional Field" else "" + + return f"""/no_think 你是一位{expert_name}{domain_text}。 任務:為「{query}」生成一段創新應用描述。 diff --git a/backend/app/prompts/language_config.py b/backend/app/prompts/language_config.py new file mode 100644 index 0000000..f87fcc2 --- /dev/null +++ b/backend/app/prompts/language_config.py @@ -0,0 +1,51 @@ +"""Language configuration for prompts""" + +from enum import Enum +from typing import Literal + +class Language(str, Enum): + CHINESE = "zh" + ENGLISH = "en" + +LanguageType = Literal["zh", "en"] + +# Default categories for each language +DEFAULT_CATEGORIES = { + "zh": ["材料", "功能", "用途", "使用族群", "特性"], + "en": ["Materials", "Functions", "Usages", "User Groups", "Characteristics"], +} + +CATEGORY_DESCRIPTIONS = { + "zh": { + "材料": "物件由什麼材料組成", + "功能": "物件能做什麼", + "用途": "物件在什麼場景使用", + "使用族群": "誰會使用這個物件", + "特性": "物件有什麼特徵", + }, + "en": { + "Materials": "What materials the object is made of", + "Functions": "What the object can do", + "Usages": "In what scenarios the object is used", + "User Groups": "Who uses this object", + "Characteristics": "What features the object has", + }, +} + +# Category name mappings between languages +CATEGORY_MAPPING = { + "zh_to_en": { + "材料": "Materials", + "功能": "Functions", + "用途": "Usages", + "使用族群": "User Groups", + "特性": "Characteristics", + }, + "en_to_zh": { + "Materials": "材料", + "Functions": "功能", + "Usages": "用途", + "User Groups": "使用族群", + "Characteristics": "特性", + }, +} diff --git a/backend/app/prompts/transformation_prompt.py b/backend/app/prompts/transformation_prompt.py index 415456a..310cafd 100644 --- a/backend/app/prompts/transformation_prompt.py +++ b/backend/app/prompts/transformation_prompt.py @@ -1,22 +1,43 @@ -"""Transformation Agent 提示詞模組""" +"""Transformation Agent prompts module - Bilingual support""" from typing import List +from .language_config import LanguageType def get_keyword_generation_prompt( category: str, attributes: List[str], - keyword_count: int = 3 + keyword_count: int = 3, + lang: LanguageType = "zh" ) -> str: """ - Step 1: 生成新關鍵字 + Step 1: Generate new keywords - 給定類別和現有屬性,生成全新的、有創意的關鍵字。 - 不考慮原始查詢,只專注於類別本身可能的延伸。 + Given a category and existing attributes, generate new, creative keywords. + Don't consider the original query, focus only on possible extensions of the category itself. """ - attrs_text = "、".join(attributes) + attrs_text = ", ".join(attributes) if lang == "en" else "、".join(attributes) - return f"""/no_think + if lang == "en": + return f"""/no_think +You are a creative brainstorming expert. Given a category and its existing attributes, please generate new, creative keywords or descriptive phrases. + +[Category] {category} +[Existing Attributes] {attrs_text} + +[Important Rules] +1. Generate {keyword_count} completely new keywords +2. Keywords must fit within the scope of "{category}" category +3. Keywords should be creative and not duplicate or be too similar to existing attributes +4. Don't consider any specific object, focus only on possible extensions of this category +5. 
Each keyword should be 2-6 words + +Return JSON only: +{{ + "keywords": ["keyword1", "keyword2", "keyword3"] +}}""" + else: + return f"""/no_think 你是一個創意發想專家。給定一個類別和該類別下的現有屬性,請生成全新的、有創意的關鍵字或描述片段。 【類別】{category} @@ -38,14 +59,36 @@ def get_keyword_generation_prompt( def get_description_generation_prompt( query: str, category: str, - keyword: str + keyword: str, + lang: LanguageType = "zh" ) -> str: """ - Step 2: 結合原始查詢生成描述 + Step 2: Combine with original query to generate description - 用新關鍵字創造一個與原始查詢相關的創新應用描述。 + Use new keyword to create an innovative application description related to the original query. """ - return f"""/no_think + if lang == "en": + return f"""/no_think +You are an innovation application expert. Please apply a new keyword concept to a specific object to create an innovative application description. + +[Object] {query} +[Category] {category} +[New Keyword] {keyword} + +[Task] +Using the concept of "{keyword}", create an innovative application description for "{query}". +The description should be a complete sentence or phrase explaining how to apply this new concept to the object. + +[Example Format] +- If the object is "bicycle" and keyword is "monitor", you could generate "bicycle monitors the rider's health status" +- If the object is "umbrella" and keyword is "generate power", you could generate "umbrella generates electricity using raindrop impacts" + +Return JSON only: +{{ + "description": "innovative application description" +}}""" + else: + return f"""/no_think 你是一個創新應用專家。請將一個新的關鍵字概念應用到特定物件上,創造出創新的應用描述。 【物件】{query} @@ -69,15 +112,35 @@ def get_description_generation_prompt( def get_batch_description_prompt( query: str, category: str, - keywords: List[str] + keywords: List[str], + lang: LanguageType = "zh" ) -> str: """ - 批次生成描述(可選的優化版本,一次處理多個關鍵字) + Batch description generation (optional optimized version, process multiple keywords at once) """ - keywords_text = "、".join(keywords) - keywords_json = ", ".join([f'"{k}"' for k in keywords]) + keywords_text = ", ".join(keywords) if lang == "en" else "、".join(keywords) - return f"""/no_think + if lang == "en": + return f"""/no_think +You are an innovation application expert. Please apply multiple new keyword concepts to a specific object, creating an innovative application description for each keyword. + +[Object] {query} +[Category] {category} +[New Keywords] {keywords_text} + +[Task] +Create an innovative application description related to "{query}" for each keyword. +Each description should be a complete sentence or phrase. 
+ +Return JSON only: +{{ + "descriptions": [ + {{"keyword": "keyword1", "description": "description1"}}, + {{"keyword": "keyword2", "description": "description2"}} + ] +}}""" + else: + return f"""/no_think 你是一個創新應用專家。請將多個新的關鍵字概念應用到特定物件上,為每個關鍵字創造創新的應用描述。 【物件】{query} diff --git a/backend/app/routers/attributes.py b/backend/app/routers/attributes.py index a4dfa32..1f64c5b 100644 --- a/backend/app/routers/attributes.py +++ b/backend/app/routers/attributes.py @@ -58,7 +58,8 @@ async def execute_step0( prompt = get_step0_category_analysis_prompt( request.query, request.suggested_category_count, - exclude_categories=exclude_categories + exclude_categories=exclude_categories, + lang=request.lang ) temperature = request.temperature if request.temperature is not None else 0.7 response = await ollama_provider.generate( @@ -310,7 +311,7 @@ async def generate_sse_events(request: StreamAnalyzeRequest) -> AsyncGenerator[s # ========== Step 1: Generate Attributes (Dynamic) ========== yield f"event: step1_start\ndata: {json.dumps({'message': '生成屬性...'}, ensure_ascii=False)}\n\n" - step1_prompt = get_step1_dynamic_attributes_prompt(request.query, final_categories) + step1_prompt = get_step1_dynamic_attributes_prompt(request.query, final_categories, lang=request.lang) logger.info(f"Step 1 prompt: {step1_prompt[:200]}") step1_response = await ollama_provider.generate( @@ -330,6 +331,7 @@ async def generate_sse_events(request: StreamAnalyzeRequest) -> AsyncGenerator[s query=request.query, categories=final_categories, attributes_by_category=step1_result.attributes, + lang=request.lang ) logger.info(f"Step 2 (relationships) prompt: {step2_prompt[:300]}") diff --git a/backend/app/routers/deduplication.py b/backend/app/routers/deduplication.py index 134cc42..eb28de1 100644 --- a/backend/app/routers/deduplication.py +++ b/backend/app/routers/deduplication.py @@ -63,7 +63,8 @@ async def deduplicate_descriptions(request: DeduplicationRequest) -> Deduplicati # 使用 LLM 成對比較去重 result = await llm_deduplication_service.deduplicate( descriptions=request.descriptions, - model=request.model + model=request.model, + lang=request.lang ) return result except ValueError as e: diff --git a/backend/app/routers/expert_transformation.py b/backend/app/routers/expert_transformation.py index 07299b7..0bce43c 100644 --- a/backend/app/routers/expert_transformation.py +++ b/backend/app/routers/expert_transformation.py @@ -68,7 +68,8 @@ async def generate_expert_transformation_events( query=request.query, categories=all_categories, expert_count=request.expert_count, - custom_experts=actual_custom_experts if actual_custom_experts else None + custom_experts=actual_custom_experts if actual_custom_experts else None, + lang=request.lang ) logger.info(f"Expert prompt: {expert_prompt[:200]}") @@ -119,7 +120,8 @@ async def generate_expert_transformation_events( query=request.query, categories=all_categories, expert_count=request.expert_count, - custom_experts=actual_custom_experts if actual_custom_experts else None + custom_experts=actual_custom_experts if actual_custom_experts else None, + lang=request.lang ) expert_response = await ollama_provider.generate( @@ -160,7 +162,8 @@ async def generate_expert_transformation_events( category=request.category, attribute=attribute, experts=[e.model_dump() for e in experts], - keywords_per_expert=request.keywords_per_expert + keywords_per_expert=request.keywords_per_expert, + lang=request.lang ) logger.info(f"Keyword prompt for '{attribute}': {kw_prompt[:300]}") @@ -214,7 +217,8 @@ async def 
generate_expert_transformation_events( keyword=kw.keyword, expert_id=kw.expert_id, expert_name=kw.expert_name, - expert_domain=expert_domain + expert_domain=expert_domain, + lang=request.lang ) desc_response = await ollama_provider.generate( diff --git a/backend/app/routers/patent_search.py b/backend/app/routers/patent_search.py new file mode 100644 index 0000000..2c1aff6 --- /dev/null +++ b/backend/app/routers/patent_search.py @@ -0,0 +1,133 @@ +"""Patent Search Router - Search for similar patents""" + +import logging +from typing import Optional, List + +from fastapi import APIRouter +from pydantic import BaseModel + +from ..services.patent_search_service import patent_search_service + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/api/patent", tags=["patent"]) + + +# ===== Request/Response Models ===== + +class PatentSearchRequest(BaseModel): + """Patent search request""" + query: str # Search query (description or keywords) + max_results: int = 10 # Maximum results to return (1-20) + + +class PatentResult(BaseModel): + """Single patent result""" + publication_number: str + title: str + snippet: str + publication_date: Optional[str] = None + assignee: Optional[str] = None + inventor: Optional[str] = None + status: str # ACTIVE, NOT_ACTIVE, UNKNOWN + pdf_url: Optional[str] = None + thumbnail_url: Optional[str] = None + + +class PatentSearchResponse(BaseModel): + """Patent search response""" + query: str + total_results: int + patents: List[PatentResult] + error: Optional[str] = None + + +class BatchPatentSearchRequest(BaseModel): + """Batch patent search request - search multiple descriptions""" + queries: List[str] # List of descriptions to search + max_results_per_query: int = 5 # Max results per query + + +class BatchPatentSearchResult(BaseModel): + """Results for a single query in batch search""" + query: str + total_results: int + patents: List[PatentResult] + error: Optional[str] = None + + +class BatchPatentSearchResponse(BaseModel): + """Batch patent search response""" + results: List[BatchPatentSearchResult] + total_queries: int + + +# ===== Endpoints ===== + +@router.post("/search", response_model=PatentSearchResponse) +async def search_patents(request: PatentSearchRequest): + """ + Search for patents similar to the given description/query. + + Uses Google Patents to find related patents based on keywords. + """ + logger.info(f"Patent search request: {request.query[:100]}...") + + # Limit max_results to reasonable range + max_results = min(max(1, request.max_results), 20) + + result = await patent_search_service.search( + query=request.query, + max_results=max_results, + ) + + return PatentSearchResponse( + query=request.query, + total_results=result.get("total_results", 0), + patents=[PatentResult(**p) for p in result.get("patents", [])], + error=result.get("error"), + ) + + +@router.post("/search/batch", response_model=BatchPatentSearchResponse) +async def batch_search_patents(request: BatchPatentSearchRequest): + """ + Search for patents for multiple descriptions at once. + + Useful for checking multiple creative descriptions against patents. 
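+
+    Example request body (values are illustrative):
+
+        {
+            "queries": ["umbrella that harvests rain energy", "bicycle that monitors posture"],
+            "max_results_per_query": 5
+        }
+
+    Each entry in the response mirrors the single /api/patent/search result
+    shape (query, total_results, patents, error).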
+ """ + logger.info(f"Batch patent search: {len(request.queries)} queries") + + # Limit results per query + max_per_query = min(max(1, request.max_results_per_query), 10) + + results: List[BatchPatentSearchResult] = [] + + for query in request.queries: + result = await patent_search_service.search( + query=query, + max_results=max_per_query, + ) + + results.append(BatchPatentSearchResult( + query=query, + total_results=result.get("total_results", 0), + patents=[PatentResult(**p) for p in result.get("patents", [])], + error=result.get("error"), + )) + + return BatchPatentSearchResponse( + results=results, + total_queries=len(request.queries), + ) + + +@router.get("/health") +async def patent_search_health(): + """Check if patent search service is working""" + # Do a simple test search + result = await patent_search_service.search("test", max_results=1) + + if result.get("error"): + return {"status": "unhealthy", "error": result["error"]} + + return {"status": "healthy"} diff --git a/backend/app/routers/transformation.py b/backend/app/routers/transformation.py index 2d12ad7..f1663c8 100644 --- a/backend/app/routers/transformation.py +++ b/backend/app/routers/transformation.py @@ -36,7 +36,8 @@ async def generate_transformation_events( keyword_prompt = get_keyword_generation_prompt( category=request.category, attributes=request.attributes, - keyword_count=request.keyword_count + keyword_count=request.keyword_count, + lang=request.lang ) logger.info(f"Keyword prompt: {keyword_prompt[:200]}") @@ -61,7 +62,8 @@ async def generate_transformation_events( desc_prompt = get_batch_description_prompt( query=request.query, category=request.category, - keywords=new_keywords + keywords=new_keywords, + lang=request.lang ) logger.info(f"Description prompt: {desc_prompt[:300]}") diff --git a/backend/app/services/llm_deduplication_service.py b/backend/app/services/llm_deduplication_service.py index 17bf8d9..b50a71a 100644 --- a/backend/app/services/llm_deduplication_service.py +++ b/backend/app/services/llm_deduplication_service.py @@ -1,12 +1,12 @@ """ -LLM Deduplication Service - 使用 LLM 成對比較進行去重 +LLM Deduplication Service - Using LLM pairwise comparison for deduplication -讓 LLM 判斷兩個描述是否語意重複,透過並行處理加速。 +Let LLM determine whether two descriptions are semantically duplicate, accelerated by parallel processing. 
""" import asyncio import logging -from typing import List, Tuple, Optional +from typing import List, Tuple, Optional, Literal import httpx import numpy as np @@ -18,6 +18,7 @@ from ..models.schemas import ( DeduplicationMethod, DescriptionGroup, ) +from ..prompts.language_config import LanguageType logger = logging.getLogger(__name__) @@ -31,27 +32,20 @@ class LLMDeduplicationService: self.client = httpx.AsyncClient(timeout=60.0) self.max_concurrent = 5 # 最大並行數,避免 Ollama 過載 - async def compare_pair( - self, - desc1: str, - desc2: str, - model: str, - semaphore: asyncio.Semaphore - ) -> bool: - """ - 讓 LLM 判斷兩個描述是否語意重複 + def _get_comparison_prompt(self, desc1: str, desc2: str, lang: LanguageType = "zh") -> str: + """Get comparison prompt in the specified language""" + if lang == "en": + return f"""Determine whether the following two innovative descriptions express the same or very similar concepts: - Args: - desc1: 第一個描述 - desc2: 第二個描述 - model: LLM 模型名稱 - semaphore: 並行控制信號量 +Description 1: {desc1} - Returns: - bool: 是否為重複描述 - """ - async with semaphore: # 控制並行數 - prompt = f"""判斷以下兩個創新描述是否表達相同或非常相似的概念: +Description 2: {desc2} + +If both descriptions essentially express the same or very similar innovative concept, answer "YES" +If the two descriptions express different innovative concepts, answer "NO" +Only answer YES or NO, no other text""" + else: + return f"""判斷以下兩個創新描述是否表達相同或非常相似的概念: 描述1: {desc1} @@ -61,6 +55,30 @@ class LLMDeduplicationService: 如果兩者描述不同的創新概念,回答 "NO" 只回答 YES 或 NO,不要其他文字""" + async def compare_pair( + self, + desc1: str, + desc2: str, + model: str, + semaphore: asyncio.Semaphore, + lang: LanguageType = "zh" + ) -> bool: + """ + Let LLM determine whether two descriptions are semantically duplicate + + Args: + desc1: First description + desc2: Second description + model: LLM model name + semaphore: Concurrency control semaphore + lang: Language for the prompt + + Returns: + bool: Whether the descriptions are duplicates + """ + async with semaphore: # Control concurrency + prompt = self._get_comparison_prompt(desc1, desc2, lang) + try: response = await self.client.post( f"{self.base_url}/api/generate", @@ -86,26 +104,28 @@ class LLMDeduplicationService: async def compare_batch( self, pairs: List[Tuple[int, int, str, str]], - model: str + model: str, + lang: LanguageType = "zh" ) -> List[Tuple[int, int, bool]]: """ - 並行批次比較多個描述對 + Parallel batch comparison of multiple description pairs Args: - pairs: 待比較的配對列表 [(i, j, desc1, desc2), ...] - model: LLM 模型名稱 + pairs: List of pairs to compare [(i, j, desc1, desc2), ...] + model: LLM model name + lang: Language for the prompt Returns: - 比較結果列表 [(i, j, is_similar), ...] + List of comparison results [(i, j, is_similar), ...] 
""" semaphore = asyncio.Semaphore(self.max_concurrent) async def compare_one(pair: Tuple[int, int, str, str]) -> Tuple[int, int, bool]: i, j, desc1, desc2 = pair - is_similar = await self.compare_pair(desc1, desc2, model, semaphore) + is_similar = await self.compare_pair(desc1, desc2, model, semaphore, lang) return (i, j, is_similar) - # 使用 asyncio.gather 並行執行所有比較 + # Use asyncio.gather to execute all comparisons in parallel results = await asyncio.gather(*[compare_one(p) for p in pairs]) return results @@ -144,17 +164,19 @@ class LLMDeduplicationService: async def deduplicate( self, descriptions: List[ExpertTransformationDescription], - model: Optional[str] = None + model: Optional[str] = None, + lang: LanguageType = "zh" ) -> DeduplicationResult: """ - 使用 LLM 成對比較進行去重 + Use LLM pairwise comparison for deduplication Args: - descriptions: 要去重的描述列表 - model: LLM 模型名稱 + descriptions: List of descriptions to deduplicate + model: LLM model name + lang: Language for the prompt Returns: - DeduplicationResult: 去重結果 + DeduplicationResult: Deduplication result """ model = model or self.default_model @@ -188,10 +210,10 @@ class LLMDeduplicationService: )) total_pairs = len(pairs) - logger.info(f"LLM deduplication: {total_pairs} pairs to compare (parallel={self.max_concurrent}, model={model})") + logger.info(f"LLM deduplication: {total_pairs} pairs to compare (parallel={self.max_concurrent}, model={model}, lang={lang})") - # 並行批次比較 - results = await self.compare_batch(pairs, model) + # Parallel batch comparison + results = await self.compare_batch(pairs, model, lang) # 填入相似度矩陣 for i, j, is_similar in results: diff --git a/backend/app/services/patent_search_service.py b/backend/app/services/patent_search_service.py new file mode 100644 index 0000000..645201d --- /dev/null +++ b/backend/app/services/patent_search_service.py @@ -0,0 +1,195 @@ +"""Patent Search Service using Google Patents XHR API""" + +import httpx +import logging +from typing import List, Optional +from urllib.parse import quote_plus + +logger = logging.getLogger(__name__) + + +class PatentSearchResult: + """Single patent search result""" + def __init__( + self, + publication_number: str, + title: str, + snippet: str, + publication_date: Optional[str], + assignee: Optional[str], + inventor: Optional[str], + status: str, + pdf_url: Optional[str] = None, + thumbnail_url: Optional[str] = None, + ): + self.publication_number = publication_number + self.title = title + self.snippet = snippet + self.publication_date = publication_date + self.assignee = assignee + self.inventor = inventor + self.status = status + self.pdf_url = pdf_url + self.thumbnail_url = thumbnail_url + + def to_dict(self): + return { + "publication_number": self.publication_number, + "title": self.title, + "snippet": self.snippet, + "publication_date": self.publication_date, + "assignee": self.assignee, + "inventor": self.inventor, + "status": self.status, + "pdf_url": self.pdf_url, + "thumbnail_url": self.thumbnail_url, + } + + +class PatentSearchService: + """Service for searching patents using Google Patents""" + + GOOGLE_PATENTS_XHR_URL = "https://patents.google.com/xhr/query" + GOOGLE_PATENTS_PDF_BASE = "https://patentimages.storage.googleapis.com/" + + def __init__(self): + self._client: Optional[httpx.AsyncClient] = None + + # Browser-like headers to avoid being blocked + DEFAULT_HEADERS = { + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Accept": "application/json, 
text/plain, */*", + "Accept-Language": "en-US,en;q=0.9", + "Referer": "https://patents.google.com/", + "Origin": "https://patents.google.com", + } + + async def _get_client(self) -> httpx.AsyncClient: + if self._client is None or self._client.is_closed: + self._client = httpx.AsyncClient( + timeout=30.0, + headers=self.DEFAULT_HEADERS, + follow_redirects=True, + ) + return self._client + + async def close(self): + if self._client and not self._client.is_closed: + await self._client.aclose() + + async def search( + self, + query: str, + max_results: int = 10, + ) -> dict: + """ + Search Google Patents for relevant patents + + Args: + query: Search query (can be a description or keywords) + max_results: Maximum number of results to return + + Returns: + Dict with total_results count and list of patent results + """ + try: + client = await self._get_client() + + # URL encode the query + encoded_query = quote_plus(query) + url = f"{self.GOOGLE_PATENTS_XHR_URL}?url=q%3D{encoded_query}&exp=&tags=" + + logger.info(f"Searching patents with query: {query[:100]}...") + + response = await client.get(url) + + if response.status_code != 200: + logger.error(f"Google Patents API returned status {response.status_code}") + return { + "total_results": 0, + "patents": [], + "error": f"API returned status {response.status_code}" + } + + data = response.json() + + # Parse results + results = data.get("results", {}) + total_num = results.get("total_num_results", 0) + clusters = results.get("cluster", []) + + patents: List[PatentSearchResult] = [] + + if clusters and len(clusters) > 0: + patent_results = clusters[0].get("result", []) + + for item in patent_results[:max_results]: + patent_data = item.get("patent", {}) + family_meta = patent_data.get("family_metadata", {}) + aggregated = family_meta.get("aggregated", {}) + country_status = aggregated.get("country_status", []) + + status = "UNKNOWN" + if country_status and len(country_status) > 0: + best_stage = country_status[0].get("best_patent_stage", {}) + status = best_stage.get("state", "UNKNOWN") + + # Build PDF URL if available + pdf_path = patent_data.get("pdf", "") + pdf_url = f"{self.GOOGLE_PATENTS_PDF_BASE}{pdf_path}" if pdf_path else None + + # Build thumbnail URL + thumbnail = patent_data.get("thumbnail", "") + thumbnail_url = f"{self.GOOGLE_PATENTS_PDF_BASE}{thumbnail}" if thumbnail else None + + patent = PatentSearchResult( + publication_number=patent_data.get("publication_number", ""), + title=self._clean_html(patent_data.get("title", "")), + snippet=self._clean_html(patent_data.get("snippet", "")), + publication_date=patent_data.get("publication_date"), + assignee=patent_data.get("assignee"), + inventor=patent_data.get("inventor"), + status=status, + pdf_url=pdf_url, + thumbnail_url=thumbnail_url, + ) + patents.append(patent) + + logger.info(f"Found {total_num} total patents, returning {len(patents)}") + + return { + "total_results": total_num, + "patents": [p.to_dict() for p in patents], + } + + except httpx.HTTPError as e: + logger.error(f"HTTP error searching patents: {e}") + return { + "total_results": 0, + "patents": [], + "error": str(e) + } + except Exception as e: + logger.error(f"Error searching patents: {e}") + return { + "total_results": 0, + "patents": [], + "error": str(e) + } + + def _clean_html(self, text: str) -> str: + """Remove HTML entities and tags from text""" + if not text: + return "" + # Replace common HTML entities + text = text.replace("…", "...") + text = text.replace("&", "&") + text = text.replace("<", "<") + text = 
text.replace(">", ">") + text = text.replace(""", '"') + text = text.replace("'", "'") + return text.strip() + + +# Singleton instance +patent_search_service = PatentSearchService() diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 379c940..cddaadd 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -1,17 +1,24 @@ -import { useState, useRef, useCallback, useEffect } from 'react'; -import { ConfigProvider, Layout, theme, Typography, Space, Tabs, Slider, Radio } from 'antd'; -import { ApartmentOutlined, ThunderboltOutlined, FilterOutlined } from '@ant-design/icons'; +import { useState, useRef, useCallback, useEffect, useMemo } from 'react'; +import { ConfigProvider, Layout, theme, Typography, Space, Tabs, Slider, Radio, Switch, Segmented } from 'antd'; +import { ApartmentOutlined, ThunderboltOutlined, FilterOutlined, SwapOutlined, FileSearchOutlined, GlobalOutlined } from '@ant-design/icons'; import { ThemeToggle } from './components/ThemeToggle'; import { InputPanel } from './components/InputPanel'; import { TransformationInputPanel } from './components/TransformationInputPanel'; import { MindmapPanel } from './components/MindmapPanel'; import { TransformationPanel } from './components/TransformationPanel'; import { DeduplicationPanel } from './components/DeduplicationPanel'; +import { PatentSearchPanel } from './components/PatentSearchPanel'; +import { DualPathInputPanel } from './components/DualPathInputPanel'; +import { DualPathMindmapPanel } from './components/DualPathMindmapPanel'; +import { CrossoverPanel } from './components/CrossoverPanel'; import { useAttribute } from './hooks/useAttribute'; +import { useDualPathAttribute } from './hooks/useDualPathAttribute'; import { getModels } from './services/api'; +import { crossoverPairsToDAGs, type CrossoverDAGResult } from './utils/crossoverToDAG'; +import { DualTransformationPanel } from './components/DualTransformationPanel'; import type { MindmapDAGRef } from './components/MindmapDAG'; import type { TransformationDAGRef } from './components/TransformationDAG'; -import type { CategoryMode, ExpertSource, ExpertTransformationDAGResult, DeduplicationMethod } from './types'; +import type { CategoryMode, ExpertSource, ExpertTransformationDAGResult, DeduplicationMethod, ExpertMode, CrossoverPair, PromptLanguage } from './types'; const { Header, Sider, Content } = Layout; const { Title } = Typography; @@ -24,7 +31,15 @@ interface VisualSettings { function App() { const [isDark, setIsDark] = useState(true); const [activeTab, setActiveTab] = useState('attribute'); + const [dualPathMode, setDualPathMode] = useState(false); + const [promptLanguage, setPromptLanguage] = useState('zh'); + + // Single path hook const { loading, progress, error, currentResult, history, analyze, loadFromHistory } = useAttribute(); + + // Dual path hook + const dualPath = useDualPathAttribute(); + const [visualSettings, setVisualSettings] = useState({ nodeSpacing: 32, fontSize: 14, @@ -32,6 +47,21 @@ function App() { const mindmapRef = useRef(null); const transformationRef = useRef(null); + // Dual path expert mode + const [expertMode, setExpertMode] = useState('shared'); + const [selectedCrossoverPairs, setSelectedCrossoverPairs] = useState([]); + + // Convert selected crossover pairs to two separate DAGs for dual transformation + const crossoverDAGs = useMemo((): CrossoverDAGResult | null => { + if (selectedCrossoverPairs.length === 0) return null; + if (!dualPath.pathA.result || !dualPath.pathB.result) return null; + return 
crossoverPairsToDAGs( + selectedCrossoverPairs, + dualPath.pathA.result, + dualPath.pathB.result + ); + }, [selectedCrossoverPairs, dualPath.pathA.result, dualPath.pathB.result]); + // Transformation Agent settings const [transformModel, setTransformModel] = useState(''); const [transformTemperature, setTransformTemperature] = useState(0.95); @@ -83,9 +113,10 @@ function App() { chainCount?: number, categoryMode?: CategoryMode, customCategories?: string[], - suggestedCategoryCount?: number + suggestedCategoryCount?: number, + lang?: PromptLanguage ) => { - await analyze(query, model, temperature, chainCount, categoryMode, customCategories, suggestedCategoryCount); + await analyze(query, model, temperature, chainCount, categoryMode, customCategories, suggestedCategoryCount, lang || promptLanguage); }; const handleResetView = useCallback(() => { @@ -96,6 +127,30 @@ function App() { setShouldStartTransform(true); }, []); + // Dual path analysis handler + const handleDualPathAnalyze = useCallback(async ( + queryA: string, + queryB: string, + options?: { + model?: string; + temperature?: number; + chainCount?: number; + categoryMode?: CategoryMode; + customCategories?: string[]; + suggestedCategoryCount?: number; + lang?: PromptLanguage; + } + ) => { + await dualPath.analyzeParallel(queryA, queryB, { ...options, lang: options?.lang || promptLanguage }); + }, [dualPath, promptLanguage]); + + // Handle mode switch + const handleModeSwitch = useCallback((checked: boolean) => { + setDualPathMode(checked); + // Reset to attribute tab when switching modes + setActiveTab('attribute'); + }, []); + return ( - + + + Single + } + unCheckedChildren={} + /> + Dual + + + + setPromptLanguage(value as PromptLanguage)} + options={[ + { label: '中文', value: 'zh' }, + { label: 'EN', value: 'en' }, + ]} + /> + + + + + Dual Path Attribute + + ), + children: ( +
+ +
+ ), + }, + { + key: 'crossover', + label: ( + + + Crossover + + ), + children: ( +
+ +
+ ), + }, + { + key: 'transformation', + label: ( + + + Transformation Agent + {crossoverDAGs && ( + + (A:{crossoverDAGs.pathA.nodes.length} / B:{crossoverDAGs.pathB.nodes.length}) + + )} + + ), + children: ( +
+ setShouldStartTransform(false)} + onLoadingChange={setTransformLoading} + /> +
+ ), + }, + { + key: 'patent', + label: ( + + + Patent Search + + ), + children: ( +
+ +
+ ), + }, + ] : [ + // ===== Single Path Mode Tabs ===== { key: 'attribute', label: ( @@ -196,6 +366,7 @@ function App() { expertConfig={expertConfig} expertSource={expertSource} expertLanguage={expertLanguage} + lang={promptLanguage} shouldStartTransform={shouldStartTransform} onTransformComplete={() => setShouldStartTransform(false)} onLoadingChange={setTransformLoading} @@ -221,6 +392,24 @@ function App() { onThresholdChange={setDeduplicationThreshold} method={deduplicationMethod} onMethodChange={setDeduplicationMethod} + lang={promptLanguage} + /> + + ), + }, + { + key: 'patent', + label: ( + + + Patent Search + + ), + children: ( +
+ r.descriptions)} + isDark={isDark} />
), @@ -236,24 +425,54 @@ function App() { overflow: 'auto', }} > - {activeTab === 'attribute' && ( + {activeTab === 'attribute' && !dualPathMode && ( loadFromHistory(item, lang || promptLanguage)} onResetView={handleResetView} visualSettings={visualSettings} onVisualSettingsChange={setVisualSettings} + lang={promptLanguage} /> )} + {activeTab === 'attribute' && dualPathMode && ( + + )} + {activeTab === 'crossover' && dualPathMode && ( +
+ + + Crossover Settings + + + Select attribute pairs in the main panel to create crossover combinations. + {selectedCrossoverPairs.length > 0 && ( +
+ + {selectedCrossoverPairs.length} pairs selected + +
+ )} +
+
+ )} {activeTab === 'transformation' && ( void; + onCrossoverReady?: (selectedPairs: CrossoverPair[]) => void; +} + +type ViewMode = 'list' | 'matrix'; + +export function CrossoverPanel({ + pathAResult, + pathBResult, + isDark, + expertMode, + onExpertModeChange, + onCrossoverReady, +}: CrossoverPanelProps) { + const [viewMode, setViewMode] = useState('list'); + + const { + pairs, + selectedPairs, + pairsByType, + crossTypeStats, + applyPairs, + togglePairSelection, + selectPairsByType, + selectAll, + clearPairs, + } = useAttributeCrossover(); + + // Generate pairs when both results are available + useEffect(() => { + if (pathAResult && pathBResult) { + applyPairs(pathAResult, pathBResult); + } else { + clearPairs(); + } + }, [pathAResult, pathBResult, applyPairs, clearPairs]); + + // Notify parent when selection changes + useEffect(() => { + onCrossoverReady?.(selectedPairs); + }, [selectedPairs, onCrossoverReady]); + + // Render when no data + if (!pathAResult || !pathBResult) { + return ( +
+
+  // Render when no data
+  if (!pathAResult || !pathBResult) {
+    return (
+
+              Complete both Path A and Path B analysis first
+
+              {!pathAResult && !pathBResult
+                ? 'Neither path has been analyzed'
+                : !pathAResult
+                  ? 'Path A has not been analyzed'
+                  : 'Path B has not been analyzed'}
+
+
+          }
+        />
+
+    );
+  }
+
+  // Generate cross type labels dynamically
+  const getCrossTypeLabel = (crossType: string): string => {
+    if (crossType.startsWith('same-')) {
+      const category = crossType.replace('same-', '');
+      return `Same Category: ${category}`;
+    }
+    if (crossType.startsWith('cross-')) {
+      const parts = crossType.replace('cross-', '').split('-');
+      if (parts.length >= 2) {
+        return `Cross: ${parts[0]} × ${parts.slice(1).join('-')}`;
+      }
+    }
+    return crossType;
+  };
+
+  const renderListView = () => {
+    const crossTypes = Object.keys(pairsByType);
+
+    if (crossTypes.length === 0) {
+      return ;
+    }
+
+    const collapseItems = crossTypes.map(type => {
+      const typePairs = pairsByType[type];
+      const stats = crossTypeStats[type];
+      const label = getCrossTypeLabel(type);
+
+      return {
+        key: type,
+        label: (
+
+            0 && stats.selected < stats.total}
+              onClick={(e) => e.stopPropagation()}
+              onChange={(e) => selectPairsByType(type, e.target.checked)}
+            />
+            {label}
+            0 ? '#52c41a' : '#d9d9d9',
+              }}
+            />
+
+        ),
+        children: (
+
+            {typePairs.map(pair => (
+
+            ))}
+
+        ),
+      };
+    });
+
+    return (
+      t.startsWith('same-'))}
+      />
+    );
+  };
+
+  const renderMatrixView = () => {
+    return (
+
+    );
+  };
+
+  return (
+
+      {/* Statistics Header */}
+
+
+
+            }
+          />
+
+
+            }
+            valueStyle={{ color: '#52c41a' }}
+          />
+
+
+
+
+
+
+
+      {/* Selection Preview */}
+
+
+      {/* Expert Mode Selection */}
+
+
+          Expert Team Mode
+           onExpertModeChange(e.target.value)}
+            buttonStyle="solid"
+          >
+
+              Shared Experts
+
+
+              Independent Experts
+
+
+
+            {expertMode === 'shared'
+              ? 'Both paths use the same expert team for crossover transformation'
+              : 'Each path uses its own expert team, combined for crossover'}
+
+
+
+
+      {/* Actions */}
+
+
+
+
+
+         setViewMode(e.target.value)}
+          buttonStyle="solid"
+          size="small"
+        >
+
+            List
+
+
+            Matrix
+
+
+
+      {/* Content */}
+
+        {viewMode === 'list' ? renderListView() : renderMatrixView()}
+
+
+
+    );
+}
diff --git a/frontend/src/components/DeduplicationPanel.tsx b/frontend/src/components/DeduplicationPanel.tsx
index 3397a54..687ebcd 100644
--- a/frontend/src/components/DeduplicationPanel.tsx
+++ b/frontend/src/components/DeduplicationPanel.tsx
@@ -26,6 +26,7 @@ import type {
   ExpertTransformationDAGResult,
   ExpertTransformationDescription,
   DeduplicationMethod,
+  PromptLanguage,
 } from '../types';

 const { Title, Text } = Typography;
@@ -37,6 +38,7 @@ interface DeduplicationPanelProps {
   onThresholdChange: (value: number) => void;
   method: DeduplicationMethod;
   onMethodChange?: (method: DeduplicationMethod) => void; // Optional, handled in App.tsx sidebar
+  lang?: PromptLanguage;
 }

 /**
@@ -48,6 +50,7 @@ export const DeduplicationPanel: React.FC<DeduplicationPanelProps> = ({
   threshold,
   onThresholdChange,
   method,
+  lang = 'zh',
   // onMethodChange is handled in App.tsx sidebar
 }) => {
   const { loading, result, error, progress, deduplicate, clearResult } = useDeduplication();
@@ -70,7 +73,7 @@

   const handleDeduplicate = () => {
     if (allDescriptions.length > 0) {
-      deduplicate(allDescriptions, threshold, method);
+      deduplicate(allDescriptions, threshold, method, lang);
     }
   };
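For context, the `deduplicate(descriptions, threshold, method, lang)` call above matches the extended DeduplicationRequest schema on the backend. A minimal sketch of how a useDeduplication hook might forward the new lang field — the helper name and the endpoint path are assumptions; only the request fields come from the schema:

import type {
  ExpertTransformationDescription,
  DeduplicationMethod,
  PromptLanguage,
} from '../types';

// Hypothetical request helper; '/api/deduplication' is a guessed endpoint path.
async function requestDeduplication(
  descriptions: ExpertTransformationDescription[],
  threshold: number,
  method: DeduplicationMethod,
  lang: PromptLanguage = 'zh',
): Promise<Response> {
  return fetch('/api/deduplication', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    // Field names mirror the DeduplicationRequest pydantic model:
    // descriptions, method, similarity_threshold, lang.
    body: JSON.stringify({
      descriptions,
      method,
      similarity_threshold: threshold,
      lang,
    }),
  });
}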
diff --git a/frontend/src/components/DualPathInputPanel.tsx b/frontend/src/components/DualPathInputPanel.tsx
new file mode 100644
index 0000000..c20b0b9
--- /dev/null
+++ b/frontend/src/components/DualPathInputPanel.tsx
@@ -0,0 +1,312 @@
+import { useState, useEffect } from 'react';
+import {
+  Input,
+  Button,
+  Select,
+  Typography,
+  Space,
+  message,
+  Slider,
+  Collapse,
+  Progress,
+  Card,
+  Alert,
+  Tag,
+  Divider,
+} from 'antd';
+import {
+  SearchOutlined,
+  LoadingOutlined,
+  SwapOutlined,
+} from '@ant-design/icons';
+import type { CategoryMode, DAGProgress, PromptLanguage } from '../types';
+import { getModels } from '../services/api';
+import { CategorySelector } from './CategorySelector';
+
+const { TextArea } = Input;
+const { Text } = Typography;
+
+interface DualPathInputPanelProps {
+  onAnalyze: (queryA: string, queryB: string, options?: {
+    model?: string;
+    temperature?: number;
+    chainCount?: number;
+    categoryMode?: CategoryMode;
+    customCategories?: string[];
+    suggestedCategoryCount?: number;
+    lang?: PromptLanguage;
+  }) => Promise<void>;
+  loadingA: boolean;
+  loadingB: boolean;
+  progressA: DAGProgress;
+  progressB: DAGProgress;
+  availableModels?: string[];
+  lang?: PromptLanguage;
+}
+
+export function DualPathInputPanel({
+  onAnalyze,
+  loadingA,
+  loadingB,
+  progressA,
+  progressB,
+  availableModels: propModels,
+  lang = 'zh',
+}: DualPathInputPanelProps) {
+  const [queryA, setQueryA] = useState('');
+  const [queryB, setQueryB] = useState('');
+  const [models, setModels] = useState<string[]>(propModels || []);
+  const [selectedModel, setSelectedModel] = useState<string>();
+  const [loadingModels, setLoadingModels] = useState(false);
+  const [temperature, setTemperature] = useState(0.7);
+  const [chainCount, setChainCount] = useState(5);
+  // Category settings
+  const [categoryMode, setCategoryMode] = useState<CategoryMode>('dynamic_auto' as CategoryMode);
+  const [customCategories, setCustomCategories] = useState<string[]>([]);
+  const [suggestedCategoryCount, setSuggestedCategoryCount] = useState(3);
+
+  const isLoading = loadingA || loadingB;
+
+  useEffect(() => {
+    if (propModels && propModels.length > 0) {
+      setModels(propModels);
+      if (!selectedModel) {
+        const defaultModel = propModels.find((m) => m.includes('qwen3')) || propModels[0];
+        setSelectedModel(defaultModel);
+      }
+      return;
+    }
+
+    async function fetchModels() {
+      setLoadingModels(true);
+      try {
+        const response = await getModels();
+        setModels(response.models);
+        if (response.models.length > 0 && !selectedModel) {
+          const defaultModel = response.models.find((m) => m.includes('qwen3')) || response.models[0];
+          setSelectedModel(defaultModel);
+        }
+      } catch {
+        message.error('Failed to fetch models');
+      } finally {
+        setLoadingModels(false);
+      }
+    }
+
+    fetchModels();
+  }, [propModels]);
+
+  const handleAnalyze = async () => {
+    if (!queryA.trim() || !queryB.trim()) {
+      message.warning(lang === 'zh' ? '請輸入兩個路徑的查詢內容' : 'Please enter queries for both paths');
+      return;
+    }
+
+    try {
+      await onAnalyze(queryA.trim(), queryB.trim(), {
+        model: selectedModel,
+        temperature,
+        chainCount,
+        categoryMode,
+        customCategories: customCategories.length > 0 ? customCategories : undefined,
+        suggestedCategoryCount,
+        lang,
+      });
+    } catch {
+      message.error(lang === 'zh' ? '分析失敗' : 'Analysis failed');
+    }
+  };
+
+  const handleSwapQueries = () => {
+    const temp = queryA;
+    setQueryA(queryB);
+    setQueryB(temp);
+  };
+
+  const renderProgressIndicator = (label: string, progress: DAGProgress, loading: boolean) => {
+    if (progress.step === 'idle' && !loading) return null;
+    if (progress.step === 'done') return null;
+
+    const percent = progress.step === 'step0'
+      ? 15
+      : progress.step === 'step1'
+        ? 50
+        : progress.step === 'relationships'
+          ? 85
+          : 100;
+
+    return (
+
+        {label}: {progress.message}
+
+
+    );
+  };
+
+  const collapseItems = [
+    {
+      key: 'categories',
+      label: 'Category Settings',
+      children: (
+
+      ),
+    },
+    {
+      key: 'llm',
+      label: 'LLM Parameters',
+      children: (
+
+          Temperature: {temperature}
+
+
+
+          Chain Count: {chainCount}
+
+
+
+      ),
+    },
+  ];
+
+  return (
+
+      {/* Dual Path Input Card */}
+       Dual Path Analysis}
+        styles={{ body: { padding: 12 } }}
+      >
+
+          {/* Model Selection */}
+
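// NOTE: getModels comes from ../services/api (see the import above); this diff
// does not show its implementation. From the `response.models` usage it is
// assumed to resolve to an object shaped roughly like the following — the
// interface name and example model strings are assumptions:
//
//   interface GetModelsResponse {
//     models: string[];   // e.g. ["qwen3:8b", "llama3:8b"]
//   }
//
//   declare function getModels(): Promise<GetModelsResponse>;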