chore: save local changes

2026-01-05 22:32:08 +08:00
parent bc281b8e0a
commit ec48709755
42 changed files with 5576 additions and 254 deletions

View File

@@ -3,10 +3,11 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .routers import attributes, transformation, expert_transformation, deduplication
from .routers import attributes, transformation, expert_transformation, deduplication, patent_search
from .services.llm_service import ollama_provider
from .services.embedding_service import embedding_service
from .services.llm_deduplication_service import llm_deduplication_service
from .services.patent_search_service import patent_search_service
@asynccontextmanager
@@ -15,6 +16,7 @@ async def lifespan(app: FastAPI):
await ollama_provider.close()
await embedding_service.close()
await llm_deduplication_service.close()
await patent_search_service.close()
app = FastAPI(
@@ -36,6 +38,7 @@ app.include_router(attributes.router)
app.include_router(transformation.router)
app.include_router(expert_transformation.router)
app.include_router(deduplication.router)
app.include_router(patent_search.router)
@app.get("/")
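A minimal runnable sketch of the lifespan pattern this hunk extends, with a stub standing in for `patent_search_service` (the real service owns an httpx client): every service opened for the app's lifetime gets its `close()` awaited on shutdown, and this commit simply appends the patent search service to that teardown list.

```python
from contextlib import asynccontextmanager
from fastapi import FastAPI

class StubService:
    """Stands in for patent_search_service; the real one owns an httpx client."""
    async def close(self) -> None:
        print("client closed")

patent_search_service = StubService()

@asynccontextmanager
async def lifespan(app: FastAPI):
    yield  # startup work (none here) happens before the yield
    # Shutdown: every service-owned HTTP client is closed here; this commit
    # appends patent_search_service to that teardown list.
    await patent_search_service.close()

app = FastAPI(lifespan=lifespan)
```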

View File

@@ -1,7 +1,10 @@
from pydantic import BaseModel
from typing import Optional, List, Dict
from typing import Optional, List, Dict, Literal
from enum import Enum
# Language type for prompts
LanguageType = Literal["zh", "en"]
class AttributeNode(BaseModel):
name: str
@@ -47,16 +50,19 @@ class CausalChain(BaseModel):
class StreamAnalyzeRequest(BaseModel):
"""多步驟分析請求(更新為支持動態類別)"""
"""Multi-step analysis request (updated to support dynamic categories)"""
query: str
model: Optional[str] = None
temperature: Optional[float] = 0.7
chain_count: int = 5 # 用戶可設定要生成多少條因果鏈
chain_count: int = 5 # User can set how many causal chains to generate
# 新增:動態類別支持
category_mode: Optional[str] = "dynamic_auto" # CategoryMode enum
# Dynamic category support
category_mode: Optional[str] = "dynamic_auto" # CategoryMode enum value
custom_categories: Optional[List[str]] = None
suggested_category_count: int = 3 # 建議 LLM 生成的類別數量
suggested_category_count: int = 3 # Suggested number of categories for the LLM to generate
# Language setting
lang: LanguageType = "zh"
class StreamAnalyzeResponse(BaseModel):
@@ -136,13 +142,14 @@ class DAGRelationship(BaseModel):
# ===== Transformation Agent schemas =====
class TransformationRequest(BaseModel):
"""Transformation Agent 請求"""
query: str # 原始查詢 (e.g., "腳踏車")
category: str # 類別名稱 (e.g., "功能")
attributes: List[str] # 該類別的屬性列表
"""Transformation Agent request"""
query: str # Original query (e.g., "bicycle")
category: str # Category name (e.g., "Functions")
attributes: List[str] # Attribute list for this category
model: Optional[str] = None
temperature: Optional[float] = 0.7
keyword_count: int = 3 # 要生成的新關鍵字數量
keyword_count: int = 3 # Number of new keywords to generate
lang: LanguageType = "zh" # Language for prompts
class TransformationDescription(BaseModel):
@@ -215,24 +222,27 @@ class ExpertSource(str, Enum):
class ExpertTransformationRequest(BaseModel):
"""Expert Transformation Agent 請求"""
"""Expert Transformation Agent request"""
query: str
category: str
attributes: List[str]
# Expert parameters
expert_count: int = 3 # 專家數量 (2-8)
keywords_per_expert: int = 1 # 每個專家為每個屬性生成幾個關鍵字 (1-3)
custom_experts: Optional[List[str]] = None # 用戶指定專家 ["藥師", "工程師"]
expert_count: int = 3 # Number of experts (2-8)
keywords_per_expert: int = 1 # Keywords per expert per attribute (1-3)
custom_experts: Optional[List[str]] = None # User-specified experts
# Expert source parameters
expert_source: ExpertSource = ExpertSource.LLM # 專家來源
expert_language: str = "en" # 外部來源的語言 (目前只有英文資料)
expert_source: ExpertSource = ExpertSource.LLM # Expert source
expert_language: str = "en" # Language for external sources
# LLM parameters
model: Optional[str] = None
temperature: Optional[float] = 0.7
# Prompt language
lang: LanguageType = "zh"
# ===== Deduplication Agent schemas =====
@@ -243,11 +253,12 @@ class DeduplicationMethod(str, Enum):
class DeduplicationRequest(BaseModel):
"""去重請求"""
"""Deduplication request"""
descriptions: List[ExpertTransformationDescription]
method: DeduplicationMethod = DeduplicationMethod.EMBEDDING # 去重方法
similarity_threshold: float = 0.85 # 餘弦相似度閾值 (0.0-1.0),僅 Embedding 使用
model: Optional[str] = None # Embedding/LLM 模型
method: DeduplicationMethod = DeduplicationMethod.EMBEDDING # Deduplication method
similarity_threshold: float = 0.85 # Cosine similarity threshold (0.0-1.0), only for Embedding
model: Optional[str] = None # Embedding/LLM model
lang: LanguageType = "zh" # Prompt language (for LLM method)
class DescriptionGroup(BaseModel):
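A hedged sketch of what the new `lang` field buys at the schema level, assuming Pydantic: `Literal["zh", "en"]` makes any other language code fail validation instead of silently producing a prompt in the wrong language. The model below is an abbreviated copy of `TransformationRequest`; the rejection check is illustrative.

```python
from typing import List, Literal, Optional
from pydantic import BaseModel, ValidationError

LanguageType = Literal["zh", "en"]

class TransformationRequestSketch(BaseModel):
    """Abbreviated copy of TransformationRequest for illustration."""
    query: str
    category: str
    attributes: List[str]
    model: Optional[str] = None
    temperature: Optional[float] = 0.7
    keyword_count: int = 3
    lang: LanguageType = "zh"  # new field: defaults to Chinese prompts

req = TransformationRequestSketch(
    query="bicycle", category="Functions",
    attributes=["transport", "exercise"], lang="en",
)
print(req.lang)  # en

try:
    TransformationRequestSketch(query="x", category="y", attributes=[], lang="fr")
except ValidationError as err:
    print("rejected field:", err.errors()[0]["loc"])  # ('lang',)
```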

View File

@@ -1,21 +1,37 @@
from typing import List, Optional, Dict
import json
DEFAULT_CATEGORIES = ["材料", "功能", "用途", "使用族群", "特性"]
CATEGORY_DESCRIPTIONS = {
"材料": "物件由什麼材料組成",
"功能": "物件能做什麼",
"用途": "物件在什麼場景使用",
"使用族群": "誰會使用這個物件",
"特性": "物件有什麼特徵",
}
from .language_config import (
LanguageType,
DEFAULT_CATEGORIES,
CATEGORY_DESCRIPTIONS,
)
def get_attribute_prompt(query: str, categories: Optional[List[str]] = None) -> str:
def get_default_categories(lang: LanguageType = "zh") -> List[str]:
return DEFAULT_CATEGORIES.get(lang, DEFAULT_CATEGORIES["zh"])
def get_category_descriptions(lang: LanguageType = "zh") -> Dict[str, str]:
return CATEGORY_DESCRIPTIONS.get(lang, CATEGORY_DESCRIPTIONS["zh"])
def get_attribute_prompt(
query: str,
categories: Optional[List[str]] = None,
lang: LanguageType = "zh"
) -> str:
"""Generate prompt with causal chain structure."""
if lang == "en":
prompt = f"""Analyze the attributes of "{query}" in a causal chain format: Materials→Functions→Usages→User Groups.
prompt = f"""分析「{query}」的屬性,以因果鏈方式呈現:材料→功能→用途→使用族群。
List 3-5 types of materials, each extending into a complete causal chain.
JSON format:
{{"name": "{query}", "children": [{{"name": "Material Name", "category": "Materials", "children": [{{"name": "Function Name", "category": "Functions", "children": [{{"name": "Usage Name", "category": "Usages", "children": [{{"name": "User Group Name", "category": "User Groups"}}]}}]}}]}}]}}
Return JSON only."""
else:
prompt = f"""分析「{query}」的屬性,以因果鏈方式呈現:材料→功能→用途→使用族群。
請列出 3-5 種材料,每種材料延伸出完整因果鏈。
@@ -27,9 +43,18 @@ JSON 格式:
return prompt
def get_step1_attributes_prompt(query: str) -> str:
"""Step 1: 生成各類別的屬性列表(平行結構)"""
return f"""/no_think
def get_step1_attributes_prompt(query: str, lang: LanguageType = "zh") -> str:
"""Step 1: Generate attribute list for each category (parallel structure)"""
if lang == "en":
return f"""/no_think
Analyze "{query}" and list attributes for the following four categories. List 3-5 common attributes for each category.
Return JSON only, in the following format:
{{"materials": ["material1", "material2", "material3"], "functions": ["function1", "function2", "function3"], "usages": ["usage1", "usage2", "usage3"], "users": ["user group1", "user group2", "user group3"]}}
Object: {query}"""
else:
return f"""/no_think
分析「{query}」,列出以下四個類別的屬性。每個類別列出 3-5 個常見屬性。
只回傳 JSON,格式如下:
@@ -45,21 +70,48 @@ def get_step2_causal_chain_prompt(
usages: List[str],
users: List[str],
existing_chains: List[dict],
chain_index: int
chain_index: int,
lang: LanguageType = "zh"
) -> str:
"""Step 2: 生成單條因果鏈"""
"""Step 2: Generate a single causal chain"""
existing_chains_text = ""
if existing_chains:
chains_list = [
f"- {c['material']}{c['function']}{c['usage']}{c['user']}"
for c in existing_chains
]
existing_chains_text = f"""
if lang == "en":
if existing_chains:
chains_list = [
f"- {c['material']}{c['function']}{c['usage']}{c['user']}"
for c in existing_chains
]
existing_chains_text = f"""
[Already generated causal chains, do not repeat]
{chr(10).join(chains_list)}
"""
return f"""/no_think
Generate causal chain #{chain_index} for "{query}".
[Available Materials] {', '.join(materials)}
[Available Functions] {', '.join(functions)}
[Available Usages] {', '.join(usages)}
[Available User Groups] {', '.join(users)}
{existing_chains_text}
[Rules]
1. Select one attribute from each category to form a logical causal chain
2. The causal relationship must be logical (materials determine functions, functions determine usages, usages determine user groups)
3. Do not repeat existing causal chains
Return JSON only:
{{"material": "selected material", "function": "selected function", "usage": "selected usage", "user": "selected user group"}}"""
else:
if existing_chains:
chains_list = [
f"- {c['material']}{c['function']}{c['usage']}{c['user']}"
for c in existing_chains
]
existing_chains_text = f"""
【已生成的因果鏈,請勿重複】
{chr(10).join(chains_list)}
"""
return f"""/no_think
return f"""/no_think
為「{query}」生成第 {chain_index} 條因果鏈。
【可選材料】{', '.join(materials)}
@@ -76,19 +128,52 @@ def get_step2_causal_chain_prompt(
{{"material": "選擇的材料", "function": "選擇的功能", "usage": "選擇的用途", "user": "選擇的族群"}}"""
def get_flat_attribute_prompt(query: str, categories: Optional[List[str]] = None) -> str:
def get_flat_attribute_prompt(
query: str,
categories: Optional[List[str]] = None,
lang: LanguageType = "zh"
) -> str:
"""Generate prompt with flat/parallel categories (original design)."""
cats = categories if categories else DEFAULT_CATEGORIES
cats = categories if categories else get_default_categories(lang)
cat_descs = get_category_descriptions(lang)
# Build category list
category_lines = []
for cat in cats:
desc = CATEGORY_DESCRIPTIONS.get(cat, f"{cat}的相關屬性")
category_lines.append(f"- {cat}:{desc}")
desc = cat_descs.get(cat, f"Related attributes of {cat}" if lang == "en" else f"{cat}的相關屬性")
category_lines.append(f"- {cat}: {desc}")
categories_text = "\n".join(category_lines)
prompt = f"""/no_think
if lang == "en":
prompt = f"""/no_think
You are an object attribute analysis expert. Please break down the user's input object into the following attribute categories.
[Required Categories]
{categories_text}
[Important] The return format must be valid JSON, and each node must have a "name" field:
```json
{{
"name": "Object Name",
"children": [
{{
"name": "Category Name",
"children": [
{{"name": "Attribute 1"}},
{{"name": "Attribute 2"}}
]
}}
]
}}
```
Return JSON only, no other text.
User input: {query}"""
else:
prompt = f"""/no_think
你是一個物件屬性分析專家。請將用戶輸入的物件拆解成以下屬性類別。
【必須包含的類別】
@@ -123,14 +208,42 @@ def get_flat_attribute_prompt(query: str, categories: Optional[List[str]] = None
def get_step0_category_analysis_prompt(
query: str,
suggested_count: int = 3,
exclude_categories: List[str] | None = None
exclude_categories: List[str] | None = None,
lang: LanguageType = "zh"
) -> str:
"""Step 0: LLM 分析建議類別"""
exclude_text = ""
if exclude_categories:
exclude_text = f"\n【禁止使用的類別】{', '.join(exclude_categories)}(這些已經是固定類別,不要重複建議)\n"
"""Step 0: LLM analyzes and suggests categories"""
return f"""/no_think
if lang == "en":
exclude_text = ""
if exclude_categories:
exclude_text = f"\n[Forbidden Categories] {', '.join(exclude_categories)} (These are already fixed categories, do not suggest duplicates)\n"
return f"""/no_think
Analyze "{query}" and suggest {suggested_count} most suitable attribute categories to describe it.
[Common Category References] Characteristics, Shape, Color, Size, Brand, Price Range, Weight, Style, Occasion, Season, Technical Specifications
{exclude_text}
[Important]
1. Choose categories that best describe the essence of this object
2. Categories should have logical relationships
3. Do not choose overly abstract or duplicate categories
4. Must suggest creative categories different from the reference list
Return JSON only:
{{
"categories": [
{{"name": "Category1", "description": "Description1", "order": 0}},
{{"name": "Category2", "description": "Description2", "order": 1}}
]
}}
Object: {query}"""
else:
exclude_text = ""
if exclude_categories:
exclude_text = f"\n【禁止使用的類別】{', '.join(exclude_categories)}(這些已經是固定類別,不要重複建議)\n"
return f"""/no_think
分析「{query}」,建議 {suggested_count} 個最適合的屬性類別來描述它。
【常見類別參考】特性、形狀、顏色、尺寸、品牌、價格區間、重量、風格、場合、季節、技術規格
@@ -154,21 +267,35 @@ def get_step0_category_analysis_prompt(
def get_step1_dynamic_attributes_prompt(
query: str,
categories: List # List[CategoryDefinition]
categories: List, # List[CategoryDefinition]
lang: LanguageType = "zh"
) -> str:
"""動態 Step 1 - 根據類別列表生成屬性"""
# 按 order 排序並構建描述
"""Dynamic Step 1 - Generate attributes based on category list"""
# Sort by order and build description
sorted_cats = sorted(categories, key=lambda x: x.order if hasattr(x, 'order') else x.get('order', 0))
category_desc = "\n".join([
f"- {cat.name if hasattr(cat, 'name') else cat['name']}: {cat.description if hasattr(cat, 'description') else cat.get('description', '相關屬性')}"
f"- {cat.name if hasattr(cat, 'name') else cat['name']}: {cat.description if hasattr(cat, 'description') else cat.get('description', 'Related attributes' if lang == 'en' else '相關屬性')}"
for cat in sorted_cats
])
category_keys = [cat.name if hasattr(cat, 'name') else cat['name'] for cat in sorted_cats]
json_template = {cat: ["屬性1", "屬性2", "屬性3"] for cat in category_keys}
return f"""/no_think
if lang == "en":
json_template = {cat: ["attribute1", "attribute2", "attribute3"] for cat in category_keys}
return f"""/no_think
Analyze "{query}" and list attributes for the following categories. List 3-5 common attributes for each category.
[Category List]
{category_desc}
Return JSON only:
{json.dumps(json_template, ensure_ascii=False, indent=2)}
Object: {query}"""
else:
json_template = {cat: ["屬性1", "屬性2", "屬性3"] for cat in category_keys}
return f"""/no_think
分析「{query}」,列出以下類別的屬性。每個類別列出 3-5 個常見屬性。
【類別列表】
@@ -185,30 +312,59 @@ def get_step2_dynamic_causal_chain_prompt(
categories: List, # List[CategoryDefinition]
attributes_by_category: Dict[str, List[str]],
existing_chains: List[Dict[str, str]],
chain_index: int
chain_index: int,
lang: LanguageType = "zh"
) -> str:
"""動態 Step 2 - 生成動態類別的因果鏈"""
"""Dynamic Step 2 - Generate causal chains for dynamic categories"""
sorted_cats = sorted(categories, key=lambda x: x.order if hasattr(x, 'order') else x.get('order', 0))
# 構建可選屬性
# Build available attributes
available_attrs = "\n".join([
f"{cat.name if hasattr(cat, 'name') else cat['name']}{', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}"
f"[{cat.name if hasattr(cat, 'name') else cat['name']}] {', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}"
for cat in sorted_cats
])
# 已生成的因果鏈
existing_text = ""
if existing_chains:
chains_list = [
"".join([chain.get(cat.name if hasattr(cat, 'name') else cat['name'], '?') for cat in sorted_cats])
for chain in existing_chains
]
existing_text = f"\n【已生成,請勿重複】\n" + "\n".join([f"- {c}" for c in chains_list])
if lang == "en":
# Already generated causal chains
existing_text = ""
if existing_chains:
chains_list = [
"".join([chain.get(cat.name if hasattr(cat, 'name') else cat['name'], '?') for cat in sorted_cats])
for chain in existing_chains
]
existing_text = "\n[Already generated, do not repeat]\n" + "\n".join([f"- {c}" for c in chains_list])
# JSON 模板
json_template = {cat.name if hasattr(cat, 'name') else cat['name']: f"選擇的{cat.name if hasattr(cat, 'name') else cat['name']}" for cat in sorted_cats}
# JSON template
json_template = {cat.name if hasattr(cat, 'name') else cat['name']: f"selected {cat.name if hasattr(cat, 'name') else cat['name']}" for cat in sorted_cats}
return f"""/no_think
return f"""/no_think
Generate causal chain #{chain_index} for "{query}".
[Available Attributes]
{available_attrs}
{existing_text}
[Rules]
1. Select one attribute from each category
2. Causal relationships must be logical
3. Do not repeat
Return JSON only:
{json.dumps(json_template, ensure_ascii=False, indent=2)}"""
else:
# 已生成的因果鏈
existing_text = ""
if existing_chains:
chains_list = [
"".join([chain.get(cat.name if hasattr(cat, 'name') else cat['name'], '?') for cat in sorted_cats])
for chain in existing_chains
]
existing_text = "\n【已生成,請勿重複】\n" + "\n".join([f"- {c}" for c in chains_list])
# JSON 模板
json_template = {cat.name if hasattr(cat, 'name') else cat['name']: f"選擇的{cat.name if hasattr(cat, 'name') else cat['name']}" for cat in sorted_cats}
return f"""/no_think
為「{query}」生成第 {chain_index} 條因果鏈。
【可選屬性】
@@ -230,20 +386,46 @@ def get_step2_dag_relationships_prompt(
query: str,
categories: List, # List[CategoryDefinition]
attributes_by_category: Dict[str, List[str]],
lang: LanguageType = "zh"
) -> str:
"""生成相鄰類別之間的自然關係"""
"""Generate natural relationships between adjacent categories"""
sorted_cats = sorted(categories, key=lambda x: x.order if hasattr(x, 'order') else x.get('order', 0))
# Build attribute listing
attr_listing = "\n".join([
f"{cat.name if hasattr(cat, 'name') else cat['name']}{', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}"
f"[{cat.name if hasattr(cat, 'name') else cat['name']}] {', '.join(attributes_by_category.get(cat.name if hasattr(cat, 'name') else cat['name'], []))}"
for cat in sorted_cats
])
# Build direction hints
direction_hints = "".join([cat.name if hasattr(cat, 'name') else cat['name'] for cat in sorted_cats])
return f"""/no_think
if lang == "en":
return f"""/no_think
Analyze the attribute relationships of "{query}".
{attr_listing}
[Relationship Direction] {direction_hints}
[Rules]
1. Only establish relationships between adjacent categories (e.g., Materials→Functions, Functions→Usages)
2. Only output pairs that have true causal or associative relationships
3. An attribute can connect to multiple downstream attributes, or none at all
4. Not every attribute needs to have connections
5. Relationships should be reasonable and meaningful
Return JSON:
{{
"relationships": [
{{"source_category": "CategoryA", "source": "attribute name", "target_category": "CategoryB", "target": "attribute name"}},
...
]
}}
Return JSON only."""
else:
return f"""/no_think
分析「{query}」的屬性關係。
{attr_listing}
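For reference, a small sketch (dicts abbreviated) of the fallback behaviour of the `get_default_categories` / `get_category_descriptions` helpers added above: `dict.get` with a default means any unrecognised language code degrades to the Chinese tables rather than raising.

```python
from typing import Dict, List

DEFAULT_CATEGORIES: Dict[str, List[str]] = {
    "zh": ["材料", "功能"],           # abbreviated
    "en": ["Materials", "Functions"],
}

def get_default_categories(lang: str = "zh") -> List[str]:
    # Unknown codes fall back to the Chinese defaults instead of raising.
    return DEFAULT_CATEGORIES.get(lang, DEFAULT_CATEGORIES["zh"])

print(get_default_categories("en"))  # ['Materials', 'Functions']
print(get_default_categories("de"))  # ['材料', '功能'] (fallback)
```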

View File

@@ -1,34 +1,68 @@
"""Expert Transformation Agent 提示詞模組"""
"""Expert Transformation Agent prompts module - Bilingual support"""
from typing import List, Optional
from .language_config import LanguageType
def get_expert_generation_prompt(
query: str,
categories: List[str],
expert_count: int,
custom_experts: Optional[List[str]] = None
custom_experts: Optional[List[str]] = None,
lang: LanguageType = "zh"
) -> str:
"""Step 0: 生成專家團隊(不依賴主題,純隨機多元)"""
"""Step 0: Generate expert team (not dependent on topic, purely random and diverse)"""
import time
import random
custom_text = ""
if custom_experts and len(custom_experts) > 0:
custom_text = f"(已指定:{', '.join(custom_experts[:expert_count])}"
# 加入時間戳和隨機數來增加多樣性
# Add timestamp and random number for diversity
seed = int(time.time() * 1000) % 10000
diversity_hints = [
"冷門、非主流、跨領域",
"罕見職業、新興領域、邊緣學科",
"非傳統、創新、小眾專業",
"未來趨向、實驗性、非常規",
"跨文化、混合領域、獨特視角"
]
hint = random.choice(diversity_hints)
return f"""/no_think
if lang == "en":
custom_text = ""
if custom_experts and len(custom_experts) > 0:
custom_text = f" (Specified: {', '.join(custom_experts[:expert_count])})"
diversity_hints = [
"obscure, non-mainstream, cross-disciplinary",
"rare occupations, emerging fields, fringe disciplines",
"unconventional, innovative, niche specialties",
"future-oriented, experimental, non-traditional",
"cross-cultural, hybrid fields, unique perspectives"
]
hint = random.choice(diversity_hints)
return f"""/no_think
Randomly assemble a team of {expert_count} experts from completely different fields{custom_text}.
[Innovation Requirements] (Random seed: {seed})
- Prioritize {hint} experts
- Avoid common professions (such as doctors, engineers, teachers, lawyers, etc.)
- Each expert must be from a completely unrelated field
- The rarer and more innovative, the better
Return JSON:
{{"experts": [{{"id": "expert-0", "name": "profession", "domain": "field", "perspective": "viewpoint"}}, ...]}}
Rules:
- id should be expert-0 to expert-{expert_count - 1}
- name is the profession name (not a person's name), 2-5 words
- domain should be specific and unique, no duplicate types"""
else:
custom_text = ""
if custom_experts and len(custom_experts) > 0:
custom_text = f"(已指定:{', '.join(custom_experts[:expert_count])}"
diversity_hints = [
"冷門、非主流、跨領域",
"罕見職業、新興領域、邊緣學科",
"非傳統、創新、小眾專業",
"未來趨向、實驗性、非常規",
"跨文化、混合領域、獨特視角"
]
hint = random.choice(diversity_hints)
return f"""/no_think
隨機組建 {expert_count} 個來自完全不同領域的專家團隊{custom_text}
【創新要求】(隨機種子:{seed})
@@ -50,13 +84,39 @@ def get_expert_keyword_generation_prompt(
category: str,
attribute: str,
experts: List[dict], # List[ExpertProfile]
keywords_per_expert: int = 1
keywords_per_expert: int = 1,
lang: LanguageType = "zh"
) -> str:
"""Step 1: 專家視角關鍵字生成"""
# 建立專家列表,格式更清晰
"""Step 1: Expert perspective keyword generation"""
# Build expert list in clearer format
experts_list = "\n".join([f"- {exp['id']}: {exp['name']}" for exp in experts])
return f"""/no_think
if lang == "en":
return f"""/no_think
You need to play the role of the following experts to generate innovative keywords for an attribute:
[Expert List]
{experts_list}
[Task]
Attribute: "{attribute}" (Category: {category})
For each expert, please:
1. First understand the professional background, knowledge domain, and work content of that profession
2. Think about "{attribute}" from that profession's unique perspective
3. Generate {keywords_per_expert} innovative keyword(s) related to that specialty (2-6 words)
Keywords must reflect that expert's professional thinking style, for example:
- Accountant viewing "movement" → "cash flow", "cost-benefit"
- Architect viewing "movement" → "circulation design", "spatial flow"
- Psychologist viewing "movement" → "behavioral motivation", "emotional transition"
Return JSON:
{{"keywords": [{{"keyword": "term", "expert_id": "expert-X", "expert_name": "name"}}, ...]}}
Total of {len(experts) * keywords_per_expert} keywords needed, each keyword must be clearly related to the corresponding expert's professional field."""
else:
return f"""/no_think
你需要扮演以下專家,為屬性生成創新關鍵字:
【專家名單】
@@ -86,13 +146,29 @@ def get_single_description_prompt(
keyword: str,
expert_id: str,
expert_name: str,
expert_domain: str
expert_domain: str,
lang: LanguageType = "zh"
) -> str:
"""Step 2: 為單一關鍵字生成描述"""
# 如果 domain 是通用的,就只用職業名稱
domain_text = f"{expert_domain}領域)" if expert_domain and expert_domain != "Professional Field" else ""
"""Step 2: Generate description for a single keyword"""
if lang == "en":
# If domain is generic, just use profession name
domain_text = f" ({expert_domain} field)" if expert_domain and expert_domain != "Professional Field" else ""
return f"""/no_think
return f"""/no_think
You are a {expert_name}{domain_text}.
Task: Generate an innovative application description for "{query}".
Keyword: {keyword}
From your professional perspective, explain how to apply the concept of "{keyword}" to "{query}". The description should be specific, creative, 15-30 words.
Return JSON only, no other text:
{{"description": "your innovative application description"}}"""
else:
# 如果 domain 是通用的,就只用職業名稱
domain_text = f"{expert_domain}領域)" if expert_domain and expert_domain != "Professional Field" else ""
return f"""/no_think
你是一位{expert_name}{domain_text}
任務:為「{query}」生成一段創新應用描述。
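All of the builders in this module follow the same shape: one `if lang == "en":` branch per function, each returning a complete template. A compressed, runnable stand-in for `get_single_description_prompt` (templates shortened, not the real ones):

```python
from typing import Literal

LanguageType = Literal["zh", "en"]

def get_single_description_prompt_sketch(
    query: str, keyword: str, expert_name: str, lang: LanguageType = "zh"
) -> str:
    """Compressed stand-in for the real builder; actual templates are longer."""
    if lang == "en":
        return f"""/no_think
You are a {expert_name}.
Task: generate an innovative application description for "{query}" using "{keyword}".
Return JSON only: {{"description": "..."}}"""
    # Default branch keeps the original Chinese template.
    return f"""/no_think
你是一位{expert_name}。
任務:用「{keyword}」為「{query}」生成一段創新應用描述。"""

print(get_single_description_prompt_sketch("umbrella", "cash flow", "accountant", lang="en"))
```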

View File

@@ -0,0 +1,51 @@
"""Language configuration for prompts"""
from enum import Enum
from typing import Literal
class Language(str, Enum):
CHINESE = "zh"
ENGLISH = "en"
LanguageType = Literal["zh", "en"]
# Default categories for each language
DEFAULT_CATEGORIES = {
"zh": ["材料", "功能", "用途", "使用族群", "特性"],
"en": ["Materials", "Functions", "Usages", "User Groups", "Characteristics"],
}
CATEGORY_DESCRIPTIONS = {
"zh": {
"材料": "物件由什麼材料組成",
"功能": "物件能做什麼",
"用途": "物件在什麼場景使用",
"使用族群": "誰會使用這個物件",
"特性": "物件有什麼特徵",
},
"en": {
"Materials": "What materials the object is made of",
"Functions": "What the object can do",
"Usages": "In what scenarios the object is used",
"User Groups": "Who uses this object",
"Characteristics": "What features the object has",
},
}
# Category name mappings between languages
CATEGORY_MAPPING = {
"zh_to_en": {
"材料": "Materials",
"功能": "Functions",
"用途": "Usages",
"使用族群": "User Groups",
"特性": "Characteristics",
},
"en_to_zh": {
"Materials": "材料",
"Functions": "功能",
"Usages": "用途",
"User Groups": "使用族群",
"Characteristics": "特性",
},
}
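One hedged example of what `CATEGORY_MAPPING` enables: translating category keys when a tree built in one language is reused in the other. The `translate_categories` helper below is illustrative, not part of the commit.

```python
from typing import List

CATEGORY_MAPPING = {
    "zh_to_en": {"材料": "Materials", "功能": "Functions"},  # abbreviated
    "en_to_zh": {"Materials": "材料", "Functions": "功能"},
}

def translate_categories(names: List[str], direction: str) -> List[str]:
    """Hypothetical helper: map category names, passing unknown ones through."""
    table = CATEGORY_MAPPING[direction]
    return [table.get(name, name) for name in names]

print(translate_categories(["材料", "功能"], "zh_to_en"))     # ['Materials', 'Functions']
print(translate_categories(["Materials", "風格"], "en_to_zh"))  # ['材料', '風格']
```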

View File

@@ -1,22 +1,43 @@
"""Transformation Agent 提示詞模組"""
"""Transformation Agent prompts module - Bilingual support"""
from typing import List
from .language_config import LanguageType
def get_keyword_generation_prompt(
category: str,
attributes: List[str],
keyword_count: int = 3
keyword_count: int = 3,
lang: LanguageType = "zh"
) -> str:
"""
Step 1: 生成新關鍵字
Step 1: Generate new keywords
給定類別和現有屬性,生成全新的、有創意的關鍵字。
不考慮原始查詢,只專注於類別本身可能的延伸。
Given a category and existing attributes, generate new, creative keywords.
Ignore the original query; focus only on possible extensions of the category itself.
"""
attrs_text = "".join(attributes)
attrs_text = ", ".join(attributes) if lang == "en" else "".join(attributes)
return f"""/no_think
if lang == "en":
return f"""/no_think
You are a creative brainstorming expert. Given a category and its existing attributes, please generate new, creative keywords or descriptive phrases.
[Category] {category}
[Existing Attributes] {attrs_text}
[Important Rules]
1. Generate {keyword_count} completely new keywords
2. Keywords must fit within the scope of "{category}" category
3. Keywords should be creative and not duplicate or be too similar to existing attributes
4. Don't consider any specific object, focus only on possible extensions of this category
5. Each keyword should be 2-6 words
Return JSON only:
{{
"keywords": ["keyword1", "keyword2", "keyword3"]
}}"""
else:
return f"""/no_think
你是一個創意發想專家。給定一個類別和該類別下的現有屬性,請生成全新的、有創意的關鍵字或描述片段。
【類別】{category}
@@ -38,14 +59,36 @@ def get_keyword_generation_prompt(
def get_description_generation_prompt(
query: str,
category: str,
keyword: str
keyword: str,
lang: LanguageType = "zh"
) -> str:
"""
Step 2: 結合原始查詢生成描述
Step 2: Combine with original query to generate description
用新關鍵字創造一個與原始查詢相關的創新應用描述。
Use the new keyword to create an innovative application description related to the original query.
"""
return f"""/no_think
if lang == "en":
return f"""/no_think
You are an innovation application expert. Please apply a new keyword concept to a specific object to create an innovative application description.
[Object] {query}
[Category] {category}
[New Keyword] {keyword}
[Task]
Using the concept of "{keyword}", create an innovative application description for "{query}".
The description should be a complete sentence or phrase explaining how to apply this new concept to the object.
[Example Format]
- If the object is "bicycle" and keyword is "monitor", you could generate "bicycle monitors the rider's health status"
- If the object is "umbrella" and keyword is "generate power", you could generate "umbrella generates electricity using raindrop impacts"
Return JSON only:
{{
"description": "innovative application description"
}}"""
else:
return f"""/no_think
你是一個創新應用專家。請將一個新的關鍵字概念應用到特定物件上,創造出創新的應用描述。
【物件】{query}
@@ -69,15 +112,35 @@ def get_description_generation_prompt(
def get_batch_description_prompt(
query: str,
category: str,
keywords: List[str]
keywords: List[str],
lang: LanguageType = "zh"
) -> str:
"""
批次生成描述(可選的優化版本,一次處理多個關鍵字)
Batch description generation (optional optimized version, process multiple keywords at once)
"""
keywords_text = "".join(keywords)
keywords_json = ", ".join([f'"{k}"' for k in keywords])
keywords_text = ", ".join(keywords) if lang == "en" else "".join(keywords)
return f"""/no_think
if lang == "en":
return f"""/no_think
You are an innovation application expert. Please apply multiple new keyword concepts to a specific object, creating an innovative application description for each keyword.
[Object] {query}
[Category] {category}
[New Keywords] {keywords_text}
[Task]
Create an innovative application description related to "{query}" for each keyword.
Each description should be a complete sentence or phrase.
Return JSON only:
{{
"descriptions": [
{{"keyword": "keyword1", "description": "description1"}},
{{"keyword": "keyword2", "description": "description2"}}
]
}}"""
else:
return f"""/no_think
你是一個創新應用專家。請將多個新的關鍵字概念應用到特定物件上,為每個關鍵字創造創新的應用描述。
【物件】{query}
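To make the two-step flow concrete: Step 1 brainstorms keywords from the category alone, Step 2 reattaches them to the original query. A runnable sketch with a canned stand-in for `ollama_provider.generate` (the JSON payloads mirror the formats the prompts ask for):

```python
import json

def fake_generate(step: int) -> str:
    """Canned stand-in for ollama_provider.generate(); shapes match the prompts."""
    if step == 1:
        return json.dumps({"keywords": ["monitor", "generate power", "fold flat"]})
    return json.dumps({"descriptions": [
        {"keyword": "monitor",
         "description": "bicycle monitors the rider's health status"},
    ]})

# Step 1: brainstorm keywords from category + attributes only (query is withheld)
keywords = json.loads(fake_generate(1))["keywords"]

# Step 2: reattach the original query to each keyword to form descriptions
descriptions = json.loads(fake_generate(2))["descriptions"]
print(keywords)
print(descriptions[0]["description"])
```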

View File

@@ -58,7 +58,8 @@ async def execute_step0(
prompt = get_step0_category_analysis_prompt(
request.query,
request.suggested_category_count,
exclude_categories=exclude_categories
exclude_categories=exclude_categories,
lang=request.lang
)
temperature = request.temperature if request.temperature is not None else 0.7
response = await ollama_provider.generate(
@@ -310,7 +311,7 @@ async def generate_sse_events(request: StreamAnalyzeRequest) -> AsyncGenerator[s
# ========== Step 1: Generate Attributes (Dynamic) ==========
yield f"event: step1_start\ndata: {json.dumps({'message': '生成屬性...'}, ensure_ascii=False)}\n\n"
step1_prompt = get_step1_dynamic_attributes_prompt(request.query, final_categories)
step1_prompt = get_step1_dynamic_attributes_prompt(request.query, final_categories, lang=request.lang)
logger.info(f"Step 1 prompt: {step1_prompt[:200]}")
step1_response = await ollama_provider.generate(
@@ -330,6 +331,7 @@ async def generate_sse_events(request: StreamAnalyzeRequest) -> AsyncGenerator[s
query=request.query,
categories=final_categories,
attributes_by_category=step1_result.attributes,
lang=request.lang
)
logger.info(f"Step 2 (relationships) prompt: {step2_prompt[:300]}")

View File

@@ -63,7 +63,8 @@ async def deduplicate_descriptions(request: DeduplicationRequest) -> Deduplicati
# 使用 LLM 成對比較去重
result = await llm_deduplication_service.deduplicate(
descriptions=request.descriptions,
model=request.model
model=request.model,
lang=request.lang
)
return result
except ValueError as e:

View File

@@ -68,7 +68,8 @@ async def generate_expert_transformation_events(
query=request.query,
categories=all_categories,
expert_count=request.expert_count,
custom_experts=actual_custom_experts if actual_custom_experts else None
custom_experts=actual_custom_experts if actual_custom_experts else None,
lang=request.lang
)
logger.info(f"Expert prompt: {expert_prompt[:200]}")
@@ -119,7 +120,8 @@ async def generate_expert_transformation_events(
query=request.query,
categories=all_categories,
expert_count=request.expert_count,
custom_experts=actual_custom_experts if actual_custom_experts else None
custom_experts=actual_custom_experts if actual_custom_experts else None,
lang=request.lang
)
expert_response = await ollama_provider.generate(
@@ -160,7 +162,8 @@ async def generate_expert_transformation_events(
category=request.category,
attribute=attribute,
experts=[e.model_dump() for e in experts],
keywords_per_expert=request.keywords_per_expert
keywords_per_expert=request.keywords_per_expert,
lang=request.lang
)
logger.info(f"Keyword prompt for '{attribute}': {kw_prompt[:300]}")
@@ -214,7 +217,8 @@ async def generate_expert_transformation_events(
keyword=kw.keyword,
expert_id=kw.expert_id,
expert_name=kw.expert_name,
expert_domain=expert_domain
expert_domain=expert_domain,
lang=request.lang
)
desc_response = await ollama_provider.generate(

View File

@@ -0,0 +1,133 @@
"""Patent Search Router - Search for similar patents"""
import logging
from typing import Optional, List
from fastapi import APIRouter
from pydantic import BaseModel
from ..services.patent_search_service import patent_search_service
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/patent", tags=["patent"])
# ===== Request/Response Models =====
class PatentSearchRequest(BaseModel):
"""Patent search request"""
query: str # Search query (description or keywords)
max_results: int = 10 # Maximum results to return (1-20)
class PatentResult(BaseModel):
"""Single patent result"""
publication_number: str
title: str
snippet: str
publication_date: Optional[str] = None
assignee: Optional[str] = None
inventor: Optional[str] = None
status: str # ACTIVE, NOT_ACTIVE, UNKNOWN
pdf_url: Optional[str] = None
thumbnail_url: Optional[str] = None
class PatentSearchResponse(BaseModel):
"""Patent search response"""
query: str
total_results: int
patents: List[PatentResult]
error: Optional[str] = None
class BatchPatentSearchRequest(BaseModel):
"""Batch patent search request - search multiple descriptions"""
queries: List[str] # List of descriptions to search
max_results_per_query: int = 5 # Max results per query
class BatchPatentSearchResult(BaseModel):
"""Results for a single query in batch search"""
query: str
total_results: int
patents: List[PatentResult]
error: Optional[str] = None
class BatchPatentSearchResponse(BaseModel):
"""Batch patent search response"""
results: List[BatchPatentSearchResult]
total_queries: int
# ===== Endpoints =====
@router.post("/search", response_model=PatentSearchResponse)
async def search_patents(request: PatentSearchRequest):
"""
Search for patents similar to the given description/query.
Uses Google Patents to find related patents based on keywords.
"""
logger.info(f"Patent search request: {request.query[:100]}...")
# Limit max_results to reasonable range
max_results = min(max(1, request.max_results), 20)
result = await patent_search_service.search(
query=request.query,
max_results=max_results,
)
return PatentSearchResponse(
query=request.query,
total_results=result.get("total_results", 0),
patents=[PatentResult(**p) for p in result.get("patents", [])],
error=result.get("error"),
)
@router.post("/search/batch", response_model=BatchPatentSearchResponse)
async def batch_search_patents(request: BatchPatentSearchRequest):
"""
Search for patents for multiple descriptions at once.
Useful for checking multiple creative descriptions against patents.
"""
logger.info(f"Batch patent search: {len(request.queries)} queries")
# Limit results per query
max_per_query = min(max(1, request.max_results_per_query), 10)
results: List[BatchPatentSearchResult] = []
for query in request.queries:
result = await patent_search_service.search(
query=query,
max_results=max_per_query,
)
results.append(BatchPatentSearchResult(
query=query,
total_results=result.get("total_results", 0),
patents=[PatentResult(**p) for p in result.get("patents", [])],
error=result.get("error"),
))
return BatchPatentSearchResponse(
results=results,
total_queries=len(request.queries),
)
@router.get("/health")
async def patent_search_health():
"""Check if patent search service is working"""
# Do a simple test search
result = await patent_search_service.search("test", max_results=1)
if result.get("error"):
return {"status": "unhealthy", "error": result["error"]}
return {"status": "healthy"}

View File

@@ -36,7 +36,8 @@ async def generate_transformation_events(
keyword_prompt = get_keyword_generation_prompt(
category=request.category,
attributes=request.attributes,
keyword_count=request.keyword_count
keyword_count=request.keyword_count,
lang=request.lang
)
logger.info(f"Keyword prompt: {keyword_prompt[:200]}")
@@ -61,7 +62,8 @@ async def generate_transformation_events(
desc_prompt = get_batch_description_prompt(
query=request.query,
category=request.category,
keywords=new_keywords
keywords=new_keywords,
lang=request.lang
)
logger.info(f"Description prompt: {desc_prompt[:300]}")

View File

@@ -1,12 +1,12 @@
"""
LLM Deduplication Service - 使用 LLM 成對比較進行去重
LLM Deduplication Service - Using LLM pairwise comparison for deduplication
LLM 判斷兩個描述是否語意重複,透過並行處理加速。
Let an LLM determine whether two descriptions are semantic duplicates, accelerated by parallel processing.
"""
import asyncio
import logging
from typing import List, Tuple, Optional
from typing import List, Tuple, Optional, Literal
import httpx
import numpy as np
@@ -18,6 +18,7 @@ from ..models.schemas import (
DeduplicationMethod,
DescriptionGroup,
)
from ..prompts.language_config import LanguageType
logger = logging.getLogger(__name__)
@@ -31,27 +32,20 @@ class LLMDeduplicationService:
self.client = httpx.AsyncClient(timeout=60.0)
self.max_concurrent = 5 # 最大並行數,避免 Ollama 過載
async def compare_pair(
self,
desc1: str,
desc2: str,
model: str,
semaphore: asyncio.Semaphore
) -> bool:
"""
讓 LLM 判斷兩個描述是否語意重複
def _get_comparison_prompt(self, desc1: str, desc2: str, lang: LanguageType = "zh") -> str:
"""Get comparison prompt in the specified language"""
if lang == "en":
return f"""Determine whether the following two innovative descriptions express the same or very similar concepts:
Args:
desc1: 第一個描述
desc2: 第二個描述
model: LLM 模型名稱
semaphore: 並行控制信號量
Description 1: {desc1}
Returns:
bool: 是否為重複描述
"""
async with semaphore: # 控制並行數
prompt = f"""判斷以下兩個創新描述是否表達相同或非常相似的概念:
Description 2: {desc2}
If both descriptions essentially express the same or very similar innovative concept, answer "YES"
If the two descriptions express different innovative concepts, answer "NO"
Only answer YES or NO, no other text"""
else:
return f"""判斷以下兩個創新描述是否表達相同或非常相似的概念:
描述1: {desc1}
@@ -61,6 +55,30 @@ class LLMDeduplicationService:
如果兩者描述不同的創新概念,回答 "NO"
只回答 YES 或 NO,不要其他文字"""
async def compare_pair(
self,
desc1: str,
desc2: str,
model: str,
semaphore: asyncio.Semaphore,
lang: LanguageType = "zh"
) -> bool:
"""
Let an LLM determine whether two descriptions are semantic duplicates
Args:
desc1: First description
desc2: Second description
model: LLM model name
semaphore: Concurrency control semaphore
lang: Language for the prompt
Returns:
bool: Whether the descriptions are duplicates
"""
async with semaphore: # Control concurrency
prompt = self._get_comparison_prompt(desc1, desc2, lang)
try:
response = await self.client.post(
f"{self.base_url}/api/generate",
@@ -86,26 +104,28 @@ class LLMDeduplicationService:
async def compare_batch(
self,
pairs: List[Tuple[int, int, str, str]],
model: str
model: str,
lang: LanguageType = "zh"
) -> List[Tuple[int, int, bool]]:
"""
並行批次比較多個描述對
Parallel batch comparison of multiple description pairs
Args:
pairs: 待比較的配對列表 [(i, j, desc1, desc2), ...]
model: LLM 模型名稱
pairs: List of pairs to compare [(i, j, desc1, desc2), ...]
model: LLM model name
lang: Language for the prompt
Returns:
比較結果列表 [(i, j, is_similar), ...]
List of comparison results [(i, j, is_similar), ...]
"""
semaphore = asyncio.Semaphore(self.max_concurrent)
async def compare_one(pair: Tuple[int, int, str, str]) -> Tuple[int, int, bool]:
i, j, desc1, desc2 = pair
is_similar = await self.compare_pair(desc1, desc2, model, semaphore)
is_similar = await self.compare_pair(desc1, desc2, model, semaphore, lang)
return (i, j, is_similar)
# 使用 asyncio.gather 並行執行所有比較
# Use asyncio.gather to execute all comparisons in parallel
results = await asyncio.gather(*[compare_one(p) for p in pairs])
return results
@@ -144,17 +164,19 @@ class LLMDeduplicationService:
async def deduplicate(
self,
descriptions: List[ExpertTransformationDescription],
model: Optional[str] = None
model: Optional[str] = None,
lang: LanguageType = "zh"
) -> DeduplicationResult:
"""
使用 LLM 成對比較進行去重
Use LLM pairwise comparison for deduplication
Args:
descriptions: 要去重的描述列表
model: LLM 模型名稱
descriptions: List of descriptions to deduplicate
model: LLM model name
lang: Language for the prompt
Returns:
DeduplicationResult: 去重結果
DeduplicationResult: Deduplication result
"""
model = model or self.default_model
@@ -188,10 +210,10 @@ class LLMDeduplicationService:
))
total_pairs = len(pairs)
logger.info(f"LLM deduplication: {total_pairs} pairs to compare (parallel={self.max_concurrent}, model={model})")
logger.info(f"LLM deduplication: {total_pairs} pairs to compare (parallel={self.max_concurrent}, model={model}, lang={lang})")
# 並行批次比較
results = await self.compare_batch(pairs, model)
# Parallel batch comparison
results = await self.compare_batch(pairs, model, lang)
# 填入相似度矩陣
for i, j, is_similar in results:
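The concurrency pattern behind `compare_batch`, reduced to a runnable sketch: `asyncio.gather` schedules every pair at once while a shared semaphore caps in-flight LLM calls at `max_concurrent`; the `sleep` stands in for the Ollama round-trip and string equality for the YES/NO parse.

```python
import asyncio
from typing import List, Tuple

MAX_CONCURRENT = 5  # mirrors self.max_concurrent in the service

async def compare_pair(d1: str, d2: str, sem: asyncio.Semaphore) -> bool:
    async with sem:               # at most MAX_CONCURRENT calls in flight
        await asyncio.sleep(0.1)  # stands in for the Ollama HTTP round-trip
        return d1 == d2           # stands in for parsing the YES/NO answer

async def compare_batch(pairs: List[Tuple[int, int, str, str]]) -> List[Tuple[int, int, bool]]:
    sem = asyncio.Semaphore(MAX_CONCURRENT)

    async def one(pair: Tuple[int, int, str, str]) -> Tuple[int, int, bool]:
        i, j, d1, d2 = pair
        return (i, j, await compare_pair(d1, d2, sem))

    # gather fires all comparisons; the semaphore throttles them
    return await asyncio.gather(*[one(p) for p in pairs])

pairs = [(0, 1, "a", "a"), (0, 2, "a", "b"), (1, 2, "a", "b")]
print(asyncio.run(compare_batch(pairs)))  # [(0, 1, True), (0, 2, False), (1, 2, False)]
```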

View File

@@ -0,0 +1,195 @@
"""Patent Search Service using Google Patents XHR API"""
import httpx
import logging
from typing import List, Optional
from urllib.parse import quote_plus
logger = logging.getLogger(__name__)
class PatentSearchResult:
"""Single patent search result"""
def __init__(
self,
publication_number: str,
title: str,
snippet: str,
publication_date: Optional[str],
assignee: Optional[str],
inventor: Optional[str],
status: str,
pdf_url: Optional[str] = None,
thumbnail_url: Optional[str] = None,
):
self.publication_number = publication_number
self.title = title
self.snippet = snippet
self.publication_date = publication_date
self.assignee = assignee
self.inventor = inventor
self.status = status
self.pdf_url = pdf_url
self.thumbnail_url = thumbnail_url
def to_dict(self):
return {
"publication_number": self.publication_number,
"title": self.title,
"snippet": self.snippet,
"publication_date": self.publication_date,
"assignee": self.assignee,
"inventor": self.inventor,
"status": self.status,
"pdf_url": self.pdf_url,
"thumbnail_url": self.thumbnail_url,
}
class PatentSearchService:
"""Service for searching patents using Google Patents"""
GOOGLE_PATENTS_XHR_URL = "https://patents.google.com/xhr/query"
GOOGLE_PATENTS_PDF_BASE = "https://patentimages.storage.googleapis.com/"
def __init__(self):
self._client: Optional[httpx.AsyncClient] = None
# Browser-like headers to avoid being blocked
DEFAULT_HEADERS = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Accept": "application/json, text/plain, */*",
"Accept-Language": "en-US,en;q=0.9",
"Referer": "https://patents.google.com/",
"Origin": "https://patents.google.com",
}
async def _get_client(self) -> httpx.AsyncClient:
if self._client is None or self._client.is_closed:
self._client = httpx.AsyncClient(
timeout=30.0,
headers=self.DEFAULT_HEADERS,
follow_redirects=True,
)
return self._client
async def close(self):
if self._client and not self._client.is_closed:
await self._client.aclose()
async def search(
self,
query: str,
max_results: int = 10,
) -> dict:
"""
Search Google Patents for relevant patents
Args:
query: Search query (can be a description or keywords)
max_results: Maximum number of results to return
Returns:
Dict with total_results count and list of patent results
"""
try:
client = await self._get_client()
# URL encode the query
encoded_query = quote_plus(query)
url = f"{self.GOOGLE_PATENTS_XHR_URL}?url=q%3D{encoded_query}&exp=&tags="
logger.info(f"Searching patents with query: {query[:100]}...")
response = await client.get(url)
if response.status_code != 200:
logger.error(f"Google Patents API returned status {response.status_code}")
return {
"total_results": 0,
"patents": [],
"error": f"API returned status {response.status_code}"
}
data = response.json()
# Parse results
results = data.get("results", {})
total_num = results.get("total_num_results", 0)
clusters = results.get("cluster", [])
patents: List[PatentSearchResult] = []
if clusters and len(clusters) > 0:
patent_results = clusters[0].get("result", [])
for item in patent_results[:max_results]:
patent_data = item.get("patent", {})
family_meta = patent_data.get("family_metadata", {})
aggregated = family_meta.get("aggregated", {})
country_status = aggregated.get("country_status", [])
status = "UNKNOWN"
if country_status and len(country_status) > 0:
best_stage = country_status[0].get("best_patent_stage", {})
status = best_stage.get("state", "UNKNOWN")
# Build PDF URL if available
pdf_path = patent_data.get("pdf", "")
pdf_url = f"{self.GOOGLE_PATENTS_PDF_BASE}{pdf_path}" if pdf_path else None
# Build thumbnail URL
thumbnail = patent_data.get("thumbnail", "")
thumbnail_url = f"{self.GOOGLE_PATENTS_PDF_BASE}{thumbnail}" if thumbnail else None
patent = PatentSearchResult(
publication_number=patent_data.get("publication_number", ""),
title=self._clean_html(patent_data.get("title", "")),
snippet=self._clean_html(patent_data.get("snippet", "")),
publication_date=patent_data.get("publication_date"),
assignee=patent_data.get("assignee"),
inventor=patent_data.get("inventor"),
status=status,
pdf_url=pdf_url,
thumbnail_url=thumbnail_url,
)
patents.append(patent)
logger.info(f"Found {total_num} total patents, returning {len(patents)}")
return {
"total_results": total_num,
"patents": [p.to_dict() for p in patents],
}
except httpx.HTTPError as e:
logger.error(f"HTTP error searching patents: {e}")
return {
"total_results": 0,
"patents": [],
"error": str(e)
}
except Exception as e:
logger.error(f"Error searching patents: {e}")
return {
"total_results": 0,
"patents": [],
"error": str(e)
}
def _clean_html(self, text: str) -> str:
"""Remove HTML entities and tags from text"""
if not text:
return ""
# Replace common HTML entities
text = text.replace("…", "...")
text = text.replace("&", "&")
text = text.replace("&lt;", "<")
text = text.replace("&gt;", ">")
text = text.replace("&quot;", '"')
text = text.replace("&#39;", "'")
return text.strip()
# Singleton instance
patent_search_service = PatentSearchService()
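Finally, a hedged driver for the singleton above (import path assumed): run it inside asyncio and close the shared client when done. The XHR endpoint is unofficial, so an `error` key in the result is an expected outcome.

```python
import asyncio

# Import path assumed; adjust to the actual package layout.
from app.services.patent_search_service import patent_search_service

async def main() -> None:
    result = await patent_search_service.search("foldable solar umbrella", max_results=3)
    if result.get("error"):
        print("search failed:", result["error"])  # unofficial API; may be blocked
    else:
        for p in result["patents"]:
            print(p["publication_number"], "-", p["title"])
    await patent_search_service.close()  # release the shared httpx client

if __name__ == "__main__":
    asyncio.run(main())
```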