feat: Add Deduplication Agent with embedding and LLM methods

Implement a new Deduplication Agent that identifies and groups similar
transformation descriptions. Supports two deduplication methods:
- Embedding: Fast vector similarity comparison using cosine similarity
- LLM: Accurate pairwise semantic comparison (slower but more precise)

Backend changes:
- Add deduplication router with /deduplicate endpoint
- Add embedding_service for vector-based similarity
- Add llm_deduplication_service for LLM-based comparison
- Improve expert_transformation error handling and progress reporting

Frontend changes:
- Add DeduplicationPanel with interactive group visualization
- Add useDeduplication hook for state management
- Integrate deduplication tab in main App
- Add threshold slider and method selector in sidebar

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-22 20:26:17 +08:00
parent 5571076406
commit bc281b8e0a
18 changed files with 1397 additions and 25 deletions


@@ -3,14 +3,18 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
-from .routers import attributes, transformation, expert_transformation
+from .routers import attributes, transformation, expert_transformation, deduplication
from .services.llm_service import ollama_provider
+from .services.embedding_service import embedding_service
+from .services.llm_deduplication_service import llm_deduplication_service

@asynccontextmanager
async def lifespan(app: FastAPI):
    yield
    await ollama_provider.close()
+    await embedding_service.close()
+    await llm_deduplication_service.close()

app = FastAPI(
@@ -31,6 +35,7 @@ app.add_middleware(
app.include_router(attributes.router)
app.include_router(transformation.router)
app.include_router(expert_transformation.router)
+app.include_router(deduplication.router)

@app.get("/")


@@ -232,3 +232,38 @@ class ExpertTransformationRequest(BaseModel):
    # LLM parameters
    model: Optional[str] = None
    temperature: Optional[float] = 0.7
+
+# ===== Deduplication Agent schemas =====
+
+class DeduplicationMethod(str, Enum):
+    """Deduplication method"""
+    EMBEDDING = "embedding"  # vector similarity
+    LLM = "llm"              # pairwise LLM judgment
+
+class DeduplicationRequest(BaseModel):
+    """Deduplication request"""
+    descriptions: List[ExpertTransformationDescription]
+    method: DeduplicationMethod = DeduplicationMethod.EMBEDDING  # deduplication method
+    similarity_threshold: float = 0.85  # cosine-similarity threshold (0.0-1.0), embedding method only
+    model: Optional[str] = None  # embedding/LLM model
+
+class DescriptionGroup(BaseModel):
+    """A group of similar descriptions"""
+    group_id: str  # "group-0", "group-1", ...
+    representative: ExpertTransformationDescription  # representative description
+    duplicates: List[ExpertTransformationDescription]  # similar descriptions
+    similarity_scores: List[float]  # similarity score for each duplicate
+
+class DeduplicationResult(BaseModel):
+    """Deduplication result"""
+    total_input: int       # total number of input descriptions
+    total_groups: int      # number of groups
+    total_duplicates: int  # number of duplicates
+    groups: List[DescriptionGroup]
+    threshold_used: float
+    method_used: DeduplicationMethod  # deduplication method used
+    model_used: str                   # model used
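
For orientation, a request under these schemas looks roughly like the following sketch. The fields shown for ExpertTransformationDescription (keyword, expert_name, description) are inferred from how this commit constructs that model in expert_transformation, so treat them as an assumption rather than the full definition:

# Sketch only: an example payload for DeduplicationRequest and the result shape.
# ExpertTransformationDescription field names are assumptions inferred from
# elsewhere in this commit.
request_payload = {
    "descriptions": [
        {"keyword": "modularity", "expert_name": "Designer",
         "description": "Apply modular construction ideas to the umbrella frame."},
        {"keyword": "modularity", "expert_name": "Engineer",
         "description": "Use modular building concepts for umbrella ribs."},
    ],
    "method": "embedding",         # or "llm"
    "similarity_threshold": 0.85,  # embedding method only
    "model": "nomic-embed-text",   # optional override
}
# The matching DeduplicationResult would report total_input=2 and, if the two
# texts clear the threshold, a single group with one duplicate.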


@@ -90,16 +90,15 @@ def get_single_description_prompt(
) -> str:
    """Step 2: generate a description for a single keyword"""
    # If the domain is generic, use only the profession name
-    domain_text = f"{expert_domain}" if expert_domain and expert_domain != "Professional Field" else ""
+    domain_text = f" in the field of {expert_domain}" if expert_domain and expert_domain != "Professional Field" else ""
    return f"""/no_think
Object: "{query}"
-Expert: {expert_name}{domain_text}
+You are a {expert_name}{domain_text}.
+Task: generate an innovative application description for "{query}".
Keyword: {keyword}
-You are a {expert_name}. From your professional perspective, generate an innovative application description (15-30 characters) explaining how to apply the concept of "{keyword}" to "{query}".
+From your professional perspective, explain how the concept of "{keyword}" can be applied to "{query}". The description should be concrete and creative (15-30 characters).
+The description should reflect the professional thinking and distinctive viewpoint of a {expert_name}.
-Return JSON:
-{{"description": "application description"}}"""
+Return only JSON, with no other text:
+{{"description": "your innovative application description"}}"""


@@ -0,0 +1,93 @@
"""
Deduplication Router - 使用 Embedding 或 LLM 去重描述
提供 API 端點將相似的創新描述分組,幫助識別重複的想法。
支援兩種方法:
- Embedding: 快速向量相似度比較
- LLM: 精準語意判斷(較慢但更準確)
"""
import logging
from fastapi import APIRouter, HTTPException
from ..models.schemas import DeduplicationRequest, DeduplicationResult, DeduplicationMethod
from ..services.embedding_service import embedding_service
from ..services.llm_deduplication_service import llm_deduplication_service
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/deduplication", tags=["deduplication"])
@router.post("/deduplicate", response_model=DeduplicationResult)
async def deduplicate_descriptions(request: DeduplicationRequest) -> DeduplicationResult:
    """
    Deduplicate descriptions.

    Two methods are supported:
    - embedding: vector similarity (fast)
    - llm: pairwise LLM comparison (precise but slower)

    Args:
        request: deduplication request with the description list, method choice, and related parameters

    Returns:
        DeduplicationResult: deduplication result with group info

    Raises:
        HTTPException: if deduplication fails
    """
    method = request.method
    logger.info(f"Deduplication request: {len(request.descriptions)} descriptions, method={method.value}, threshold={request.similarity_threshold}")
    if not request.descriptions:
        return DeduplicationResult(
            total_input=0,
            total_groups=0,
            total_duplicates=0,
            groups=[],
            threshold_used=request.similarity_threshold,
            method_used=method,
            model_used=request.model or ("nomic-embed-text" if method == DeduplicationMethod.EMBEDDING else "qwen3:4b")
        )
    try:
        if method == DeduplicationMethod.EMBEDDING:
            # Deduplicate via embedding similarity
            result = await embedding_service.deduplicate(
                descriptions=request.descriptions,
                threshold=request.similarity_threshold,
                model=request.model
            )
        else:
            # Deduplicate via pairwise LLM comparison
            result = await llm_deduplication_service.deduplicate(
                descriptions=request.descriptions,
                model=request.model
            )
        return result
    except ValueError as e:
        logger.error(f"Deduplication failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
    except Exception as e:
        logger.error(f"Unexpected error during deduplication: {e}")
        raise HTTPException(status_code=500, detail=f"Deduplication failed: {str(e)}")
@router.get("/models")
async def list_embedding_models():
"""
列出可用的 Embedding 模型
Returns:
dict: 可用模型列表和建議的預設模型
"""
return {
"default": "nomic-embed-text",
"available": [
{"name": "nomic-embed-text", "description": "Fast and efficient embedding model"},
{"name": "mxbai-embed-large", "description": "High quality embeddings"},
{"name": "all-minilm", "description": "Lightweight embedding model"},
],
"note": "Run 'ollama pull <model>' to install a model"
}
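
As a usage sketch, the endpoint can be exercised with httpx. The host and port below are assumptions (whatever the app is served on), and the description fields follow the hedged payload example shown after the schema section:

# Sketch: calling the new endpoint. Host/port are assumptions, not part of
# this commit.
import asyncio
import httpx

async def demo():
    payload = {
        "descriptions": [
            {"keyword": "modularity", "expert_name": "Designer",
             "description": "Apply modular construction ideas to the umbrella frame."},
            {"keyword": "modularity", "expert_name": "Engineer",
             "description": "Use modular building concepts for umbrella ribs."},
        ],
        "method": "embedding",
        "similarity_threshold": 0.85,
    }
    async with httpx.AsyncClient(timeout=120.0) as client:
        resp = await client.post(
            "http://localhost:8000/api/deduplication/deduplicate", json=payload
        )
        resp.raise_for_status()
        result = resp.json()
        print(f"{result['total_groups']} groups, {result['total_duplicates']} duplicates")

asyncio.run(demo())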


@@ -221,8 +221,27 @@ async def generate_expert_transformation_events(
desc_prompt, model=model, temperature=temperature
)
-            desc_data = extract_json_from_response(desc_response)
-            desc_text = desc_data.get("description", "")
+            # Try to parse JSON; fall back to the raw response if parsing fails
+            desc_text = ""
+            desc_data = None  # defined up front so the warning below can reference it
+            try:
+                desc_data = extract_json_from_response(desc_response)
+                # Accept several possible keys: description, content, text, desc
+                desc_text = (
+                    desc_data.get("description") or
+                    desc_data.get("content") or
+                    desc_data.get("text") or
+                    desc_data.get("desc") or
+                    ""
+                )
+            except ValueError:
+                # JSON parsing failed; try cleaning the raw response and using it as the description
+                cleaned = desc_response.strip()
+                # Strip possible markdown wrappers and stray quotes
+                if cleaned.startswith('"') and cleaned.endswith('"'):
+                    cleaned = cleaned[1:-1]
+                if len(cleaned) > 5 and len(cleaned) < 100:
+                    desc_text = cleaned
+                    logger.info(f"[DESC] Using fallback description for '{kw.keyword}': {desc_text[:50]}")
            if desc_text:
                descriptions.append(ExpertTransformationDescription(
@@ -231,15 +250,22 @@
                    expert_name=kw.expert_name,
                    description=desc_text
                ))
+            else:
+                logger.warning(f"[DESC] Empty description for keyword='{kw.keyword}', parsed_data={desc_data}")
-            # Send progress update
-            yield f"event: description_progress\ndata: {json.dumps({'current': idx + 1, 'total': len(all_expert_keywords), 'keyword': kw.keyword}, ensure_ascii=False)}\n\n"
+            # Send progress update with success/fail status
+            yield f"event: description_progress\ndata: {json.dumps({'current': idx + 1, 'total': len(all_expert_keywords), 'keyword': kw.keyword, 'success': bool(desc_text)}, ensure_ascii=False)}\n\n"
        except Exception as e:
-            logger.warning(f"Failed to generate description for '{kw.keyword}': {e}")
+            logger.warning(f"[DESC] Failed to generate description for '{kw.keyword}': {e}")
+            yield f"event: description_progress\ndata: {json.dumps({'current': idx + 1, 'total': len(all_expert_keywords), 'keyword': kw.keyword, 'success': False, 'error': str(e)}, ensure_ascii=False)}\n\n"
            # Continue with next keyword

-    yield f"event: description_complete\ndata: {json.dumps({'count': len(descriptions)}, ensure_ascii=False)}\n\n"
+    # Log the overall success rate
+    success_rate = len(descriptions) / len(all_expert_keywords) * 100 if all_expert_keywords else 0
+    logger.info(f"[DESC] Description generation complete: {len(descriptions)}/{len(all_expert_keywords)} succeeded ({success_rate:.1f}%)")
+    yield f"event: description_complete\ndata: {json.dumps({'count': len(descriptions), 'total': len(all_expert_keywords), 'success_rate': success_rate}, ensure_ascii=False)}\n\n"
# ========== Build final result ==========
result = ExpertTransformationCategoryResult(
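
The fallback path above is easy to sanity-check in isolation. Here is a standalone re-implementation of just the cleanup logic, for illustration only (not the actual helper):

# Sketch: the non-JSON fallback in isolation.
def fallback_description(raw: str) -> str:
    cleaned = raw.strip()
    # Strip surrounding quotes left over from a bare string reply
    if cleaned.startswith('"') and cleaned.endswith('"'):
        cleaned = cleaned[1:-1]
    # Accept only plausibly sized descriptions
    return cleaned if 5 < len(cleaned) < 100 else ""

print(fallback_description('"Umbrella ribs borrow modular joint design."'))
# -> Umbrella ribs borrow modular joint design.
print(fallback_description("ok"))
# -> "" (too short, counted as a failed generation)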


@@ -0,0 +1,250 @@
"""
Embedding Service - generates embeddings and performs similarity-based deduplication
使用 Ollama 的 embedding 端點生成向量,並透過餘弦相似度進行去重分組。
"""
import logging
from typing import List, Optional
import httpx
import numpy as np
from ..config import settings
from ..models.schemas import (
ExpertTransformationDescription,
DeduplicationResult,
DeduplicationMethod,
DescriptionGroup,
)
logger = logging.getLogger(__name__)
class EmbeddingService:
"""Embedding 服務:生成向量並執行相似度去重"""
def __init__(self):
self.base_url = settings.ollama_base_url
self.default_model = "nomic-embed-text" # Ollama 預設的 embedding 模型
self.client = httpx.AsyncClient(timeout=120.0)

    async def get_embedding(self, text: str, model: Optional[str] = None) -> List[float]:
        """Get the embedding vector for a single text"""
model = model or self.default_model
url = f"{self.base_url}/api/embed"
try:
response = await self.client.post(url, json={
"model": model,
"input": text
})
response.raise_for_status()
result = response.json()
return result["embeddings"][0]
except httpx.HTTPStatusError as e:
logger.error(f"Embedding API error: {e.response.status_code} - {e.response.text}")
raise
except Exception as e:
logger.error(f"Embedding error: {e}")
raise
async def get_embeddings_batch(
self,
texts: List[str],
model: Optional[str] = None
) -> List[List[float]]:
"""批次取得多個文字的 embedding 向量"""
if not texts:
return []
model = model or self.default_model
url = f"{self.base_url}/api/embed"
try:
            # Ollama supports batch embedding
response = await self.client.post(url, json={
"model": model,
"input": texts
})
response.raise_for_status()
result = response.json()
return result["embeddings"]
except httpx.HTTPStatusError as e:
logger.error(f"Batch embedding API error: {e.response.status_code} - {e.response.text}")
            # If the batch call fails, fall back to per-text requests
logger.info("Falling back to single embedding requests...")
embeddings = []
for text in texts:
emb = await self.get_embedding(text, model)
embeddings.append(emb)
return embeddings
except Exception as e:
logger.error(f"Batch embedding error: {e}")
raise
def cosine_similarity(self, a: List[float], b: List[float]) -> float:
"""計算兩個向量的餘弦相似度"""
a_np = np.array(a)
b_np = np.array(b)
norm_a = np.linalg.norm(a_np)
norm_b = np.linalg.norm(b_np)
if norm_a == 0 or norm_b == 0:
return 0.0
return float(np.dot(a_np, b_np) / (norm_a * norm_b))
def build_similarity_matrix(
self,
embeddings: List[List[float]]
) -> np.ndarray:
"""建立成對相似度矩陣"""
n = len(embeddings)
matrix = np.zeros((n, n))
for i in range(n):
            matrix[i][i] = 1.0  # an item's similarity with itself is 1
for j in range(i + 1, n):
sim = self.cosine_similarity(embeddings[i], embeddings[j])
matrix[i][j] = sim
matrix[j][i] = sim
return matrix
def cluster_by_similarity(
self,
similarity_matrix: np.ndarray,
threshold: float
) -> List[List[int]]:
"""
貪婪聚類:將相似度 >= threshold 的項目分組
演算法:
1. 從第一個未分配的項目開始
2. 找出所有與該項目相似度 >= threshold 的項目
3. 歸入同一組
4. 重複直到所有項目都已分配
Returns:
List[List[int]]: 每個子列表包含同組項目的索引
"""
n = len(similarity_matrix)
assigned = [False] * n
groups = []
for i in range(n):
if assigned[i]:
continue
            # Start a new group with item i as its representative
group = [i]
assigned[i] = True
            # Collect every item similar to i
for j in range(i + 1, n):
if not assigned[j] and similarity_matrix[i][j] >= threshold:
group.append(j)
assigned[j] = True
groups.append(group)
return groups
async def deduplicate(
self,
descriptions: List[ExpertTransformationDescription],
threshold: float = 0.85,
model: Optional[str] = None
) -> DeduplicationResult:
"""
主要去重方法
Args:
descriptions: 要去重的描述列表
threshold: 相似度閾值 (0.0-1.0),預設 0.85
model: Embedding 模型名稱
Returns:
DeduplicationResult: 去重結果,包含分組資訊
"""
model = model or self.default_model
# 空輸入處理
if not descriptions:
return DeduplicationResult(
total_input=0,
total_groups=0,
total_duplicates=0,
groups=[],
threshold_used=threshold,
method_used=DeduplicationMethod.EMBEDDING,
model_used=model
)
        # Extract the description texts
        texts = [d.description for d in descriptions]
        logger.info(f"Generating embeddings for {len(texts)} descriptions using model '{model}'...")
        # Fetch embeddings in one batch
        try:
            embeddings = await self.get_embeddings_batch(texts, model)
        except Exception as e:
            logger.error(f"Failed to generate embeddings: {e}")
            raise ValueError(f"Embedding generation failed: {e}. Make sure the model '{model}' is installed (run: ollama pull {model})")
        # Build the similarity matrix
        logger.info("Building similarity matrix...")
        sim_matrix = self.build_similarity_matrix(embeddings)
        # Cluster
        logger.info(f"Clustering with threshold {threshold}...")
        clusters = self.cluster_by_similarity(sim_matrix, threshold)
        # Build the result groups
result_groups = []
total_duplicates = 0
for group_idx, indices in enumerate(clusters):
if len(indices) == 1:
                # Standalone item - no duplicates
result_groups.append(DescriptionGroup(
group_id=f"group-{group_idx}",
representative=descriptions[indices[0]],
duplicates=[],
similarity_scores=[]
))
else:
                # Group with duplicates - the first item is the representative
rep_idx = indices[0]
dup_indices = indices[1:]
dup_scores = [
float(sim_matrix[rep_idx][idx])
for idx in dup_indices
]
result_groups.append(DescriptionGroup(
group_id=f"group-{group_idx}",
representative=descriptions[rep_idx],
duplicates=[descriptions[idx] for idx in dup_indices],
similarity_scores=dup_scores
))
total_duplicates += len(dup_indices)
logger.info(f"Deduplication complete: {len(descriptions)} -> {len(result_groups)} groups, {total_duplicates} duplicates found")
return DeduplicationResult(
total_input=len(descriptions),
total_groups=len(result_groups),
total_duplicates=total_duplicates,
groups=result_groups,
threshold_used=threshold,
method_used=DeduplicationMethod.EMBEDDING,
model_used=model
)

    async def close(self):
        """Close the HTTP client"""
        await self.client.aclose()

# Global instance
embedding_service = EmbeddingService()
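
To see the greedy pass in action, here is a self-contained sketch that mirrors cluster_by_similarity on a hand-built matrix:

# Sketch: the single-pass greedy clustering in isolation (mirrors
# cluster_by_similarity above; standalone for illustration).
import numpy as np

def cluster(sim: np.ndarray, threshold: float) -> list:
    n = len(sim)
    assigned = [False] * n
    groups = []
    for i in range(n):
        if assigned[i]:
            continue
        group = [i]  # item i becomes the group representative
        assigned[i] = True
        for j in range(i + 1, n):
            if not assigned[j] and sim[i][j] >= threshold:
                group.append(j)
                assigned[j] = True
        groups.append(group)
    return groups

sim = np.array([
    [1.00, 0.91, 0.20],
    [0.91, 1.00, 0.15],
    [0.20, 0.15, 1.00],
])
print(cluster(sim, threshold=0.85))  # -> [[0, 1], [2]]

Note that membership is decided only against each group's representative, so an item can join a group without being similar to the other members; that is the usual trade-off of single-pass greedy clustering.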


@@ -0,0 +1,252 @@
"""
LLM Deduplication Service - 使用 LLM 成對比較進行去重
讓 LLM 判斷兩個描述是否語意重複,透過並行處理加速。
"""
import asyncio
import logging
from typing import List, Tuple, Optional
import httpx
import numpy as np
from ..config import settings
from ..models.schemas import (
ExpertTransformationDescription,
DeduplicationResult,
DeduplicationMethod,
DescriptionGroup,
)
logger = logging.getLogger(__name__)
class LLMDeduplicationService:
"""LLM 去重服務:使用 LLM 成對比較判斷語意相似度"""
def __init__(self):
self.base_url = settings.ollama_base_url
self.default_model = "qwen3:4b" # 快速模型,適合簡單判斷
self.client = httpx.AsyncClient(timeout=60.0)
self.max_concurrent = 5 # 最大並行數,避免 Ollama 過載
async def compare_pair(
self,
desc1: str,
desc2: str,
model: str,
semaphore: asyncio.Semaphore
) -> bool:
"""
讓 LLM 判斷兩個描述是否語意重複
Args:
desc1: 第一個描述
desc2: 第二個描述
model: LLM 模型名稱
semaphore: 並行控制信號量
Returns:
bool: 是否為重複描述
"""
async with semaphore: # 控制並行數
prompt = f"""判斷以下兩個創新描述是否表達相同或非常相似的概念:
描述1: {desc1}
描述2: {desc2}
如果兩者描述的創新概念本質相同或非常相似,回答 "YES"
如果兩者描述不同的創新概念,回答 "NO"
只回答 YES 或 NO不要其他文字"""
try:
response = await self.client.post(
f"{self.base_url}/api/generate",
json={
"model": model,
"prompt": prompt,
"stream": False,
"options": {
"temperature": 0.1, # 低溫度以獲得一致的判斷
"num_predict": 10, # 只需要短回答
}
}
)
response.raise_for_status()
result = response.json()["response"].strip().upper()
is_similar = result.startswith("YES")
logger.debug(f"LLM comparison: '{desc1[:30]}...' vs '{desc2[:30]}...' -> {result} ({is_similar})")
return is_similar
except Exception as e:
logger.error(f"LLM comparison failed: {e}")
return False # 失敗時假設不相似
async def compare_batch(
self,
pairs: List[Tuple[int, int, str, str]],
model: str
) -> List[Tuple[int, int, bool]]:
"""
並行批次比較多個描述對
Args:
pairs: 待比較的配對列表 [(i, j, desc1, desc2), ...]
model: LLM 模型名稱
Returns:
比較結果列表 [(i, j, is_similar), ...]
"""
semaphore = asyncio.Semaphore(self.max_concurrent)
async def compare_one(pair: Tuple[int, int, str, str]) -> Tuple[int, int, bool]:
i, j, desc1, desc2 = pair
is_similar = await self.compare_pair(desc1, desc2, model, semaphore)
return (i, j, is_similar)
# 使用 asyncio.gather 並行執行所有比較
results = await asyncio.gather(*[compare_one(p) for p in pairs])
return results
def cluster_by_similarity(
self,
similarity_matrix: np.ndarray,
threshold: float
) -> List[List[int]]:
"""
貪婪聚類:將相似度 >= threshold 的項目分組
與 embedding_service 使用相同的演算法
"""
n = len(similarity_matrix)
assigned = [False] * n
groups = []
for i in range(n):
if assigned[i]:
continue
            # Start a new group with item i as its representative
group = [i]
assigned[i] = True
            # Collect every item similar to i
for j in range(i + 1, n):
if not assigned[j] and similarity_matrix[i][j] >= threshold:
group.append(j)
assigned[j] = True
groups.append(group)
return groups
async def deduplicate(
self,
descriptions: List[ExpertTransformationDescription],
model: Optional[str] = None
) -> DeduplicationResult:
"""
使用 LLM 成對比較進行去重
Args:
descriptions: 要去重的描述列表
model: LLM 模型名稱
Returns:
DeduplicationResult: 去重結果
"""
model = model or self.default_model
# 空輸入處理
if not descriptions:
return DeduplicationResult(
total_input=0,
total_groups=0,
total_duplicates=0,
groups=[],
                threshold_used=0.5,  # the LLM method always uses a 0.5 threshold
method_used=DeduplicationMethod.LLM,
model_used=model
)
n = len(descriptions)
similarity_matrix = np.zeros((n, n))
        # Diagonal is 1 (each item is similar to itself)
for i in range(n):
similarity_matrix[i][i] = 1.0
        # Build every pair that needs comparison
pairs = []
for i in range(n):
for j in range(i + 1, n):
pairs.append((
i, j,
descriptions[i].description,
descriptions[j].description
))
total_pairs = len(pairs)
logger.info(f"LLM deduplication: {total_pairs} pairs to compare (parallel={self.max_concurrent}, model={model})")
        # Compare the pairs concurrently in batches
results = await self.compare_batch(pairs, model)
        # Fill in the similarity matrix
for i, j, is_similar in results:
similarity_value = 1.0 if is_similar else 0.0
similarity_matrix[i][j] = similarity_value
similarity_matrix[j][i] = similarity_value
        # Cluster with threshold 0.5 (the LLM output is only 0/1)
logger.info("Clustering results...")
clusters = self.cluster_by_similarity(similarity_matrix, 0.5)
        # Build the result groups
result_groups = []
total_duplicates = 0
for group_idx, indices in enumerate(clusters):
if len(indices) == 1:
                # Standalone item - no duplicates
result_groups.append(DescriptionGroup(
group_id=f"group-{group_idx}",
representative=descriptions[indices[0]],
duplicates=[],
similarity_scores=[]
))
else:
                # Group with duplicates - the first item is the representative
                rep_idx = indices[0]
                dup_indices = indices[1:]
                # Similarity scores are all 1.0 for the LLM method (YES/NO judgment)
dup_scores = [1.0 for _ in dup_indices]
result_groups.append(DescriptionGroup(
group_id=f"group-{group_idx}",
representative=descriptions[rep_idx],
duplicates=[descriptions[idx] for idx in dup_indices],
similarity_scores=dup_scores
))
total_duplicates += len(dup_indices)
logger.info(f"LLM deduplication complete: {n} -> {len(result_groups)} groups, {total_duplicates} duplicates found")
return DeduplicationResult(
total_input=n,
total_groups=len(result_groups),
total_duplicates=total_duplicates,
groups=result_groups,
            threshold_used=0.5,  # the LLM method always uses a 0.5 threshold
method_used=DeduplicationMethod.LLM,
model_used=model
)

    async def close(self):
        """Close the HTTP client"""
        await self.client.aclose()

# Global instance
llm_deduplication_service = LLMDeduplicationService()
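
One practical note on cost: the LLM method issues one call per unordered pair, so the number of comparisons grows as n(n-1)/2. The per-call timing below is an assumption for illustration, not a measurement:

# Sketch: pairwise call counts, with a rough wall-time estimate assuming
# ~1 s per call (assumed) and the service's max_concurrent = 5.
for n in (10, 30, 100):
    pairs = n * (n - 1) // 2
    print(f"{n:>3} descriptions -> {pairs:>5} comparisons "
          f"(~{pairs / 5:.0f} s at 1 s/call, 5-way concurrency)")
# 10 -> 45, 30 -> 435, 100 -> 4950 comparisons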