""" Deduplication Router - 使用 Embedding 或 LLM 去重描述 提供 API 端點將相似的創新描述分組,幫助識別重複的想法。 支援兩種方法: - Embedding: 快速向量相似度比較 - LLM: 精準語意判斷(較慢但更準確) """ import logging from fastapi import APIRouter, HTTPException from ..models.schemas import DeduplicationRequest, DeduplicationResult, DeduplicationMethod from ..services.embedding_service import embedding_service from ..services.llm_deduplication_service import llm_deduplication_service logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/deduplication", tags=["deduplication"]) @router.post("/deduplicate", response_model=DeduplicationResult) async def deduplicate_descriptions(request: DeduplicationRequest) -> DeduplicationResult: """ 去重描述 支援兩種方法: - embedding: 使用向量相似度(快速) - llm: 使用 LLM 成對比較(精準但較慢) Args: request: 去重請求,包含描述列表、方法選擇和相關參數 Returns: DeduplicationResult: 去重結果,包含分組資訊 Raises: HTTPException: 如果去重處理失敗 """ method = request.method logger.info(f"Deduplication request: {len(request.descriptions)} descriptions, method={method.value}, threshold={request.similarity_threshold}") if not request.descriptions: return DeduplicationResult( total_input=0, total_groups=0, total_duplicates=0, groups=[], threshold_used=request.similarity_threshold, method_used=method, model_used=request.model or ("nomic-embed-text" if method == DeduplicationMethod.EMBEDDING else "qwen3:4b") ) try: if method == DeduplicationMethod.EMBEDDING: # 使用 Embedding 相似度去重 result = await embedding_service.deduplicate( descriptions=request.descriptions, threshold=request.similarity_threshold, model=request.model ) else: # 使用 LLM 成對比較去重 result = await llm_deduplication_service.deduplicate( descriptions=request.descriptions, model=request.model ) return result except ValueError as e: logger.error(f"Deduplication failed: {e}") raise HTTPException(status_code=500, detail=str(e)) except Exception as e: logger.error(f"Unexpected error during deduplication: {e}") raise HTTPException(status_code=500, detail=f"Deduplication failed: {str(e)}") @router.get("/models") 
async def list_embedding_models():
    """
    List the available embedding models.

    Returns:
        dict: A mapping with three keys: ``default`` (the suggested default
        model name), ``available`` (a list of ``name``/``description``
        entries) and ``note`` (a hint on how to install a model).
    """
    return {
        "default": "nomic-embed-text",
        "available": [
            {"name": "nomic-embed-text", "description": "Fast and efficient embedding model"},
            {"name": "mxbai-embed-large", "description": "High quality embeddings"},
            {"name": "all-minilm", "description": "Lightweight embedding model"},
        ],
        # The placeholder keeps the hint a complete command; without it the
        # text read "ollama pull " with nothing to pull.
        "note": "Run 'ollama pull <model-name>' to install a model",
    }