feat: Add Deduplication Agent with embedding and LLM methods
Implement a new Deduplication Agent that identifies and groups similar transformation descriptions.

Supports two deduplication methods:
- Embedding: Fast vector similarity comparison using cosine similarity
- LLM: Accurate pairwise semantic comparison (slower but more precise)

Backend changes:
- Add deduplication router with /deduplicate endpoint
- Add embedding_service for vector-based similarity
- Add llm_deduplication_service for LLM-based comparison
- Improve expert_transformation error handling and progress reporting

Frontend changes:
- Add DeduplicationPanel with interactive group visualization
- Add useDeduplication hook for state management
- Integrate deduplication tab in main App
- Add threshold slider and method selector in sidebar

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -221,8 +221,27 @@ async def generate_expert_transformation_events(
|
||||
desc_prompt, model=model, temperature=temperature
|
||||
)
|
||||
|
||||
desc_data = extract_json_from_response(desc_response)
|
||||
desc_text = desc_data.get("description", "")
|
||||
# 嘗試解析 JSON,若失敗則使用原始回應作為描述
|
||||
desc_text = ""
|
||||
try:
|
||||
desc_data = extract_json_from_response(desc_response)
|
||||
# 支援多種可能的 key: description, content, text, desc
|
||||
desc_text = (
|
||||
desc_data.get("description") or
|
||||
desc_data.get("content") or
|
||||
desc_data.get("text") or
|
||||
desc_data.get("desc") or
|
||||
""
|
||||
)
|
||||
except ValueError:
|
||||
# JSON 解析失敗,嘗試清理原始回應作為描述
|
||||
cleaned = desc_response.strip()
|
||||
# 移除可能的 markdown 和多餘符號
|
||||
if cleaned.startswith('"') and cleaned.endswith('"'):
|
||||
cleaned = cleaned[1:-1]
|
||||
if len(cleaned) > 5 and len(cleaned) < 100:
|
||||
desc_text = cleaned
|
||||
logger.info(f"[DESC] 使用 fallback 描述 for '{kw.keyword}': {desc_text[:50]}")
|
||||
|
||||
if desc_text:
|
||||
descriptions.append(ExpertTransformationDescription(
|
||||
@@ -231,15 +250,22 @@ async def generate_expert_transformation_events(
|
||||
expert_name=kw.expert_name,
|
||||
description=desc_text
|
||||
))
|
||||
else:
|
||||
logger.warning(f"[DESC] Empty description for keyword='{kw.keyword}', parsed_data={desc_data}")
|
||||
|
||||
# Send progress update
|
||||
yield f"event: description_progress\ndata: {json.dumps({'current': idx + 1, 'total': len(all_expert_keywords), 'keyword': kw.keyword}, ensure_ascii=False)}\n\n"
|
||||
# Send progress update with success/fail status
|
||||
yield f"event: description_progress\ndata: {json.dumps({'current': idx + 1, 'total': len(all_expert_keywords), 'keyword': kw.keyword, 'success': bool(desc_text)}, ensure_ascii=False)}\n\n"
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to generate description for '{kw.keyword}': {e}")
|
||||
logger.warning(f"[DESC] Failed to generate description for '{kw.keyword}': {e}")
|
||||
yield f"event: description_progress\ndata: {json.dumps({'current': idx + 1, 'total': len(all_expert_keywords), 'keyword': kw.keyword, 'success': False, 'error': str(e)}, ensure_ascii=False)}\n\n"
|
||||
# Continue with next keyword
|
||||
|
||||
yield f"event: description_complete\ndata: {json.dumps({'count': len(descriptions)}, ensure_ascii=False)}\n\n"
|
||||
# 統計成功率
|
||||
success_rate = len(descriptions) / len(all_expert_keywords) * 100 if all_expert_keywords else 0
|
||||
logger.info(f"[DESC] 描述生成完成: {len(descriptions)}/{len(all_expert_keywords)} 成功 ({success_rate:.1f}%)")
|
||||
|
||||
yield f"event: description_complete\ndata: {json.dumps({'count': len(descriptions), 'total': len(all_expert_keywords), 'success_rate': success_rate}, ensure_ascii=False)}\n\n"
|
||||
|
||||
# ========== Build final result ==========
|
||||
result = ExpertTransformationCategoryResult(
|
||||
|
||||
Reference in New Issue
Block a user