feat: Add external expert sources (Wikidata SPARQL + ConceptNet API)

- Add expert_cache.py: TTL-based in-memory cache (1 hour default)
- Add expert_source_service.py: WikidataProvider and ConceptNetProvider
  - Wikidata SPARQL queries for occupations with Chinese labels
  - ConceptNet API queries for occupation-related concepts
  - Random selection from cached pool
- Update schemas.py: Add ExpertSource enum (llm/wikidata/conceptnet)
- Update ExpertTransformationRequest with expert_source and expert_language
- Update router: Conditionally use external sources with LLM fallback
  - New SSE events: expert_source, expert_fallback
- Update frontend types with ExpertSource

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-12-04 11:42:48 +08:00
parent baea210109
commit 43785db595
5 changed files with 524 additions and 22 deletions

View File

@@ -0,0 +1,92 @@
"""Expert 資料快取模組
提供 TTL-based 快取機制,減少外部 API 呼叫。
"""
import time
from dataclasses import dataclass
from typing import Dict, List, Optional
@dataclass
class CacheEntry:
    """A single cached payload together with the moment it was stored."""

    # Records held for one cache key.
    data: List[dict]
    # Clock reading (seconds) captured when the entry was stored; the cache
    # compares it against its TTL to decide whether the entry has expired.
    timestamp: float
class ExpertCache:
    """In-memory TTL cache for occupation data fetched from external sources.

    Entry age is measured with ``time.monotonic()`` rather than the wall
    clock, so expiry is not affected by system clock adjustments (NTP steps,
    manual changes). As a consequence, the ``timestamp`` stored on each entry
    is a monotonic clock reading, not seconds since the epoch.

    Expired entries are evicted lazily, when they are next requested through
    ``get``.
    """

    def __init__(self, ttl_seconds: int = 3600):
        """Create an empty cache.

        Args:
            ttl_seconds: Lifetime of each entry in seconds (default: 1 hour).
        """
        self._cache: Dict[str, CacheEntry] = {}
        self._ttl = ttl_seconds

    def _is_expired(self, entry, now: float) -> bool:
        """Return True when *entry* is older than the TTL at time *now*."""
        return now - entry.timestamp > self._ttl

    def get(self, key: str) -> Optional[List[dict]]:
        """Return the cached data for *key*.

        Args:
            key: Cache key (e.g. "wikidata:zh:occupations").

        Returns:
            The cached list, or None if the key is absent or its entry has
            expired. An expired entry is removed from the cache.
        """
        entry = self._cache.get(key)
        if entry is None:
            return None
        if self._is_expired(entry, time.monotonic()):
            # pop() with a default tolerates the key already being gone.
            self._cache.pop(key, None)
            return None
        return entry.data

    def set(self, key: str, data: List[dict]) -> None:
        """Store *data* under *key*, replacing any existing entry.

        Args:
            key: Cache key.
            data: List of records to cache. The list is stored by reference,
                not copied.
        """
        self._cache[key] = CacheEntry(data=data, timestamp=time.monotonic())

    def invalidate(self, key: Optional[str] = None) -> None:
        """Remove cached data.

        Args:
            key: Key to remove; when None, every entry is removed. Removing
                a key that is not present is a no-op.
        """
        if key is None:
            self._cache.clear()
        else:
            self._cache.pop(key, None)

    def get_stats(self) -> dict:
        """Return cache statistics.

        Returns:
            A dict with ``total_entries`` (all stored entries, including
            expired ones not yet evicted), ``valid_entries`` (entries still
            within the TTL) and ``ttl_seconds``.
        """
        now = time.monotonic()
        valid_count = sum(
            1 for entry in self._cache.values()
            if not self._is_expired(entry, now)
        )
        return {
            "total_entries": len(self._cache),
            "valid_entries": valid_count,
            "ttl_seconds": self._ttl,
        }
# Module-level shared cache instance, created with the default TTL.
expert_cache = ExpertCache()