"""Expert 本地資料來源服務 從本地 JSON 檔案讀取職業資料,提供隨機選取功能。 """ import json import logging import random from pathlib import Path from typing import List, Tuple logger = logging.getLogger(__name__) # 資料目錄 DATA_DIR = Path(__file__).parent.parent / "data" class LocalDataProvider: """從本地 JSON 檔案讀取職業資料""" def __init__(self, source: str): """ Args: source: 資料來源名稱 (dbpedia/wikidata) """ self.source = source self._cache: dict = {} # 記憶體快取 def load_occupations(self, language: str = "en") -> List[dict]: """ 載入職業資料 Args: language: 語言代碼 (en/zh) Returns: 職業列表 [{"name": "...", "domain": "..."}, ...] """ cache_key = f"{self.source}:{language}" # 檢查記憶體快取 if cache_key in self._cache: return self._cache[cache_key] # 讀取檔案 file_path = DATA_DIR / f"{self.source}_occupations_{language}.json" if not file_path.exists(): logger.warning(f"資料檔案不存在: {file_path}") return [] try: with open(file_path, "r", encoding="utf-8") as f: data = json.load(f) occupations = data.get("occupations", []) logger.info(f"載入 {len(occupations)} 筆 {self.source} {language} 職業") # 存入快取 self._cache[cache_key] = occupations return occupations except Exception as e: logger.error(f"讀取職業資料失敗: {e}") return [] def random_select(self, count: int, language: str = "en") -> List[dict]: """ 隨機選取指定數量的職業 Args: count: 需要的數量 language: 語言代碼 Returns: 隨機選取的職業列表 """ all_occupations = self.load_occupations(language) if not all_occupations: return [] if len(all_occupations) <= count: return all_occupations return random.sample(all_occupations, count) class ExpertSourceService: """統一的專家來源服務""" def __init__(self): self.curated = LocalDataProvider("curated") # 精選職業 self.dbpedia = LocalDataProvider("dbpedia") self.wikidata = LocalDataProvider("wikidata") def get_experts( self, source: str, count: int, language: str = "en", fallback_to_llm: bool = True ) -> Tuple[List[dict], str]: """ 從指定來源獲取專家資料 Args: source: 來源類型 ("dbpedia" | "wikidata") count: 需要的專家數量 language: 語言代碼 fallback_to_llm: 失敗時是否允許 fallback(由呼叫者處理) Returns: (專家資料列表, 實際使用的來源) Raises: ValueError: 當獲取失敗且資料為空時 """ # 選擇 provider if source == "curated": provider = self.curated # 精選職業支援 zh 和 en,預設使用 zh if language not in ["zh", "en"]: language = "zh" elif source == "wikidata": provider = self.wikidata else: # 預設使用 dbpedia provider = self.dbpedia source = "dbpedia" experts = provider.random_select(count, language) if not experts: raise ValueError(f"No occupations found from {source} ({language})") logger.info(f"從 {source} 取得 {len(experts)} 位專家") return experts, source def get_available_sources(self) -> List[dict]: """ 取得可用的資料來源資訊 Returns: 來源資訊列表 """ sources = [] # 檢查精選職業(中文) curated_zh = DATA_DIR / "curated_occupations_zh.json" if curated_zh.exists(): with open(curated_zh, "r", encoding="utf-8") as f: data = json.load(f) sources.append({ "source": "curated", "language": "zh", "count": data["metadata"]["total_count"], "created_at": data["metadata"]["created_at"] }) # 檢查精選職業(英文) curated_en = DATA_DIR / "curated_occupations_en.json" if curated_en.exists(): with open(curated_en, "r", encoding="utf-8") as f: data = json.load(f) sources.append({ "source": "curated", "language": "en", "count": data["metadata"]["total_count"], "created_at": data["metadata"]["created_at"] }) # 檢查 DBpedia dbpedia_en = DATA_DIR / "dbpedia_occupations_en.json" if dbpedia_en.exists(): with open(dbpedia_en, "r", encoding="utf-8") as f: data = json.load(f) sources.append({ "source": "dbpedia", "language": "en", "count": data["metadata"]["total_count"], "fetched_at": data["metadata"]["fetched_at"] }) # 檢查 Wikidata wikidata_zh = DATA_DIR / "wikidata_occupations_zh.json" if wikidata_zh.exists(): with open(wikidata_zh, "r", encoding="utf-8") as f: data = json.load(f) sources.append({ "source": "wikidata", "language": "zh", "count": data["metadata"]["total_count"], "fetched_at": data["metadata"]["fetched_at"] }) return sources # 全域服務實例 expert_source_service = ExpertSourceService()