Files
novelty-seeking/backend/app/models/schemas.py
2026-01-05 22:32:08 +08:00

281 lines
8.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from pydantic import BaseModel
from typing import Optional, List, Dict, Literal
from enum import Enum
# Language type for prompts: a Literal restricted to the codes "zh" and "en".
LanguageType = Literal["zh", "en"]
class AttributeNode(BaseModel):
    """A named attribute node with an optional category and child nodes."""

    name: str
    # Category label; the original note lists materials, functions, usages
    # and user groups.
    category: Optional[str] = None
    # Nested nodes of this same model, or None when no children are given.
    children: Optional[List["AttributeNode"]] = None


# `children` names the class through a string annotation, so the model is
# rebuilt once AttributeNode itself is defined.
AttributeNode.model_rebuild()
class AnalyzeRequest(BaseModel):
    """Analysis request: a query plus optional model, temperature, categories."""

    query: str
    model: Optional[str] = None
    temperature: Optional[float] = 0.7
    # Per the original note: when None, the default categories are used.
    categories: Optional[List[str]] = None
class AnalyzeResponse(BaseModel):
    """Analysis response pairing the query with its AttributeNode tree."""

    query: str
    attributes: AttributeNode
class ModelListResponse(BaseModel):
    """Response wrapping a list of model strings."""

    models: List[str]
# ===== Multi-step streaming schemas =====
class Step1Result(BaseModel):
    """Step 1 result: the attribute list for each category."""

    # One list of strings per fixed category.
    materials: List[str]
    functions: List[str]
    usages: List[str]
    users: List[str]
class CausalChain(BaseModel):
    """A single causal chain."""

    # One string per link of the chain, all required.
    material: str
    function: str
    usage: str
    user: str
class StreamAnalyzeRequest(BaseModel):
    """Multi-step analysis request (updated to support dynamic categories)."""

    query: str
    model: Optional[str] = None
    temperature: Optional[float] = 0.7
    # How many causal chains to generate; settable by the user.
    chain_count: int = 5

    # --- Dynamic category support ---
    # Holds a CategoryMode enum value.
    # NOTE(review): typed as a plain string, so this schema does not check
    # the value against CategoryMode.
    category_mode: Optional[str] = "dynamic_auto"
    custom_categories: Optional[List[str]] = None
    # Number of categories the LLM is asked to suggest.
    suggested_category_count: int = 3

    # --- Language setting ---
    lang: LanguageType = "zh"
class StreamAnalyzeResponse(BaseModel):
    """Final, complete result."""

    query: str
    step1_result: Step1Result
    causal_chains: List[CausalChain]
    attributes: AttributeNode
# ===== Dynamic category system schemas =====
class CategoryMode(str, Enum):
    """Category mode."""

    FIXED_ONLY = "fixed_only"
    FIXED_PLUS_CUSTOM = "fixed_plus_custom"
    # Fixed categories plus LLM-suggested ones.
    FIXED_PLUS_DYNAMIC = "fixed_plus_dynamic"
    CUSTOM_ONLY = "custom_only"
    DYNAMIC_AUTO = "dynamic_auto"
class CategoryDefinition(BaseModel):
    """Category definition."""

    name: str
    description: Optional[str] = None
    # False for categories generated by the LLM.
    is_fixed: bool = True
    order: int = 0
class Step0Result(BaseModel):
    """Step 0: categories suggested by LLM analysis."""

    categories: List[CategoryDefinition]
class DynamicStep1Result(BaseModel):
    """Dynamic version of the Step 1 result."""

    # Mapping of {category name: [attribute list]}.
    attributes: Dict[str, List[str]]
class DynamicCausalChain(BaseModel):
    """Dynamic version of a causal chain."""

    # Mapping of {category name: selected attribute}.
    chain: Dict[str, str]
# ===== DAG (Directed Acyclic Graph) schemas =====
class DAGNode(BaseModel):
    """DAG node - each attribute appears only once."""

    # Unique ID in the form "{category}_{index}".
    id: str
    # Display name.
    name: str
    # Category the node belongs to.
    category: str
    # Position within the column.
    order: int
class DAGEdge(BaseModel):
    """DAG edge - a connection between nodes."""

    # IDs of the two endpoints.
    source_id: str
    target_id: str
class AttributeDAG(BaseModel):
    """Complete DAG structure."""

    query: str
    categories: List[CategoryDefinition]
    nodes: List[DAGNode]
    edges: List[DAGEdge]
class DAGRelationship(BaseModel):
    """Step 2 output - a single relationship."""

    source_category: str
    # Source attribute name.
    source: str
    target_category: str
    # Target attribute name.
    target: str
# ===== Transformation Agent schemas =====
class TransformationRequest(BaseModel):
    """Transformation Agent request."""

    # Original query (e.g., "bicycle").
    query: str
    # Category name (e.g., "Functions").
    category: str
    # Attribute list for this category.
    attributes: List[str]
    model: Optional[str] = None
    temperature: Optional[float] = 0.7
    # Number of new keywords to generate.
    keyword_count: int = 3
    # Language for prompts.
    lang: LanguageType = "zh"
class TransformationDescription(BaseModel):
    """A single transformation description."""

    # The new keyword.
    keyword: str
    # Description that combines the keyword with the query.
    description: str
class TransformationCategoryResult(BaseModel):
    """Transformation result for a single category."""

    category: str
    # The original attributes.
    original_attributes: List[str]
    # The newly generated keywords.
    new_keywords: List[str]
    descriptions: List[TransformationDescription]
class TransformationDAGResult(BaseModel):
    """Complete Transformation result."""

    query: str
    results: List[TransformationCategoryResult]
# ===== Expert Transformation Agent schemas =====
class ExpertProfile(BaseModel):
    """Expert profile."""

    # e.g., "expert-0"
    id: str
    # e.g., "pharmacist" (the original example is written in Chinese)
    name: str
    # e.g., "medicine and health"
    domain: str
    # e.g., "think from the angle of medication and health management"
    perspective: Optional[str] = None
class ExpertKeyword(BaseModel):
    """Keyword generated from an expert's perspective."""

    # The keyword itself.
    keyword: str
    # Which expert generated it.
    expert_id: str
    # Expert name (redundant; kept for the frontend's convenience).
    expert_name: str
    # Which original attribute it came from.
    source_attribute: str
class ExpertTransformationDescription(BaseModel):
    """Description of an expert keyword."""

    # All four fields are required strings.
    keyword: str
    expert_id: str
    expert_name: str
    description: str
class ExpertTransformationCategoryResult(BaseModel):
    """Transformation result for a single category (expert version)."""

    category: str
    original_attributes: List[str]
    # Keywords generated by all experts.
    expert_keywords: List[ExpertKeyword]
    descriptions: List[ExpertTransformationDescription]
class ExpertTransformationDAGResult(BaseModel):
    """Complete transformation result (expert version)."""

    query: str
    # List of the experts that were used.
    experts: List[ExpertProfile]
    results: List[ExpertTransformationCategoryResult]
class ExpertSource(str, Enum):
    """Expert source type."""

    LLM = "llm"
    # Curated occupations (210 entries, including specific domains).
    CURATED = "curated"
    DBPEDIA = "dbpedia"
    WIKIDATA = "wikidata"
class ExpertTransformationRequest(BaseModel):
    """Expert Transformation Agent request."""

    query: str
    category: str
    attributes: List[str]

    # --- Expert parameters ---
    # Number of experts (2-8).
    # NOTE(review): the 2-8 range is documented only; no bound is declared
    # on this field.
    expert_count: int = 3
    # Keywords per expert per attribute (1-3).
    # NOTE(review): likewise documented only, not declared on the field.
    keywords_per_expert: int = 1
    # User-specified experts.
    custom_experts: Optional[List[str]] = None

    # --- Expert source parameters ---
    # Expert source.
    expert_source: ExpertSource = ExpertSource.LLM
    # Language for external sources.
    expert_language: str = "en"

    # --- LLM parameters ---
    model: Optional[str] = None
    temperature: Optional[float] = 0.7

    # --- Prompt language ---
    lang: LanguageType = "zh"
# ===== Deduplication Agent schemas =====
class DeduplicationMethod(str, Enum):
    """Deduplication method."""

    # Vector similarity.
    EMBEDDING = "embedding"
    # Pairwise judgment by an LLM.
    LLM = "llm"
class DeduplicationRequest(BaseModel):
    """Deduplication request."""

    descriptions: List[ExpertTransformationDescription]
    # Deduplication method.
    method: DeduplicationMethod = DeduplicationMethod.EMBEDDING
    # Cosine similarity threshold (0.0-1.0), only for the Embedding method.
    # NOTE(review): the 0.0-1.0 range is documented only; no bound is
    # declared on this field.
    similarity_threshold: float = 0.85
    # Embedding/LLM model.
    model: Optional[str] = None
    # Prompt language (for the LLM method).
    lang: LanguageType = "zh"
class DescriptionGroup(BaseModel):
    """Group of similar descriptions."""

    # "group-0", "group-1", ...
    group_id: str
    # The representative description.
    representative: ExpertTransformationDescription
    # The similar descriptions.
    duplicates: List[ExpertTransformationDescription]
    # Similarity score of each duplicate.
    similarity_scores: List[float]
class DeduplicationResult(BaseModel):
    """Deduplication result."""

    # Total number of input descriptions.
    total_input: int
    # Number of groups.
    total_groups: int
    # Number of duplicates.
    total_duplicates: int
    groups: List[DescriptionGroup]
    threshold_used: float
    # The deduplication method that was used.
    method_used: DeduplicationMethod
    # The model that was used.
    model_used: str