feat: Add Deduplication Agent with embedding and LLM methods

Implement a new Deduplication Agent that identifies and groups similar
transformation descriptions. Supports two deduplication methods:
- Embedding: Fast vector comparison using cosine similarity
- LLM: Accurate pairwise semantic comparison (slower but more precise)

Backend changes:
- Add deduplication router with /deduplicate endpoint
- Add embedding_service for vector-based similarity
- Add llm_deduplication_service for LLM-based comparison
- Improve expert_transformation error handling and progress reporting

Frontend changes:
- Add DeduplicationPanel with interactive group visualization
- Add useDeduplication hook for state management
- Integrate deduplication tab in main App
- Add method selector and embedding-only threshold slider in sidebar

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-22 20:26:17 +08:00
parent 5571076406
commit bc281b8e0a
18 changed files with 1397 additions and 25 deletions

View File

@@ -1,16 +1,17 @@
import { useState, useRef, useCallback, useEffect } from 'react';
import { ConfigProvider, Layout, theme, Typography, Space, Tabs } from 'antd';
import { ApartmentOutlined, ThunderboltOutlined } from '@ant-design/icons';
import { ConfigProvider, Layout, theme, Typography, Space, Tabs, Slider, Radio } from 'antd';
import { ApartmentOutlined, ThunderboltOutlined, FilterOutlined } from '@ant-design/icons';
import { ThemeToggle } from './components/ThemeToggle';
import { InputPanel } from './components/InputPanel';
import { TransformationInputPanel } from './components/TransformationInputPanel';
import { MindmapPanel } from './components/MindmapPanel';
import { TransformationPanel } from './components/TransformationPanel';
import { DeduplicationPanel } from './components/DeduplicationPanel';
import { useAttribute } from './hooks/useAttribute';
import { getModels } from './services/api';
import type { MindmapDAGRef } from './components/MindmapDAG';
import type { TransformationDAGRef } from './components/TransformationDAG';
import type { CategoryMode, ExpertSource } from './types';
import type { CategoryMode, ExpertSource, ExpertTransformationDAGResult, DeduplicationMethod } from './types';
const { Header, Sider, Content } = Layout;
const { Title } = Typography;
@@ -45,8 +46,14 @@ function App() {
});
const [customExpertsInput, setCustomExpertsInput] = useState('');
const [expertSource, setExpertSource] = useState<ExpertSource>('llm');
const [expertLanguage, setExpertLanguage] = useState<'en' | 'zh'>('en');
const [shouldStartTransform, setShouldStartTransform] = useState(false);
const [transformLoading, setTransformLoading] = useState(false);
const [transformationResult, setTransformationResult] = useState<ExpertTransformationDAGResult | null>(null);
// Deduplication settings
const [deduplicationThreshold, setDeduplicationThreshold] = useState(0.85);
const [deduplicationMethod, setDeduplicationMethod] = useState<DeduplicationMethod>('embedding');
// Available models from API
const [availableModels, setAvailableModels] = useState<string[]>([]);
@@ -188,9 +195,32 @@ function App() {
temperature={transformTemperature}
expertConfig={expertConfig}
expertSource={expertSource}
expertLanguage={expertLanguage}
shouldStartTransform={shouldStartTransform}
onTransformComplete={() => setShouldStartTransform(false)}
onLoadingChange={setTransformLoading}
onResultsChange={setTransformationResult}
/>
</div>
),
},
{
key: 'deduplication',
label: (
<span>
<FilterOutlined style={{ marginRight: 8 }} />
Deduplication
</span>
),
children: (
<div style={{ height: 'calc(100vh - 140px)' }}>
<DeduplicationPanel
transformationResult={transformationResult}
isDark={isDark}
threshold={deduplicationThreshold}
onThresholdChange={setDeduplicationThreshold}
method={deduplicationMethod}
onMethodChange={setDeduplicationMethod}
/>
</div>
),
@@ -206,7 +236,7 @@ function App() {
overflow: 'auto',
}}
>
{activeTab === 'attribute' ? (
{activeTab === 'attribute' && (
<InputPanel
loading={loading}
progress={progress}
@@ -218,7 +248,8 @@ function App() {
visualSettings={visualSettings}
onVisualSettingsChange={setVisualSettings}
/>
) : (
)}
{activeTab === 'transformation' && (
<TransformationInputPanel
onTransform={handleTransform}
loading={transformLoading}
@@ -229,14 +260,85 @@ function App() {
expertConfig={expertConfig}
customExpertsInput={customExpertsInput}
expertSource={expertSource}
expertLanguage={expertLanguage}
onModelChange={setTransformModel}
onTemperatureChange={setTransformTemperature}
onExpertConfigChange={setExpertConfig}
onCustomExpertsInputChange={setCustomExpertsInput}
onExpertSourceChange={setExpertSource}
onExpertLanguageChange={setExpertLanguage}
availableModels={availableModels}
/>
)}
{activeTab === 'deduplication' && (
<div style={{ padding: 16 }}>
<Typography.Title level={5} style={{ marginBottom: 16 }}>
<FilterOutlined style={{ marginRight: 8 }} />
Deduplication Settings
</Typography.Title>
{/* Method Selection */}
<div style={{ marginBottom: 20 }}>
<Typography.Text strong style={{ display: 'block', marginBottom: 8 }}>
Method
</Typography.Text>
<Radio.Group
value={deduplicationMethod}
onChange={(e) => setDeduplicationMethod(e.target.value)}
buttonStyle="solid"
style={{ width: '100%' }}
>
<Radio.Button value="embedding" style={{ width: '50%', textAlign: 'center' }}>
Embedding
</Radio.Button>
<Radio.Button value="llm" style={{ width: '50%', textAlign: 'center' }}>
LLM Judge
</Radio.Button>
</Radio.Group>
<Typography.Text type="secondary" style={{ display: 'block', marginTop: 8, fontSize: 12 }}>
{deduplicationMethod === 'embedding'
? 'Fast vector similarity comparison'
: 'Accurate but slower pairwise LLM comparison'}
</Typography.Text>
</div>
{/* Threshold Slider - Only for Embedding method */}
{deduplicationMethod === 'embedding' && (
<div style={{ marginBottom: 20 }}>
<Typography.Text strong style={{ display: 'block', marginBottom: 8 }}>
Similarity Threshold
</Typography.Text>
<Typography.Text type="secondary" style={{ display: 'block', marginBottom: 12, fontSize: 12 }}>
Higher = stricter matching, fewer groups
</Typography.Text>
<Slider
min={0.5}
max={1.0}
step={0.05}
value={deduplicationThreshold}
onChange={setDeduplicationThreshold}
marks={{
0.5: '50%',
0.7: '70%',
0.85: '85%',
1.0: '100%',
}}
tooltip={{ formatter: (val) => `${((val ?? 0) * 100).toFixed(0)}%` }}
/>
<Typography.Text type="secondary" style={{ fontSize: 12 }}>
Current: {(deduplicationThreshold * 100).toFixed(0)}% similarity required
</Typography.Text>
</div>
)}
{/* LLM Warning */}
{deduplicationMethod === 'llm' && (
<Typography.Text type="warning" style={{ display: 'block', fontSize: 12 }}>
Note: LLM method requires N*(N-1)/2 comparisons. May take longer for many descriptions.
</Typography.Text>
)}
</div>
)}
</Sider>
</Layout>
</Layout>