feat: Add Deduplication Agent with embedding and LLM methods
Implement a new Deduplication Agent that identifies and groups similar transformation descriptions.

Supports two deduplication methods:
- Embedding: Fast vector similarity comparison using cosine similarity
- LLM: Accurate pairwise semantic comparison (slower but more precise)

Backend changes:
- Add deduplication router with /deduplicate endpoint
- Add embedding_service for vector-based similarity
- Add llm_deduplication_service for LLM-based comparison
- Improve expert_transformation error handling and progress reporting

Frontend changes:
- Add DeduplicationPanel with interactive group visualization
- Add useDeduplication hook for state management
- Integrate deduplication tab in main App
- Add threshold slider and method selector in sidebar

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,16 +1,17 @@
|
||||
import { useState, useRef, useCallback, useEffect } from 'react';
|
||||
import { ConfigProvider, Layout, theme, Typography, Space, Tabs } from 'antd';
|
||||
import { ApartmentOutlined, ThunderboltOutlined } from '@ant-design/icons';
|
||||
import { ConfigProvider, Layout, theme, Typography, Space, Tabs, Slider, Radio } from 'antd';
|
||||
import { ApartmentOutlined, ThunderboltOutlined, FilterOutlined } from '@ant-design/icons';
|
||||
import { ThemeToggle } from './components/ThemeToggle';
|
||||
import { InputPanel } from './components/InputPanel';
|
||||
import { TransformationInputPanel } from './components/TransformationInputPanel';
|
||||
import { MindmapPanel } from './components/MindmapPanel';
|
||||
import { TransformationPanel } from './components/TransformationPanel';
|
||||
import { DeduplicationPanel } from './components/DeduplicationPanel';
|
||||
import { useAttribute } from './hooks/useAttribute';
|
||||
import { getModels } from './services/api';
|
||||
import type { MindmapDAGRef } from './components/MindmapDAG';
|
||||
import type { TransformationDAGRef } from './components/TransformationDAG';
|
||||
import type { CategoryMode, ExpertSource } from './types';
|
||||
import type { CategoryMode, ExpertSource, ExpertTransformationDAGResult, DeduplicationMethod } from './types';
|
||||
|
||||
const { Header, Sider, Content } = Layout;
|
||||
const { Title } = Typography;
|
||||
@@ -45,8 +46,14 @@ function App() {
|
||||
});
|
||||
const [customExpertsInput, setCustomExpertsInput] = useState('');
|
||||
const [expertSource, setExpertSource] = useState<ExpertSource>('llm');
|
||||
const [expertLanguage, setExpertLanguage] = useState<'en' | 'zh'>('en');
|
||||
const [shouldStartTransform, setShouldStartTransform] = useState(false);
|
||||
const [transformLoading, setTransformLoading] = useState(false);
|
||||
const [transformationResult, setTransformationResult] = useState<ExpertTransformationDAGResult | null>(null);
|
||||
|
||||
// Deduplication settings
|
||||
const [deduplicationThreshold, setDeduplicationThreshold] = useState(0.85);
|
||||
const [deduplicationMethod, setDeduplicationMethod] = useState<DeduplicationMethod>('embedding');
|
||||
|
||||
// Available models from API
|
||||
const [availableModels, setAvailableModels] = useState<string[]>([]);
|
||||
@@ -188,9 +195,32 @@ function App() {
|
||||
temperature={transformTemperature}
|
||||
expertConfig={expertConfig}
|
||||
expertSource={expertSource}
|
||||
expertLanguage={expertLanguage}
|
||||
shouldStartTransform={shouldStartTransform}
|
||||
onTransformComplete={() => setShouldStartTransform(false)}
|
||||
onLoadingChange={setTransformLoading}
|
||||
onResultsChange={setTransformationResult}
|
||||
/>
|
||||
</div>
|
||||
),
|
||||
},
|
||||
{
|
||||
key: 'deduplication',
|
||||
label: (
|
||||
<span>
|
||||
<FilterOutlined style={{ marginRight: 8 }} />
|
||||
Deduplication
|
||||
</span>
|
||||
),
|
||||
children: (
|
||||
<div style={{ height: 'calc(100vh - 140px)' }}>
|
||||
<DeduplicationPanel
|
||||
transformationResult={transformationResult}
|
||||
isDark={isDark}
|
||||
threshold={deduplicationThreshold}
|
||||
onThresholdChange={setDeduplicationThreshold}
|
||||
method={deduplicationMethod}
|
||||
onMethodChange={setDeduplicationMethod}
|
||||
/>
|
||||
</div>
|
||||
),
|
||||
@@ -206,7 +236,7 @@ function App() {
|
||||
overflow: 'auto',
|
||||
}}
|
||||
>
|
||||
{activeTab === 'attribute' ? (
|
||||
{activeTab === 'attribute' && (
|
||||
<InputPanel
|
||||
loading={loading}
|
||||
progress={progress}
|
||||
@@ -218,7 +248,8 @@ function App() {
|
||||
visualSettings={visualSettings}
|
||||
onVisualSettingsChange={setVisualSettings}
|
||||
/>
|
||||
) : (
|
||||
)}
|
||||
{activeTab === 'transformation' && (
|
||||
<TransformationInputPanel
|
||||
onTransform={handleTransform}
|
||||
loading={transformLoading}
|
||||
@@ -229,14 +260,85 @@ function App() {
|
||||
expertConfig={expertConfig}
|
||||
customExpertsInput={customExpertsInput}
|
||||
expertSource={expertSource}
|
||||
expertLanguage={expertLanguage}
|
||||
onModelChange={setTransformModel}
|
||||
onTemperatureChange={setTransformTemperature}
|
||||
onExpertConfigChange={setExpertConfig}
|
||||
onCustomExpertsInputChange={setCustomExpertsInput}
|
||||
onExpertSourceChange={setExpertSource}
|
||||
onExpertLanguageChange={setExpertLanguage}
|
||||
availableModels={availableModels}
|
||||
/>
|
||||
)}
|
||||
{activeTab === 'deduplication' && (
|
||||
<div style={{ padding: 16 }}>
|
||||
<Typography.Title level={5} style={{ marginBottom: 16 }}>
|
||||
<FilterOutlined style={{ marginRight: 8 }} />
|
||||
Deduplication Settings
|
||||
</Typography.Title>
|
||||
|
||||
{/* Method Selection */}
|
||||
<div style={{ marginBottom: 20 }}>
|
||||
<Typography.Text strong style={{ display: 'block', marginBottom: 8 }}>
|
||||
Method
|
||||
</Typography.Text>
|
||||
<Radio.Group
|
||||
value={deduplicationMethod}
|
||||
onChange={(e) => setDeduplicationMethod(e.target.value)}
|
||||
buttonStyle="solid"
|
||||
style={{ width: '100%' }}
|
||||
>
|
||||
<Radio.Button value="embedding" style={{ width: '50%', textAlign: 'center' }}>
|
||||
Embedding
|
||||
</Radio.Button>
|
||||
<Radio.Button value="llm" style={{ width: '50%', textAlign: 'center' }}>
|
||||
LLM Judge
|
||||
</Radio.Button>
|
||||
</Radio.Group>
|
||||
<Typography.Text type="secondary" style={{ display: 'block', marginTop: 8, fontSize: 12 }}>
|
||||
{deduplicationMethod === 'embedding'
|
||||
? 'Fast vector similarity comparison'
|
||||
: 'Accurate but slower pairwise LLM comparison'}
|
||||
</Typography.Text>
|
||||
</div>
|
||||
|
||||
{/* Threshold Slider - Only for Embedding method */}
|
||||
{deduplicationMethod === 'embedding' && (
|
||||
<div style={{ marginBottom: 20 }}>
|
||||
<Typography.Text strong style={{ display: 'block', marginBottom: 8 }}>
|
||||
Similarity Threshold
|
||||
</Typography.Text>
|
||||
<Typography.Text type="secondary" style={{ display: 'block', marginBottom: 12, fontSize: 12 }}>
|
||||
Higher = stricter matching, fewer groups
|
||||
</Typography.Text>
|
||||
<Slider
|
||||
min={0.5}
|
||||
max={1.0}
|
||||
step={0.05}
|
||||
value={deduplicationThreshold}
|
||||
onChange={setDeduplicationThreshold}
|
||||
marks={{
|
||||
0.5: '50%',
|
||||
0.7: '70%',
|
||||
0.85: '85%',
|
||||
1.0: '100%',
|
||||
}}
|
||||
tooltip={{ formatter: (val) => `${((val ?? 0) * 100).toFixed(0)}%` }}
|
||||
/>
|
||||
<Typography.Text type="secondary" style={{ fontSize: 12 }}>
|
||||
Current: {(deduplicationThreshold * 100).toFixed(0)}% similarity required
|
||||
</Typography.Text>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* LLM Warning */}
|
||||
{deduplicationMethod === 'llm' && (
|
||||
<Typography.Text type="warning" style={{ display: 'block', fontSize: 12 }}>
|
||||
Note: LLM method requires N*(N-1)/2 comparisons. May take longer for many descriptions.
|
||||
</Typography.Text>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</Sider>
|
||||
</Layout>
|
||||
</Layout>
|
||||
|
||||
Reference in New Issue
Block a user