feat: Add Deduplication Agent with embedding and LLM methods
Implement a new Deduplication Agent that identifies and groups similar transformation descriptions.

Supports two deduplication methods:
- Embedding: Fast vector similarity comparison using cosine similarity
- LLM: Accurate pairwise semantic comparison (slower but more precise)

Backend changes:
- Add deduplication router with /deduplicate endpoint
- Add embedding_service for vector-based similarity
- Add llm_deduplication_service for LLM-based comparison
- Improve expert_transformation error handling and progress reporting

Frontend changes:
- Add DeduplicationPanel with interactive group visualization
- Add useDeduplication hook for state management
- Integrate deduplication tab in main App
- Add threshold slider and method selector in sidebar

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,16 +1,17 @@
|
||||
import { useState, useRef, useCallback, useEffect } from 'react';
|
||||
import { ConfigProvider, Layout, theme, Typography, Space, Tabs } from 'antd';
|
||||
import { ApartmentOutlined, ThunderboltOutlined } from '@ant-design/icons';
|
||||
import { ConfigProvider, Layout, theme, Typography, Space, Tabs, Slider, Radio } from 'antd';
|
||||
import { ApartmentOutlined, ThunderboltOutlined, FilterOutlined } from '@ant-design/icons';
|
||||
import { ThemeToggle } from './components/ThemeToggle';
|
||||
import { InputPanel } from './components/InputPanel';
|
||||
import { TransformationInputPanel } from './components/TransformationInputPanel';
|
||||
import { MindmapPanel } from './components/MindmapPanel';
|
||||
import { TransformationPanel } from './components/TransformationPanel';
|
||||
import { DeduplicationPanel } from './components/DeduplicationPanel';
|
||||
import { useAttribute } from './hooks/useAttribute';
|
||||
import { getModels } from './services/api';
|
||||
import type { MindmapDAGRef } from './components/MindmapDAG';
|
||||
import type { TransformationDAGRef } from './components/TransformationDAG';
|
||||
import type { CategoryMode, ExpertSource } from './types';
|
||||
import type { CategoryMode, ExpertSource, ExpertTransformationDAGResult, DeduplicationMethod } from './types';
|
||||
|
||||
const { Header, Sider, Content } = Layout;
|
||||
const { Title } = Typography;
|
||||
@@ -45,8 +46,14 @@ function App() {
|
||||
});
|
||||
const [customExpertsInput, setCustomExpertsInput] = useState('');
|
||||
const [expertSource, setExpertSource] = useState<ExpertSource>('llm');
|
||||
const [expertLanguage, setExpertLanguage] = useState<'en' | 'zh'>('en');
|
||||
const [shouldStartTransform, setShouldStartTransform] = useState(false);
|
||||
const [transformLoading, setTransformLoading] = useState(false);
|
||||
const [transformationResult, setTransformationResult] = useState<ExpertTransformationDAGResult | null>(null);
|
||||
|
||||
// Deduplication settings
|
||||
const [deduplicationThreshold, setDeduplicationThreshold] = useState(0.85);
|
||||
const [deduplicationMethod, setDeduplicationMethod] = useState<DeduplicationMethod>('embedding');
|
||||
|
||||
// Available models from API
|
||||
const [availableModels, setAvailableModels] = useState<string[]>([]);
|
||||
@@ -188,9 +195,32 @@ function App() {
|
||||
temperature={transformTemperature}
|
||||
expertConfig={expertConfig}
|
||||
expertSource={expertSource}
|
||||
expertLanguage={expertLanguage}
|
||||
shouldStartTransform={shouldStartTransform}
|
||||
onTransformComplete={() => setShouldStartTransform(false)}
|
||||
onLoadingChange={setTransformLoading}
|
||||
onResultsChange={setTransformationResult}
|
||||
/>
|
||||
</div>
|
||||
),
|
||||
},
|
||||
{
|
||||
key: 'deduplication',
|
||||
label: (
|
||||
<span>
|
||||
<FilterOutlined style={{ marginRight: 8 }} />
|
||||
Deduplication
|
||||
</span>
|
||||
),
|
||||
children: (
|
||||
<div style={{ height: 'calc(100vh - 140px)' }}>
|
||||
<DeduplicationPanel
|
||||
transformationResult={transformationResult}
|
||||
isDark={isDark}
|
||||
threshold={deduplicationThreshold}
|
||||
onThresholdChange={setDeduplicationThreshold}
|
||||
method={deduplicationMethod}
|
||||
onMethodChange={setDeduplicationMethod}
|
||||
/>
|
||||
</div>
|
||||
),
|
||||
@@ -206,7 +236,7 @@ function App() {
|
||||
overflow: 'auto',
|
||||
}}
|
||||
>
|
||||
{activeTab === 'attribute' ? (
|
||||
{activeTab === 'attribute' && (
|
||||
<InputPanel
|
||||
loading={loading}
|
||||
progress={progress}
|
||||
@@ -218,7 +248,8 @@ function App() {
|
||||
visualSettings={visualSettings}
|
||||
onVisualSettingsChange={setVisualSettings}
|
||||
/>
|
||||
) : (
|
||||
)}
|
||||
{activeTab === 'transformation' && (
|
||||
<TransformationInputPanel
|
||||
onTransform={handleTransform}
|
||||
loading={transformLoading}
|
||||
@@ -229,14 +260,85 @@ function App() {
|
||||
expertConfig={expertConfig}
|
||||
customExpertsInput={customExpertsInput}
|
||||
expertSource={expertSource}
|
||||
expertLanguage={expertLanguage}
|
||||
onModelChange={setTransformModel}
|
||||
onTemperatureChange={setTransformTemperature}
|
||||
onExpertConfigChange={setExpertConfig}
|
||||
onCustomExpertsInputChange={setCustomExpertsInput}
|
||||
onExpertSourceChange={setExpertSource}
|
||||
onExpertLanguageChange={setExpertLanguage}
|
||||
availableModels={availableModels}
|
||||
/>
|
||||
)}
|
||||
{activeTab === 'deduplication' && (
|
||||
<div style={{ padding: 16 }}>
|
||||
<Typography.Title level={5} style={{ marginBottom: 16 }}>
|
||||
<FilterOutlined style={{ marginRight: 8 }} />
|
||||
Deduplication Settings
|
||||
</Typography.Title>
|
||||
|
||||
{/* Method Selection */}
|
||||
<div style={{ marginBottom: 20 }}>
|
||||
<Typography.Text strong style={{ display: 'block', marginBottom: 8 }}>
|
||||
Method
|
||||
</Typography.Text>
|
||||
<Radio.Group
|
||||
value={deduplicationMethod}
|
||||
onChange={(e) => setDeduplicationMethod(e.target.value)}
|
||||
buttonStyle="solid"
|
||||
style={{ width: '100%' }}
|
||||
>
|
||||
<Radio.Button value="embedding" style={{ width: '50%', textAlign: 'center' }}>
|
||||
Embedding
|
||||
</Radio.Button>
|
||||
<Radio.Button value="llm" style={{ width: '50%', textAlign: 'center' }}>
|
||||
LLM Judge
|
||||
</Radio.Button>
|
||||
</Radio.Group>
|
||||
<Typography.Text type="secondary" style={{ display: 'block', marginTop: 8, fontSize: 12 }}>
|
||||
{deduplicationMethod === 'embedding'
|
||||
? 'Fast vector similarity comparison'
|
||||
: 'Accurate but slower pairwise LLM comparison'}
|
||||
</Typography.Text>
|
||||
</div>
|
||||
|
||||
{/* Threshold Slider - Only for Embedding method */}
|
||||
{deduplicationMethod === 'embedding' && (
|
||||
<div style={{ marginBottom: 20 }}>
|
||||
<Typography.Text strong style={{ display: 'block', marginBottom: 8 }}>
|
||||
Similarity Threshold
|
||||
</Typography.Text>
|
||||
<Typography.Text type="secondary" style={{ display: 'block', marginBottom: 12, fontSize: 12 }}>
|
||||
Higher = stricter matching, fewer groups
|
||||
</Typography.Text>
|
||||
<Slider
|
||||
min={0.5}
|
||||
max={1.0}
|
||||
step={0.05}
|
||||
value={deduplicationThreshold}
|
||||
onChange={setDeduplicationThreshold}
|
||||
marks={{
|
||||
0.5: '50%',
|
||||
0.7: '70%',
|
||||
0.85: '85%',
|
||||
1.0: '100%',
|
||||
}}
|
||||
tooltip={{ formatter: (val) => `${((val ?? 0) * 100).toFixed(0)}%` }}
|
||||
/>
|
||||
<Typography.Text type="secondary" style={{ fontSize: 12 }}>
|
||||
Current: {(deduplicationThreshold * 100).toFixed(0)}% similarity required
|
||||
</Typography.Text>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* LLM Warning */}
|
||||
{deduplicationMethod === 'llm' && (
|
||||
<Typography.Text type="warning" style={{ display: 'block', fontSize: 12 }}>
|
||||
Note: LLM method requires N*(N-1)/2 comparisons. May take longer for many descriptions.
|
||||
</Typography.Text>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</Sider>
|
||||
</Layout>
|
||||
</Layout>
|
||||
|
||||
271
frontend/src/components/DeduplicationPanel.tsx
Normal file
271
frontend/src/components/DeduplicationPanel.tsx
Normal file
@@ -0,0 +1,271 @@
|
||||
import React, { useEffect, useMemo } from 'react';
|
||||
import {
|
||||
Card,
|
||||
Button,
|
||||
Slider,
|
||||
Statistic,
|
||||
Row,
|
||||
Col,
|
||||
Empty,
|
||||
Spin,
|
||||
Alert,
|
||||
Typography,
|
||||
Space,
|
||||
Divider,
|
||||
} from 'antd';
|
||||
import {
|
||||
FilterOutlined,
|
||||
ReloadOutlined,
|
||||
CheckCircleOutlined,
|
||||
ClusterOutlined,
|
||||
CopyOutlined,
|
||||
} from '@ant-design/icons';
|
||||
import { useDeduplication } from '../hooks/useDeduplication';
|
||||
import { GroupCard } from './deduplication/GroupCard';
|
||||
import type {
|
||||
ExpertTransformationDAGResult,
|
||||
ExpertTransformationDescription,
|
||||
DeduplicationMethod,
|
||||
} from '../types';
|
||||
|
||||
const { Title, Text } = Typography;
|
||||
|
||||
/**
 * Props accepted by the DeduplicationPanel component.
 */
interface DeduplicationPanelProps {
  /** Transformation output whose descriptions get deduplicated; `null` when none is available. */
  transformationResult: ExpertTransformationDAGResult | null;

  /** Switches the panel's inline colours between the dark and light variants. */
  isDark: boolean;

  /** Similarity threshold shown by the slider and sent with the deduplication request. */
  threshold: number;

  /** Invoked with the new value when the threshold slider changes. */
  onThresholdChange: (value: number) => void;

  /** Deduplication method sent with the deduplication request. */
  method: DeduplicationMethod;

  /** Optional; not called by the panel itself, method changes are handled in the App.tsx sidebar. */
  onMethodChange?: (method: DeduplicationMethod) => void;
}
|
||||
|
||||
/**
 * Panel for deduplicating transformation descriptions.
 *
 * Flattens the descriptions of every category in `transformationResult`,
 * runs deduplication on demand through the `useDeduplication` hook, and
 * renders one `GroupCard` per resulting group.
 *
 * Rendering states:
 * - no `transformationResult`  -> "No transformation data available" placeholder
 * - no descriptions            -> "No descriptions found" placeholder
 * - otherwise                  -> header card (statistics, threshold slider,
 *   action buttons) followed by an error alert, a spinner, the result list,
 *   or the initial "Ready to Deduplicate" prompt.
 *
 * Any previous result is cleared whenever `transformationResult` or `method`
 * changes.
 */
export const DeduplicationPanel: React.FC<DeduplicationPanelProps> = ({
  transformationResult,
  isDark,
  threshold,
  onThresholdChange,
  method,
  // onMethodChange is handled in App.tsx sidebar
}) => {
  const { loading, result, error, progress, deduplicate, clearResult } = useDeduplication();

  // Extract all descriptions from transformation result
  const allDescriptions = useMemo<ExpertTransformationDescription[]>(
    () =>
      transformationResult?.results.flatMap((categoryResult) => categoryResult.descriptions) ?? [],
    [transformationResult]
  );

  // Clear result when transformation result or method changes
  useEffect(() => {
    clearResult();
  }, [transformationResult, method, clearResult]);

  // The similarity threshold is only meaningful for the embedding method,
  // so the slider is disabled (and labelled accordingly) for the LLM method.
  const thresholdApplies = method === 'embedding';

  const handleDeduplicate = () => {
    if (allDescriptions.length > 0) {
      deduplicate(allDescriptions, threshold, method);
    }
  };

  const containerStyle: React.CSSProperties = {
    height: '100%',
    display: 'flex',
    flexDirection: 'column',
    padding: 16,
    overflow: 'hidden',
  };

  const headerCardStyle: React.CSSProperties = {
    marginBottom: 16,
    background: isDark ? '#1f1f1f' : '#fff',
    borderRadius: 8,
  };

  const resultsContainerStyle: React.CSSProperties = {
    flex: 1,
    overflow: 'auto',
    paddingRight: 8,
  };

  // No transformation data
  if (!transformationResult) {
    return (
      <div style={{ ...containerStyle, justifyContent: 'center', alignItems: 'center' }}>
        <Empty
          description={
            <Space direction="vertical" size={4}>
              <Text style={{ color: isDark ? '#999' : '#666' }}>
                No transformation data available
              </Text>
              <Text type="secondary" style={{ fontSize: 12 }}>
                Please run the Transformation Agent first
              </Text>
            </Space>
          }
        />
      </div>
    );
  }

  // No descriptions found
  if (allDescriptions.length === 0) {
    return (
      <div style={{ ...containerStyle, justifyContent: 'center', alignItems: 'center' }}>
        <Empty description="No descriptions found in transformation result" />
      </div>
    );
  }

  return (
    <div style={containerStyle}>
      {/* Header Card with Controls */}
      <Card size="small" style={headerCardStyle}>
        <Row gutter={[16, 16]} align="middle">
          <Col span={6}>
            <Statistic
              title="Total Descriptions"
              value={allDescriptions.length}
              prefix={<FilterOutlined />}
            />
          </Col>
          <Col span={6}>
            <Statistic
              title="Unique Groups"
              value={result?.total_groups ?? '-'}
              prefix={<ClusterOutlined />}
              valueStyle={{ color: result ? '#52c41a' : undefined }}
            />
          </Col>
          <Col span={6}>
            <Statistic
              title="Duplicates Found"
              value={result?.total_duplicates ?? '-'}
              prefix={<CopyOutlined />}
              valueStyle={{ color: result?.total_duplicates ? '#fa8c16' : undefined }}
            />
          </Col>
          <Col span={6}>
            <Space direction="vertical" size={4} style={{ width: '100%' }}>
              <Text type="secondary" style={{ fontSize: 12 }}>
                {thresholdApplies
                  ? `Similarity Threshold: ${(threshold * 100).toFixed(0)}%`
                  : 'Similarity Threshold: not used by the LLM method'}
              </Text>
              <Slider
                min={0.5}
                max={1.0}
                step={0.05}
                value={threshold}
                onChange={onThresholdChange}
                disabled={loading || !thresholdApplies}
                tooltip={{ formatter: (val) => `${((val ?? 0) * 100).toFixed(0)}%` }}
              />
            </Space>
          </Col>
        </Row>

        <Divider style={{ margin: '12px 0' }} />

        <Row justify="space-between" align="middle">
          <Col>
            <Text type="secondary" style={{ fontSize: 12 }}>
              {progress.message || 'Ready to analyze'}
            </Text>
          </Col>
          <Col>
            <Space>
              {result && (
                <Button
                  icon={<ReloadOutlined />}
                  onClick={clearResult}
                  disabled={loading}
                >
                  Clear
                </Button>
              )}
              <Button
                type="primary"
                icon={<CheckCircleOutlined />}
                onClick={handleDeduplicate}
                loading={loading}
              >
                {loading ? 'Processing...' : 'Deduplicate'}
              </Button>
            </Space>
          </Col>
        </Row>
      </Card>

      {/* Error Alert */}
      {error && (
        <Alert
          message="Deduplication Error"
          description={error}
          type="error"
          showIcon
          closable
          // Dismissing the alert must also reset the hook's error state;
          // otherwise none of the branches below render and the panel body
          // stays blank until the next run.
          onClose={clearResult}
          style={{ marginBottom: 16 }}
        />
      )}

      {/* Loading State */}
      {loading && (
        <div style={{ textAlign: 'center', padding: 40 }}>
          <Spin size="large" />
          <div style={{ marginTop: 16 }}>
            <Text type="secondary">{progress.message}</Text>
          </div>
        </div>
      )}

      {/* Results */}
      {!loading && result && (
        <div style={resultsContainerStyle}>
          <Title level={5} style={{ marginBottom: 16, color: isDark ? '#fff' : '#000' }}>
            <ClusterOutlined style={{ marginRight: 8 }} />
            {result.total_groups} Groups
            {result.total_duplicates > 0 && (
              <Text type="secondary" style={{ fontSize: 14, fontWeight: 'normal', marginLeft: 8 }}>
                ({result.total_duplicates} duplicates removed)
              </Text>
            )}
          </Title>

          {result.groups.map((group, index) => (
            <GroupCard
              key={group.group_id}
              group={group}
              isDark={isDark}
              index={index}
            />
          ))}

          {result.total_groups === 0 && (
            <Empty description="No groups found" />
          )}
        </div>
      )}

      {/* Initial State - show prompt */}
      {!loading && !result && !error && (
        <div style={{ textAlign: 'center', padding: 40 }}>
          <FilterOutlined style={{ fontSize: 48, color: '#1890ff', marginBottom: 16 }} />
          <Title level={4} style={{ color: isDark ? '#fff' : '#000' }}>
            Ready to Deduplicate
          </Title>
          <Text type="secondary">
            Click the "Deduplicate" button to analyze {allDescriptions.length} descriptions
            and group similar ones together.
          </Text>
        </div>
      )}
    </div>
  );
};
|
||||
|
||||
export default DeduplicationPanel;
|
||||
@@ -12,6 +12,11 @@ const EXPERT_SOURCE_OPTIONS = [
|
||||
{ label: 'Wikidata', value: 'wikidata' as ExpertSource, description: '從 Wikidata 查詢職業 (需等待 API)' },
|
||||
];
|
||||
|
||||
/** Choices listed by the expert-name language selector. */
const EXPERT_LANGUAGE_OPTIONS: Array<{ label: string; value: 'en' | 'zh' }> = [
  { label: 'English', value: 'en' },
  { label: '中文', value: 'zh' },
];
|
||||
|
||||
interface TransformationInputPanelProps {
|
||||
onTransform: () => void;
|
||||
loading: boolean;
|
||||
@@ -26,6 +31,7 @@ interface TransformationInputPanelProps {
|
||||
};
|
||||
customExpertsInput: string;
|
||||
expertSource: ExpertSource;
|
||||
expertLanguage: 'en' | 'zh';
|
||||
onModelChange: (model: string) => void;
|
||||
onTemperatureChange: (temperature: number) => void;
|
||||
onExpertConfigChange: (config: {
|
||||
@@ -35,6 +41,7 @@ interface TransformationInputPanelProps {
|
||||
}) => void;
|
||||
onCustomExpertsInputChange: (value: string) => void;
|
||||
onExpertSourceChange: (source: ExpertSource) => void;
|
||||
onExpertLanguageChange: (language: 'en' | 'zh') => void;
|
||||
availableModels: string[];
|
||||
}
|
||||
|
||||
@@ -48,11 +55,13 @@ export const TransformationInputPanel: React.FC<TransformationInputPanelProps> =
|
||||
expertConfig,
|
||||
customExpertsInput,
|
||||
expertSource,
|
||||
expertLanguage,
|
||||
onModelChange,
|
||||
onTemperatureChange,
|
||||
onExpertConfigChange,
|
||||
onCustomExpertsInputChange,
|
||||
onExpertSourceChange,
|
||||
onExpertLanguageChange,
|
||||
availableModels,
|
||||
}) => {
|
||||
return (
|
||||
@@ -142,6 +151,19 @@ export const TransformationInputPanel: React.FC<TransformationInputPanelProps> =
|
||||
<Text type="secondary" style={{ fontSize: 11 }}>
|
||||
{EXPERT_SOURCE_OPTIONS.find((opt) => opt.value === expertSource)?.description}
|
||||
</Text>
|
||||
|
||||
{/* Language selector - only for curated source */}
|
||||
{expertSource === 'curated' && (
|
||||
<div style={{ marginTop: 8 }}>
|
||||
<Text style={{ fontSize: 12 }}>職業名稱語言</Text>
|
||||
<Select
|
||||
value={expertLanguage}
|
||||
onChange={onExpertLanguageChange}
|
||||
style={{ width: '100%', marginTop: 4 }}
|
||||
options={EXPERT_LANGUAGE_OPTIONS}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</Space>
|
||||
</Card>
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { forwardRef, useMemo, useCallback, useEffect } from 'react';
|
||||
import { Empty, Spin, Button, Progress, Card, Space, Typography, Tag } from 'antd';
|
||||
import { ReloadOutlined } from '@ant-design/icons';
|
||||
import type { AttributeDAG, ExpertTransformationInput, ExpertSource } from '../types';
|
||||
import type { AttributeDAG, ExpertTransformationInput, ExpertSource, ExpertTransformationDAGResult } from '../types';
|
||||
import { TransformationDAG } from './TransformationDAG';
|
||||
import type { TransformationDAGRef } from './TransformationDAG';
|
||||
import { useExpertTransformation } from '../hooks/useExpertTransformation';
|
||||
@@ -19,26 +19,33 @@ interface TransformationPanelProps {
|
||||
custom_experts?: string[];
|
||||
};
|
||||
expertSource: ExpertSource;
|
||||
expertLanguage: 'en' | 'zh';
|
||||
shouldStartTransform: boolean;
|
||||
onTransformComplete: () => void;
|
||||
onLoadingChange: (loading: boolean) => void;
|
||||
onResultsChange?: (results: ExpertTransformationDAGResult | null) => void;
|
||||
}
|
||||
|
||||
export const TransformationPanel = forwardRef<TransformationDAGRef, TransformationPanelProps>(
|
||||
({ attributeData, isDark, model, temperature, expertConfig, expertSource, shouldStartTransform, onTransformComplete, onLoadingChange }, ref) => {
|
||||
({ attributeData, isDark, model, temperature, expertConfig, expertSource, expertLanguage, shouldStartTransform, onTransformComplete, onLoadingChange, onResultsChange }, ref) => {
|
||||
const {
|
||||
loading,
|
||||
progress,
|
||||
results,
|
||||
transformAll,
|
||||
clearResults,
|
||||
} = useExpertTransformation({ model, temperature, expertSource });
|
||||
} = useExpertTransformation({ model, temperature, expertSource, expertLanguage });
|
||||
|
||||
// Notify parent of loading state changes
|
||||
useEffect(() => {
|
||||
onLoadingChange(loading);
|
||||
}, [loading, onLoadingChange]);
|
||||
|
||||
// Notify parent of results changes
|
||||
useEffect(() => {
|
||||
onResultsChange?.(results);
|
||||
}, [results, onResultsChange]);
|
||||
|
||||
// Build expert transformation input from attribute data
|
||||
const transformationInput = useMemo((): ExpertTransformationInput | null => {
|
||||
if (!attributeData) return null;
|
||||
|
||||
147
frontend/src/components/deduplication/GroupCard.tsx
Normal file
147
frontend/src/components/deduplication/GroupCard.tsx
Normal file
@@ -0,0 +1,147 @@
|
||||
import React, { useState } from 'react';
|
||||
import { Card, Tag, Collapse, Typography, Space, Badge } from 'antd';
|
||||
import { StarFilled, CopyOutlined, UserOutlined } from '@ant-design/icons';
|
||||
import type { DescriptionGroup } from '../../types';
|
||||
|
||||
const { Text, Paragraph } = Typography;
|
||||
const { Panel } = Collapse;
|
||||
|
||||
/**
 * Props accepted by the GroupCard component.
 */
interface GroupCardProps {
  /** Group to render: its representative description plus any duplicates. */
  group: DescriptionGroup;

  /** Switches the card's inline colours between the dark and light variants. */
  isDark: boolean;

  /** Zero-based position of the group; shown as `index + 1` in the badge. */
  index: number;
}
|
||||
|
||||
/**
 * Card component for displaying a group of similar descriptions.
 *
 * The card title shows the group's 1-based number, the representative's
 * keyword and, when the group has duplicates, a "N similar" tag. The
 * representative description is always visible; duplicates sit in a
 * collapsible section that starts collapsed.
 *
 * @param group  - Group to display.
 * @param isDark - Selects the dark or light inline colour variants.
 * @param index  - Zero-based group position; rendered as `index + 1`.
 */
export const GroupCard: React.FC<GroupCardProps> = ({ group, isDark, index }) => {
  const [expanded, setExpanded] = useState(false);
  const hasDuplicates = group.duplicates.length > 0;
  const groupNumber = index + 1;

  const cardStyle: React.CSSProperties = {
    marginBottom: 16,
    borderRadius: 8,
    border: isDark ? '1px solid #303030' : '1px solid #f0f0f0',
    background: isDark ? '#1f1f1f' : '#fff',
  };

  const representativeStyle: React.CSSProperties = {
    background: isDark
      ? 'linear-gradient(135deg, #1a472a 0%, #2d5a3d 100%)'
      : 'linear-gradient(135deg, #f6ffed 0%, #d9f7be 100%)',
    padding: 12,
    borderRadius: 6,
    marginBottom: hasDuplicates ? 12 : 0,
  };

  const duplicateItemStyle: React.CSSProperties = {
    background: isDark ? '#2a2a2a' : '#fafafa',
    padding: 10,
    borderRadius: 4,
    marginBottom: 8,
    borderLeft: `3px solid ${isDark ? '#faad14' : '#fa8c16'}`,
  };

  return (
    <Card
      size="small"
      style={cardStyle}
      title={
        <Space>
          <Badge
            count={groupNumber}
            // Badge abbreviates counts above overflowCount (default 99) to
            // "99+"; raise the cap so every group shows its real number.
            overflowCount={groupNumber}
            style={{
              backgroundColor: hasDuplicates ? '#52c41a' : '#1890ff',
            }}
          />
          <Text strong style={{ color: isDark ? '#fff' : '#000' }}>
            {group.representative.keyword}
          </Text>
          {hasDuplicates && (
            <Tag color="orange" icon={<CopyOutlined />}>
              {group.duplicates.length} similar
            </Tag>
          )}
        </Space>
      }
      extra={
        <Tag color={isDark ? 'geekblue' : 'blue'}>
          <UserOutlined style={{ marginRight: 4 }} />
          {group.representative.expert_name}
        </Tag>
      }
    >
      {/* Representative description */}
      <div style={representativeStyle}>
        <Space direction="vertical" size={4} style={{ width: '100%' }}>
          <Space>
            <StarFilled style={{ color: '#52c41a' }} />
            <Text type="secondary" style={{ fontSize: 12 }}>
              Representative
            </Text>
          </Space>
          <Paragraph
            style={{
              margin: 0,
              color: isDark ? '#e0e0e0' : '#333',
              fontSize: 14,
            }}
          >
            {group.representative.description}
          </Paragraph>
        </Space>
      </div>

      {/* Duplicates section */}
      {hasDuplicates && (
        <Collapse
          ghost
          activeKey={expanded ? ['duplicates'] : []}
          onChange={() => setExpanded((wasExpanded) => !wasExpanded)}
          style={{ marginTop: 8 }}
        >
          <Panel
            key="duplicates"
            header={
              <Text type="secondary" style={{ fontSize: 12 }}>
                View {group.duplicates.length} similar description(s)
              </Text>
            }
            style={{ padding: 0 }}
          >
            <Space direction="vertical" size={0} style={{ width: '100%' }}>
              {group.duplicates.map((dup, dupIndex) => {
                // Scores are looked up by duplicate position; skip the tag
                // when no usable score exists instead of rendering "NaN%".
                const score = group.similarity_scores[dupIndex];
                const hasScore = typeof score === 'number' && Number.isFinite(score);

                return (
                  <div key={`${dup.expert_id}-${dupIndex}`} style={duplicateItemStyle}>
                    <Space direction="vertical" size={2} style={{ width: '100%' }}>
                      <Space size="small">
                        <Tag color="default" style={{ fontSize: 11 }}>
                          {dup.keyword}
                        </Tag>
                        <Tag color="cyan" style={{ fontSize: 11 }}>
                          {dup.expert_name}
                        </Tag>
                        {hasScore && (
                          <Tag color="orange" style={{ fontSize: 11 }}>
                            {(score * 100).toFixed(0)}% similar
                          </Tag>
                        )}
                      </Space>
                      <Text
                        style={{
                          fontSize: 13,
                          color: isDark ? '#b0b0b0' : '#666',
                        }}
                      >
                        {dup.description}
                      </Text>
                    </Space>
                  </div>
                );
              })}
            </Space>
          </Panel>
        </Collapse>
      )}
    </Card>
  );
};
|
||||
|
||||
export default GroupCard;
|
||||
100
frontend/src/hooks/useDeduplication.ts
Normal file
100
frontend/src/hooks/useDeduplication.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
import { useCallback, useRef, useState } from 'react';
import { deduplicateDescriptions } from '../services/api';
import type {
  ExpertTransformationDescription,
  DeduplicationResult,
  DeduplicationProgress,
  DeduplicationMethod,
} from '../types';
|
||||
|
||||
/**
 * Hook for managing deduplication state and operations.
 *
 * Exposes the current `loading` flag, the last `result`, the last `error`
 * message, a `progress` descriptor, and two stable callbacks:
 * `deduplicate` (runs a request) and `clearResult` (resets everything).
 *
 * Only the most recent request may update state: starting a new run or
 * calling `clearResult` invalidates any request still in flight, so a late
 * response can never overwrite newer state.
 */
export function useDeduplication() {
  const [loading, setLoading] = useState(false);
  const [result, setResult] = useState<DeduplicationResult | null>(null);
  const [error, setError] = useState<string | null>(null);
  const [progress, setProgress] = useState<DeduplicationProgress>({
    step: 'idle',
    message: '',
  });

  // Id of the request currently allowed to write state. Bumped by every new
  // run and by clearResult so that superseded responses are ignored.
  const latestRequestId = useRef(0);

  /**
   * Execute deduplication on a list of descriptions
   *
   * @param descriptions - List of descriptions to deduplicate
   * @param threshold - Similarity threshold (only used for embedding method)
   * @param method - Deduplication method: 'embedding' (fast) or 'llm' (accurate but slow)
   */
  const deduplicate = useCallback(async (
    descriptions: ExpertTransformationDescription[],
    threshold: number = 0.85,
    method: DeduplicationMethod = 'embedding'
  ) => {
    if (!descriptions || descriptions.length === 0) {
      setError('No descriptions to deduplicate');
      return;
    }

    latestRequestId.current += 1;
    const requestId = latestRequestId.current;
    const isLatest = () => latestRequestId.current === requestId;

    setLoading(true);
    setError(null);
    setResult(null);

    // Show a different progress message depending on the method
    const methodLabel = method === 'embedding' ? 'Embedding' : 'LLM';
    const pairCount = (descriptions.length * (descriptions.length - 1)) / 2;
    const progressMessage = method === 'llm'
      ? `Processing ${descriptions.length} descriptions with LLM (${pairCount} comparisons)...`
      : `Processing ${descriptions.length} descriptions with ${methodLabel}...`;

    setProgress({
      step: 'processing',
      message: progressMessage,
    });

    try {
      const deduplicationResult = await deduplicateDescriptions({
        descriptions,
        similarity_threshold: threshold,
        method,
      });

      // A newer run or clearResult superseded this request: drop the response.
      if (!isLatest()) return;

      setResult(deduplicationResult);
      setProgress({
        step: 'done',
        message: `Found ${deduplicationResult.total_groups} unique groups, ${deduplicationResult.total_duplicates} duplicates (${methodLabel})`,
      });
    } catch (err) {
      if (!isLatest()) return;

      const errorMessage = err instanceof Error ? err.message : 'Unknown error';
      setError(errorMessage);
      setProgress({
        step: 'error',
        message: 'Deduplication failed',
        error: errorMessage,
      });
    } finally {
      // Whoever superseded this request owns the loading flag now.
      if (isLatest()) {
        setLoading(false);
      }
    }
  }, []);

  /**
   * Clear results and reset state. Also invalidates any request still in
   * flight so its response is discarded when it arrives.
   */
  const clearResult = useCallback(() => {
    latestRequestId.current += 1;
    setLoading(false);
    setResult(null);
    setError(null);
    setProgress({
      step: 'idle',
      message: '',
    });
  }, []);

  return {
    loading,
    result,
    error,
    progress,
    deduplicate,
    clearResult,
  };
}
|
||||
@@ -14,6 +14,7 @@ interface UseExpertTransformationOptions {
|
||||
model?: string;
|
||||
temperature?: number;
|
||||
expertSource?: ExpertSource;
|
||||
expertLanguage?: 'en' | 'zh';
|
||||
}
|
||||
|
||||
export function useExpertTransformation(options: UseExpertTransformationOptions = {}) {
|
||||
@@ -63,6 +64,7 @@ export function useExpertTransformation(options: UseExpertTransformationOptions
|
||||
keywords_per_expert: expertConfig.keywords_per_expert,
|
||||
custom_experts: expertConfig.custom_experts,
|
||||
expert_source: options.expertSource,
|
||||
expert_language: options.expertLanguage,
|
||||
model: options.model,
|
||||
temperature: options.temperature,
|
||||
},
|
||||
@@ -155,7 +157,7 @@ export function useExpertTransformation(options: UseExpertTransformationOptions
|
||||
});
|
||||
});
|
||||
},
|
||||
[options.model, options.temperature, options.expertSource]
|
||||
[options.model, options.temperature, options.expertSource, options.expertLanguage]
|
||||
);
|
||||
|
||||
const transformAll = useCallback(
|
||||
|
||||
@@ -10,7 +10,9 @@ import type {
|
||||
TransformationCategoryResult,
|
||||
ExpertTransformationRequest,
|
||||
ExpertTransformationCategoryResult,
|
||||
ExpertProfile
|
||||
ExpertProfile,
|
||||
DeduplicationRequest,
|
||||
DeduplicationResult
|
||||
} from '../types';
|
||||
|
||||
// 自動使用當前瀏覽器的 hostname,支援遠端存取
|
||||
@@ -299,3 +301,24 @@ export async function expertTransformCategoryStream(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ===== Deduplication Agent API =====
|
||||
|
||||
/**
 * Sends a deduplication request to the `/deduplication/deduplicate` endpoint.
 *
 * @param request - Payload serialized as the JSON body of the POST.
 * @returns The parsed JSON body of a successful response.
 * @throws Error carrying the HTTP status and response text when the response is not ok.
 */
export async function deduplicateDescriptions(
  request: DeduplicationRequest
): Promise<DeduplicationResult> {
  const endpoint = `${API_BASE_URL}/deduplication/deduplicate`;
  const init = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(request),
  };

  const response = await fetch(endpoint, init);
  if (response.ok) {
    return response.json();
  }

  // Non-2xx: surface the status together with whatever body the server sent.
  const errorText = await response.text();
  throw new Error(`API error: ${response.status} - ${errorText}`);
}
|
||||
|
||||
@@ -265,3 +265,37 @@ export interface ExpertTransformationInput {
|
||||
custom_experts?: string[];
|
||||
};
|
||||
}
|
||||
|
||||
// ===== Deduplication Agent types =====
|
||||
|
||||
/** Available deduplication strategies. */
export type DeduplicationMethod = 'embedding' | 'llm';

/** Request payload for deduplicating descriptions. */
export interface DeduplicationRequest {
  /** Descriptions to deduplicate. */
  descriptions: ExpertTransformationDescription[];

  /** Deduplication method, default: 'embedding'. */
  method?: DeduplicationMethod;

  /** 0.0-1.0, default 0.85; used by the embedding method only. */
  similarity_threshold?: number;

  /** Embedding/LLM model. */
  model?: string;
}

/** One group of similar descriptions. */
export interface DescriptionGroup {
  group_id: string;

  /** Description displayed as the group's representative. */
  representative: ExpertTransformationDescription;

  /** Other descriptions placed in the same group. */
  duplicates: ExpertTransformationDescription[];

  /** Similarity values; presumably one per entry of `duplicates`, same order (TODO confirm against backend). */
  similarity_scores: number[];
}

/** Response payload describing a deduplication run. */
export interface DeduplicationResult {
  total_input: number;
  total_groups: number;
  total_duplicates: number;
  groups: DescriptionGroup[];
  threshold_used: number;

  /** Method that was used. */
  method_used: DeduplicationMethod;

  /** Model that was used. */
  model_used: string;
}

/** Progress descriptor for a deduplication run. */
export interface DeduplicationProgress {
  step: 'idle' | 'processing' | 'done' | 'error';
  message: string;
  error?: string;
}
|
||||
|
||||
Reference in New Issue
Block a user