feat: Enhance patent search and update research documentation

- Replace the Google Patents scraper with the Lens.org API in the patent search service
- Rework the PatentSearchPanel UI to run searches through the backend and display results inline
- Add new research_report.md
- Update experimental protocol, literature review, paper outline, and theoretical framework

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 15:52:33 +08:00
parent ec48709755
commit 26a56a2a07
13 changed files with 1446 additions and 537 deletions

View File

@@ -3,10 +3,11 @@ from typing import Optional
 class Settings(BaseSettings):
-    ollama_base_url: str = "http://192.168.30.36:11434"
+    ollama_base_url: str = "http://localhost:11435"
     default_model: str = "qwen3:8b"
     openai_api_key: Optional[str] = None
     openai_base_url: Optional[str] = None
+    lens_api_token: Optional[str] = None

     class Config:
         env_file = ".env"

View File

@@ -1,4 +1,4 @@
"""Patent Search Router - Search for similar patents""" """Patent Search Router - Search for similar patents using Lens.org API"""
import logging import logging
from typing import Optional, List from typing import Optional, List
@@ -21,16 +21,20 @@ class PatentSearchRequest(BaseModel):
class PatentResult(BaseModel): class PatentResult(BaseModel):
"""Single patent result""" """Single patent result from Lens.org"""
publication_number: str lens_id: str
doc_number: str
jurisdiction: str
kind: str
title: str title: str
snippet: str abstract: Optional[str] = None
publication_date: Optional[str] = None date_published: Optional[str] = None
assignee: Optional[str] = None applicants: List[str] = []
inventor: Optional[str] = None inventors: List[str] = []
status: str # ACTIVE, NOT_ACTIVE, UNKNOWN legal_status: Optional[str] = None
pdf_url: Optional[str] = None classifications_cpc: List[str] = []
thumbnail_url: Optional[str] = None families_simple: List[str] = []
url: str
class PatentSearchResponse(BaseModel): class PatentSearchResponse(BaseModel):
@@ -68,7 +72,7 @@ async def search_patents(request: PatentSearchRequest):
""" """
Search for patents similar to the given description/query. Search for patents similar to the given description/query.
Uses Google Patents to find related patents based on keywords. Uses Lens.org API to find related patents based on title, abstract, and claims.
""" """
logger.info(f"Patent search request: {request.query[:100]}...") logger.info(f"Patent search request: {request.query[:100]}...")
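For illustration, a single item matching the new `PatentResult` model might look like the following; every value here is made up:

```python
# Illustrative PatentResult payload (values are placeholders, not real Lens.org data)
example_patent_result = {
    "lens_id": "example-lens-id",
    "doc_number": "1234567",
    "jurisdiction": "US",
    "kind": "A1",
    "title": "Example chair with adaptive frame",
    "abstract": "An illustrative abstract...",
    "date_published": "2020-01-01",
    "applicants": ["Example Corp"],
    "inventors": ["Jane Doe"],
    "legal_status": "ACTIVE",
    "classifications_cpc": ["A47C 3/00"],
    "families_simple": ["another-lens-id"],
    "url": "https://www.lens.org/lens/patent/example-lens-id",
}
```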

View File

@@ -1,74 +1,48 @@
-"""Patent Search Service using Google Patents XHR API"""
+"""Patent Search Service using Lens.org API"""
 import httpx
 import logging
-from typing import List, Optional
-from urllib.parse import quote_plus
+from typing import List, Optional, Dict, Any
+from dataclasses import dataclass, asdict
+from app.config import settings

 logger = logging.getLogger(__name__)

+@dataclass
 class PatentSearchResult:
-    """Single patent search result"""
-    def __init__(
-        self,
-        publication_number: str,
-        title: str,
-        snippet: str,
-        publication_date: Optional[str],
-        assignee: Optional[str],
-        inventor: Optional[str],
-        status: str,
-        pdf_url: Optional[str] = None,
-        thumbnail_url: Optional[str] = None,
-    ):
-        self.publication_number = publication_number
-        self.title = title
-        self.snippet = snippet
-        self.publication_date = publication_date
-        self.assignee = assignee
-        self.inventor = inventor
-        self.status = status
-        self.pdf_url = pdf_url
-        self.thumbnail_url = thumbnail_url
+    """Single patent search result from Lens.org"""
+    lens_id: str
+    doc_number: str
+    jurisdiction: str
+    kind: str
+    title: str
+    abstract: Optional[str]
+    date_published: Optional[str]
+    applicants: List[str]
+    inventors: List[str]
+    legal_status: Optional[str]
+    classifications_cpc: List[str]
+    families_simple: List[str]
+    url: str

-    def to_dict(self):
-        return {
-            "publication_number": self.publication_number,
-            "title": self.title,
-            "snippet": self.snippet,
-            "publication_date": self.publication_date,
-            "assignee": self.assignee,
-            "inventor": self.inventor,
-            "status": self.status,
-            "pdf_url": self.pdf_url,
-            "thumbnail_url": self.thumbnail_url,
-        }
+    def to_dict(self) -> Dict[str, Any]:
+        return asdict(self)

 class PatentSearchService:
-    """Service for searching patents using Google Patents"""
+    """Service for searching patents using Lens.org API"""

-    GOOGLE_PATENTS_XHR_URL = "https://patents.google.com/xhr/query"
-    GOOGLE_PATENTS_PDF_BASE = "https://patentimages.storage.googleapis.com/"
+    LENS_API_URL = "https://api.lens.org/patent/search"

     def __init__(self):
         self._client: Optional[httpx.AsyncClient] = None

-    # Browser-like headers to avoid being blocked
-    DEFAULT_HEADERS = {
-        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-        "Accept": "application/json, text/plain, */*",
-        "Accept-Language": "en-US,en;q=0.9",
-        "Referer": "https://patents.google.com/",
-        "Origin": "https://patents.google.com",
-    }
-
     async def _get_client(self) -> httpx.AsyncClient:
         if self._client is None or self._client.is_closed:
             self._client = httpx.AsyncClient(
                 timeout=30.0,
-                headers=self.DEFAULT_HEADERS,
                 follow_redirects=True,
             )
         return self._client
@@ -77,16 +51,27 @@ class PatentSearchService:
         if self._client and not self._client.is_closed:
             await self._client.aclose()

+    def _get_headers(self) -> Dict[str, str]:
+        """Get headers with authorization token"""
+        token = settings.lens_api_token
+        if not token:
+            raise ValueError("LENS_API_TOKEN environment variable is not set")
+        return {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        }
+
     async def search(
         self,
         query: str,
         max_results: int = 10,
     ) -> dict:
         """
-        Search Google Patents for relevant patents
+        Search Lens.org for relevant patents

         Args:
-            query: Search query (can be a description or keywords)
+            query: Search query (searches title, abstract, and claims)
             max_results: Maximum number of results to return

         Returns:
@@ -95,16 +80,39 @@ class PatentSearchService:
         try:
             client = await self._get_client()

-            # URL encode the query
-            encoded_query = quote_plus(query)
-            url = f"{self.GOOGLE_PATENTS_XHR_URL}?url=q%3D{encoded_query}&exp=&tags="
+            # Build Lens.org query using query string format for full-text search
+            request_body = {
+                "query": query,
+                "size": max_results,
+                "sort": [{"_score": "desc"}]
+            }

-            logger.info(f"Searching patents with query: {query[:100]}...")
+            logger.info(f"Searching Lens.org patents with query: {query[:100]}...")

-            response = await client.get(url)
+            response = await client.post(
+                self.LENS_API_URL,
+                json=request_body,
+                headers=self._get_headers(),
+            )
+
+            if response.status_code == 401:
+                logger.error("Lens.org API authentication failed - check LENS_API_TOKEN")
+                return {
+                    "total_results": 0,
+                    "patents": [],
+                    "error": "Authentication failed - invalid API token"
+                }
+
+            if response.status_code == 429:
+                logger.warning("Lens.org API rate limit exceeded")
+                return {
+                    "total_results": 0,
+                    "patents": [],
+                    "error": "Rate limit exceeded - please try again later"
+                }

             if response.status_code != 200:
-                logger.error(f"Google Patents API returned status {response.status_code}")
+                logger.error(f"Lens.org API returned status {response.status_code}: {response.text}")
                 return {
                     "total_results": 0,
                     "patents": [],
@@ -112,56 +120,28 @@ class PatentSearchService:
                 }

             data = response.json()
+            total_results = data.get("total", 0)
+            results = data.get("data", [])

-            # Parse results
-            results = data.get("results", {})
-            total_num = results.get("total_num_results", 0)
-            clusters = results.get("cluster", [])
-
             patents: List[PatentSearchResult] = []
+            for item in results:
+                patent = self._parse_patent(item)
+                patents.append(patent)

-            if clusters and len(clusters) > 0:
-                patent_results = clusters[0].get("result", [])
-                for item in patent_results[:max_results]:
-                    patent_data = item.get("patent", {})
-                    family_meta = patent_data.get("family_metadata", {})
-                    aggregated = family_meta.get("aggregated", {})
-                    country_status = aggregated.get("country_status", [])
-                    status = "UNKNOWN"
-                    if country_status and len(country_status) > 0:
-                        best_stage = country_status[0].get("best_patent_stage", {})
-                        status = best_stage.get("state", "UNKNOWN")
-
-                    # Build PDF URL if available
-                    pdf_path = patent_data.get("pdf", "")
-                    pdf_url = f"{self.GOOGLE_PATENTS_PDF_BASE}{pdf_path}" if pdf_path else None
-
-                    # Build thumbnail URL
-                    thumbnail = patent_data.get("thumbnail", "")
-                    thumbnail_url = f"{self.GOOGLE_PATENTS_PDF_BASE}{thumbnail}" if thumbnail else None
-
-                    patent = PatentSearchResult(
-                        publication_number=patent_data.get("publication_number", ""),
-                        title=self._clean_html(patent_data.get("title", "")),
-                        snippet=self._clean_html(patent_data.get("snippet", "")),
-                        publication_date=patent_data.get("publication_date"),
-                        assignee=patent_data.get("assignee"),
-                        inventor=patent_data.get("inventor"),
-                        status=status,
-                        pdf_url=pdf_url,
-                        thumbnail_url=thumbnail_url,
-                    )
-                    patents.append(patent)
-
-            logger.info(f"Found {total_num} total patents, returning {len(patents)}")
+            logger.info(f"Found {total_results} total patents, returning {len(patents)}")

             return {
-                "total_results": total_num,
+                "total_results": total_results,
                 "patents": [p.to_dict() for p in patents],
             }

+        except ValueError as e:
+            logger.error(f"Configuration error: {e}")
+            return {
+                "total_results": 0,
+                "patents": [],
+                "error": str(e)
+            }
         except httpx.HTTPError as e:
             logger.error(f"HTTP error searching patents: {e}")
             return {
@@ -177,18 +157,107 @@ class PatentSearchService:
                 "error": str(e)
             }

-    def _clean_html(self, text: str) -> str:
-        """Remove HTML entities and tags from text"""
-        if not text:
-            return ""
-        # Replace common HTML entities
-        text = text.replace("&hellip;", "...")
-        text = text.replace("&amp;", "&")
-        text = text.replace("&lt;", "<")
-        text = text.replace("&gt;", ">")
-        text = text.replace("&quot;", '"')
-        text = text.replace("&#39;", "'")
-        return text.strip()
+    def _parse_patent(self, item: Dict[str, Any]) -> PatentSearchResult:
+        """Parse a single patent result from Lens.org response"""
+        lens_id = item.get("lens_id", "")
+        jurisdiction = item.get("jurisdiction", "")
+        doc_number = item.get("doc_number", "")
+        kind = item.get("kind", "")
+
+        # Get biblio section (contains title, parties, classifications)
+        biblio = item.get("biblio", {})
+
+        # Extract title from biblio.invention_title (list with lang info)
+        title_data = biblio.get("invention_title", [])
+        title = self._extract_text_with_lang(title_data)
+
+        # Extract abstract (top-level, list with lang info)
+        abstract_data = item.get("abstract", [])
+        abstract = self._extract_text_with_lang(abstract_data)
+
+        # Extract applicants from biblio.parties.applicants
+        parties = biblio.get("parties", {})
+        applicants = []
+        applicant_data = parties.get("applicants", [])
+        if isinstance(applicant_data, list):
+            for app in applicant_data:
+                if isinstance(app, dict):
+                    name = app.get("extracted_name", {}).get("value", "")
+                    if name:
+                        applicants.append(name)
+
+        # Extract inventors from biblio.parties.inventors
+        inventors = []
+        inventor_data = parties.get("inventors", [])
+        if isinstance(inventor_data, list):
+            for inv in inventor_data:
+                if isinstance(inv, dict):
+                    name = inv.get("extracted_name", {}).get("value", "")
+                    if name:
+                        inventors.append(name)
+
+        # Extract legal status
+        legal_status_data = item.get("legal_status", {})
+        legal_status = None
+        if isinstance(legal_status_data, dict):
+            legal_status = legal_status_data.get("patent_status")
+
+        # Extract CPC classifications from biblio.classifications_cpc
+        classifications_cpc = []
+        cpc_data = biblio.get("classifications_cpc", [])
+        if isinstance(cpc_data, list):
+            for cpc in cpc_data:
+                if isinstance(cpc, dict):
+                    symbol = cpc.get("symbol", "")
+                    if symbol:
+                        classifications_cpc.append(symbol)
+
+        # Extract simple family members
+        families_simple = []
+        families_data = item.get("families", {})
+        if isinstance(families_data, dict):
+            simple_family = families_data.get("simple", {})
+            if isinstance(simple_family, dict):
+                members = simple_family.get("members", [])
+                if isinstance(members, list):
+                    families_simple = [m.get("lens_id", "") for m in members if isinstance(m, dict) and m.get("lens_id")]
+
+        # Build URL to Lens.org patent page
+        url = f"https://www.lens.org/lens/patent/{lens_id}" if lens_id else ""
+
+        return PatentSearchResult(
+            lens_id=lens_id,
+            doc_number=doc_number,
+            jurisdiction=jurisdiction,
+            kind=kind,
+            title=title,
+            abstract=abstract,
+            date_published=item.get("date_published"),
+            applicants=applicants,
+            inventors=inventors,
+            legal_status=legal_status,
+            classifications_cpc=classifications_cpc,
+            families_simple=families_simple,
+            url=url,
+        )
+
+    def _extract_text_with_lang(self, data: Any, prefer_lang: str = "en") -> str:
+        """Extract text from Lens.org language-tagged list, preferring specified language"""
+        if not data:
+            return ""
+        if isinstance(data, str):
+            return data
+        if isinstance(data, list) and data:
+            # Prefer specified language
+            for item in data:
+                if isinstance(item, dict) and item.get("lang") == prefer_lang:
+                    return item.get("text", "")
+            # Fall back to first item
+            first = data[0]
+            if isinstance(first, dict):
+                return first.get("text", "")
+            return str(first)
+        return ""

 # Singleton instance
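A minimal usage sketch of the reworked service, assuming `LENS_API_TOKEN` is configured and calling only what the diff above defines (the sample query string is illustrative):

```python
# Usage sketch: run a single Lens.org search and print the parsed results.
import asyncio

async def demo() -> None:
    service = PatentSearchService()
    result = await service.search("ergonomic chair with shape-memory frame", max_results=5)
    if "error" in result:
        print("Search failed:", result["error"])
    for p in result["patents"]:
        # Keys follow PatentSearchResult.to_dict(), i.e. asdict() of the dataclass
        print(p["jurisdiction"], p["doc_number"], "-", p["title"], p["url"])

asyncio.run(demo())
```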

View File

@@ -155,7 +155,6 @@
       "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@babel/code-frame": "^7.27.1",
         "@babel/generator": "^7.28.5",
@@ -2446,7 +2445,6 @@
       "integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "undici-types": "~7.16.0"
       }
@@ -2457,7 +2455,6 @@
       "integrity": "sha512-MWtvHrGZLFttgeEj28VXHxpmwYbor/ATPYbBfSFZEIRK0ecCFLl2Qo55z52Hss+UV9CRN7trSeq1zbgx7YDWWg==",
       "devOptional": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "csstype": "^3.2.2"
       }
@@ -2518,7 +2515,6 @@
       "integrity": "sha512-jCzKdm/QK0Kg4V4IK/oMlRZlY+QOcdjv89U2NgKHZk1CYTj82/RVSx1mV/0gqCVMJ/DA+Zf/S4NBWNF8GQ+eqQ==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@typescript-eslint/scope-manager": "8.48.0",
         "@typescript-eslint/types": "8.48.0",
@@ -2802,7 +2798,6 @@
       "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "bin": {
         "acorn": "bin/acorn"
       },
@@ -2971,7 +2966,6 @@
         }
       ],
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "baseline-browser-mapping": "^2.8.25",
         "caniuse-lite": "^1.0.30001754",
@@ -3442,7 +3436,6 @@
       "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz",
      "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
       "license": "ISC",
-      "peer": true,
       "engines": {
         "node": ">=12"
       }
@@ -3531,8 +3524,7 @@
       "version": "1.11.19",
       "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.19.tgz",
       "integrity": "sha512-t5EcLVS6QPBNqM2z8fakk/NKel+Xzshgt8FFKAn+qwlD1pzZWxh0nVCrvFK7ZDb6XucZeF9z8C7CBWTRIVApAw==",
-      "license": "MIT",
-      "peer": true
+      "license": "MIT"
     },
     "node_modules/debug": {
       "version": "4.4.3",
@@ -3646,7 +3638,6 @@
       "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@eslint-community/eslint-utils": "^4.8.0",
         "@eslint-community/regexpp": "^4.12.1",
@@ -4376,7 +4367,6 @@
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -4503,7 +4493,6 @@
       "resolved": "https://registry.npmjs.org/react/-/react-19.2.0.tgz",
       "integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==",
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=0.10.0"
       }
@@ -4513,7 +4502,6 @@
       "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.0.tgz",
       "integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==",
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "scheduler": "^0.27.0"
       },
@@ -4767,7 +4755,6 @@
       "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
       "dev": true,
       "license": "Apache-2.0",
-      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -4863,7 +4850,6 @@
       "integrity": "sha512-tI2l/nFHC5rLh7+5+o7QjKjSR04ivXDF4jcgV0f/bTQ+OJiITy5S6gaynVsEM+7RqzufMnVbIon6Sr5x1SDYaQ==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "esbuild": "^0.25.0",
         "fdir": "^6.5.0",
@@ -4985,7 +4971,6 @@
       "integrity": "sha512-AvvthqfqrAhNH9dnfmrfKzX5upOdjUVJYFqNSlkmGf64gRaTzlPwz99IHYnVs28qYAybvAlBV+H7pn0saFY4Ig==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"
       }

View File

@@ -489,6 +489,37 @@ function App() {
             availableModels={availableModels}
           />
         )}
+        {activeTab === 'patent' && (
+          <div style={{ padding: 16 }}>
+            <Typography.Title level={5} style={{ marginBottom: 16 }}>
+              <FileSearchOutlined style={{ marginRight: 8 }} />
+              Patent Search Info
+            </Typography.Title>
+            <Typography.Paragraph type="secondary" style={{ fontSize: 12 }}>
+              Search patents using the Lens.org API to find prior art and similar inventions.
+            </Typography.Paragraph>
+            <Typography.Title level={5} style={{ marginTop: 24, marginBottom: 12 }}>
+              How to Use
+            </Typography.Title>
+            <Typography.Paragraph style={{ fontSize: 12 }}>
+              <ol style={{ paddingLeft: 16, margin: 0 }}>
+                <li style={{ marginBottom: 8 }}>Click a generated description on the left to load it into the search box</li>
+                <li style={{ marginBottom: 8 }}>Edit the description to refine your search query</li>
+                <li style={{ marginBottom: 8 }}>Click "Search Patents" to find similar patents</li>
+                <li style={{ marginBottom: 8 }}>Results appear on the right - click to view on Lens.org</li>
+              </ol>
+            </Typography.Paragraph>
+            <Typography.Title level={5} style={{ marginTop: 24, marginBottom: 12 }}>
+              Result Interpretation
+            </Typography.Title>
+            <Typography.Paragraph type="secondary" style={{ fontSize: 12 }}>
+              <strong>Many results:</strong> Query may overlap with existing prior art - consider making it more specific.
+            </Typography.Paragraph>
+            <Typography.Paragraph type="secondary" style={{ fontSize: 12 }}>
+              <strong>Few/no results:</strong> Potentially novel concept - good candidate for further exploration.
+            </Typography.Paragraph>
+          </div>
+        )}
         {activeTab === 'deduplication' && (
           <div style={{ padding: 16 }}>
             <Typography.Title level={5} style={{ marginBottom: 16 }}>

View File

@@ -1,4 +1,4 @@
-import { useState, useCallback } from 'react';
+import { useState, useCallback, useEffect } from 'react';
 import {
   Card,
   Button,
@@ -10,17 +10,24 @@ import {
   List,
   Tooltip,
   message,
+  Badge,
 } from 'antd';
 import {
   SearchOutlined,
   LinkOutlined,
   CopyOutlined,
   DeleteOutlined,
-  GlobalOutlined,
+  CheckCircleOutlined,
+  CloseCircleOutlined,
+  ClockCircleOutlined,
+  QuestionCircleOutlined,
+  EditOutlined,
 } from '@ant-design/icons';
 import type {
   ExpertTransformationDescription,
+  PatentResult,
 } from '../types';
+import { searchPatents } from '../services/api';

 const { Text, Paragraph } = Typography;
 const { TextArea } = Input;
@@ -30,315 +37,402 @@ interface PatentSearchPanelProps {
isDark: boolean; isDark: boolean;
} }
interface SearchItem { interface SearchResultItem {
id: string; id: string;
query: string; query: string;
searchUrl: string;
expertName?: string; expertName?: string;
keyword?: string; keyword?: string;
loading: boolean;
error?: string;
totalResults: number;
patents: PatentResult[];
} }
// Generate Google Patents search URL // Get status icon and color
function generatePatentSearchUrl(query: string): string { function getStatusDisplay(status: string | null): { icon: React.ReactNode; color: string; text: string } {
// Extract key terms and create a search-friendly query switch (status) {
const encodedQuery = encodeURIComponent(query); case 'ACTIVE':
return `https://patents.google.com/?q=${encodedQuery}`; return { icon: <CheckCircleOutlined />, color: 'green', text: 'Active' };
} case 'PENDING':
return { icon: <ClockCircleOutlined />, color: 'blue', text: 'Pending' };
// Generate Lens.org search URL (alternative) case 'DISCONTINUED':
function generateLensSearchUrl(query: string): string { case 'EXPIRED':
const encodedQuery = encodeURIComponent(query); return { icon: <CloseCircleOutlined />, color: 'red', text: status };
return `https://www.lens.org/lens/search/patent/list?q=${encodedQuery}`; default:
return { icon: <QuestionCircleOutlined />, color: 'default', text: status || 'Unknown' };
}
} }
export function PatentSearchPanel({ descriptions, isDark }: PatentSearchPanelProps) { export function PatentSearchPanel({ descriptions, isDark }: PatentSearchPanelProps) {
const [customQuery, setCustomQuery] = useState(''); const [customQuery, setCustomQuery] = useState('');
const [searchItems, setSearchItems] = useState<SearchItem[]>([]); const [searchResults, setSearchResults] = useState<SearchResultItem[]>([]);
const [selectedDescriptions, setSelectedDescriptions] = useState<Set<number>>(new Set()); const [isSearching, setIsSearching] = useState(false);
const [apiStatus, setApiStatus] = useState<'checking' | 'connected' | 'error'>('checking');
// Add custom query to search list // Check API connection on mount
const handleAddCustomQuery = useCallback(() => { useEffect(() => {
const checkApi = async () => {
try {
const res = await fetch(`http://${window.location.hostname}:8001/health`);
setApiStatus(res.ok ? 'connected' : 'error');
} catch {
setApiStatus('error');
}
};
checkApi();
}, []);
// Search patents for a query
const doSearch = useCallback(async (
query: string,
expertName?: string,
keyword?: string
): Promise<SearchResultItem> => {
const id = `search-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
try {
const response = await searchPatents({ query, max_results: 10 });
return {
id,
query,
expertName,
keyword,
loading: false,
totalResults: response.total_results,
patents: response.patents,
error: response.error || undefined,
};
} catch (err) {
const errorMsg = err instanceof Error ? err.message : 'Search failed';
return {
id,
query,
expertName,
keyword,
loading: false,
totalResults: 0,
patents: [],
error: `${errorMsg} (API: ${window.location.hostname}:8001)`,
};
}
}, []);
// Handle custom query search
const handleSearchCustom = useCallback(async () => {
if (!customQuery.trim()) return; if (!customQuery.trim()) return;
const newItem: SearchItem = { setIsSearching(true);
id: `custom-${Date.now()}`, const result = await doSearch(customQuery.trim());
query: customQuery.trim(), setSearchResults(prev => [result, ...prev]);
searchUrl: generatePatentSearchUrl(customQuery.trim()),
};
setSearchItems(prev => [newItem, ...prev]);
setCustomQuery(''); setCustomQuery('');
message.success('Added to search list'); setIsSearching(false);
}, [customQuery]);
// Add selected descriptions to search list if (result.error) {
const handleAddSelected = useCallback(() => { message.error(`Search failed: ${result.error}`);
if (!descriptions || selectedDescriptions.size === 0) return; } else {
message.success(`Found ${result.totalResults.toLocaleString()} patents (${result.patents.length} returned)`);
}
}, [customQuery, doSearch]);
const newItems: SearchItem[] = Array.from(selectedDescriptions).map(idx => { // Handle clicking a generated description - put it in search input
const desc = descriptions[idx]; const handleSelectDescription = useCallback((desc: ExpertTransformationDescription) => {
return { setCustomQuery(desc.description);
id: `desc-${idx}-${Date.now()}`, message.info('Description loaded into search box - edit and search when ready');
query: desc.description,
searchUrl: generatePatentSearchUrl(desc.description),
expertName: desc.expert_name,
keyword: desc.keyword,
};
});
setSearchItems(prev => [...newItems, ...prev]);
setSelectedDescriptions(new Set());
message.success(`Added ${newItems.length} items to search list`);
}, [descriptions, selectedDescriptions]);
// Remove item from list
const handleRemoveItem = useCallback((id: string) => {
setSearchItems(prev => prev.filter(item => item.id !== id));
}, []); }, []);
// Copy URL to clipboard // Remove result from list
const handleCopyUrl = useCallback((url: string) => { const handleRemoveResult = useCallback((id: string) => {
navigator.clipboard.writeText(url); setSearchResults(prev => prev.filter(item => item.id !== id));
message.success('URL copied to clipboard');
}, []); }, []);
// Toggle description selection // Copy patent info to clipboard
const toggleDescription = useCallback((index: number) => { const handleCopyPatent = useCallback((patent: PatentResult) => {
setSelectedDescriptions(prev => { const text = `${patent.title}\n${patent.jurisdiction}-${patent.doc_number}\n${patent.url}`;
const next = new Set(prev); navigator.clipboard.writeText(text);
if (next.has(index)) { message.success('Patent info copied');
next.delete(index);
} else {
next.add(index);
}
return next;
});
}, []); }, []);
// Clear all // Clear all results
const handleClearAll = useCallback(() => { const handleClearAll = useCallback(() => {
setSearchItems([]); setSearchResults([]);
}, []); }, []);
const containerStyle: React.CSSProperties = {
height: '100%',
display: 'flex',
flexDirection: 'column',
gap: 16,
padding: 16,
overflow: 'auto',
};
const cardStyle: React.CSSProperties = { const cardStyle: React.CSSProperties = {
background: isDark ? '#1f1f1f' : '#fff', background: isDark ? '#1f1f1f' : '#fff',
}; };
return ( return (
<div style={containerStyle}> <div style={{
{/* Info banner */} height: 'calc(100vh - 180px)',
<Card size="small" style={cardStyle}> width: '100%',
<Space> padding: 16,
<GlobalOutlined style={{ color: '#1890ff' }} /> boxSizing: 'border-box',
<Text> display: 'flex',
Generate search links to check for similar patents on Google Patents or Lens.org flexDirection: 'row',
</Text> gap: 16,
</Space> }}>
</Card> {/* Left Column - Search Input & Generated Descriptions */}
<div style={{
{/* Custom search input */} flex: '0 0 40%',
<Card size="small" title="Add Custom Search" style={cardStyle}> minWidth: 0,
<TextArea display: 'flex',
placeholder="Enter a description to search for similar patents..." flexDirection: 'column',
value={customQuery} gap: 12,
onChange={e => setCustomQuery(e.target.value)} overflow: 'auto',
autoSize={{ minRows: 2, maxRows: 4 }} }}>
style={{ marginBottom: 8 }} {/* Search input */}
/> <Card
<Button
type="primary"
icon={<SearchOutlined />}
onClick={handleAddCustomQuery}
disabled={!customQuery.trim()}
>
Add to Search List
</Button>
</Card>
{/* Description selection (if available) */}
{descriptions && descriptions.length > 0 && (
<Card
size="small"
title={`Generated Descriptions (${descriptions.length})`}
style={cardStyle}
extra={
<Button
type="primary"
size="small" size="small"
icon={<SearchOutlined />} title={
onClick={handleAddSelected} <Space>
disabled={selectedDescriptions.size === 0} <span>Patent Search</span>
{apiStatus === 'checking' && <Tag color="processing">Checking...</Tag>}
{apiStatus === 'connected' && <Tag color="success">Connected</Tag>}
{apiStatus === 'error' && (
<Tooltip title={`Cannot reach ${window.location.hostname}:8001`}>
<Tag color="error">Unreachable</Tag>
</Tooltip>
)}
</Space>
}
style={cardStyle}
> >
Add Selected ({selectedDescriptions.size}) <TextArea
</Button> placeholder="Enter a description to search for similar patents... Click a generated description below to load it here for editing."
} value={customQuery}
> onChange={e => setCustomQuery(e.target.value)}
<div style={{ maxHeight: 200, overflow: 'auto' }}> onPressEnter={e => {
<Space direction="vertical" style={{ width: '100%' }}> if (!e.shiftKey) {
{descriptions.slice(0, 20).map((desc, idx) => ( e.preventDefault();
<div handleSearchCustom();
key={idx} }
onClick={() => toggleDescription(idx)}
style={{
padding: 8,
borderRadius: 4,
cursor: 'pointer',
background: selectedDescriptions.has(idx)
? (isDark ? '#177ddc22' : '#1890ff11')
: (isDark ? '#141414' : '#fafafa'),
border: selectedDescriptions.has(idx)
? `1px solid ${isDark ? '#177ddc' : '#1890ff'}`
: `1px solid ${isDark ? '#303030' : '#f0f0f0'}`,
}}
>
<Space size={4}>
<Tag color="blue" style={{ fontSize: 10 }}>{desc.expert_name}</Tag>
<Tag style={{ fontSize: 10 }}>{desc.keyword}</Tag>
</Space>
<Paragraph
ellipsis={{ rows: 2 }}
style={{ marginBottom: 0, marginTop: 4, fontSize: 12 }}
>
{desc.description}
</Paragraph>
</div>
))}
{descriptions.length > 20 && (
<Text type="secondary">
And {descriptions.length - 20} more descriptions...
</Text>
)}
</Space>
</div>
</Card>
)}
{/* Search list */}
{searchItems.length > 0 && (
<Card
size="small"
title={`Search List (${searchItems.length})`}
style={{ ...cardStyle, flex: 1, minHeight: 0, overflow: 'hidden', display: 'flex', flexDirection: 'column' }}
extra={
<Button size="small" danger onClick={handleClearAll}>
Clear All
</Button>
}
bodyStyle={{ flex: 1, overflow: 'auto', padding: 0 }}
>
<List
dataSource={searchItems}
renderItem={item => (
<List.Item
style={{
padding: '12px 16px',
borderBottom: `1px solid ${isDark ? '#303030' : '#f0f0f0'}`,
}} }}
actions={[ autoSize={{ minRows: 3, maxRows: 6 }}
<Tooltip title="Open in Google Patents" key="google"> style={{ marginBottom: 8 }}
<Button disabled={isSearching}
type="link" />
icon={<LinkOutlined />} <Button
href={item.searchUrl} type="primary"
target="_blank" icon={<SearchOutlined />}
> onClick={handleSearchCustom}
Google disabled={!customQuery.trim()}
</Button> loading={isSearching}
</Tooltip>, block
<Tooltip title="Open in Lens.org" key="lens">
<Button
type="link"
icon={<GlobalOutlined />}
href={generateLensSearchUrl(item.query)}
target="_blank"
>
Lens
</Button>
</Tooltip>,
<Tooltip title="Copy URL" key="copy">
<Button
type="text"
icon={<CopyOutlined />}
onClick={() => handleCopyUrl(item.searchUrl)}
/>
</Tooltip>,
<Tooltip title="Remove" key="remove">
<Button
type="text"
danger
icon={<DeleteOutlined />}
onClick={() => handleRemoveItem(item.id)}
/>
</Tooltip>,
]}
> >
<List.Item.Meta Search Patents
title={ </Button>
<Space size={4}> </Card>
{item.expertName && (
<Tag color="blue" style={{ fontSize: 10 }}>{item.expertName}</Tag> {/* Generated Descriptions */}
)} {descriptions && descriptions.length > 0 && (
{item.keyword && ( <Card
<Tag style={{ fontSize: 10 }}>{item.keyword}</Tag> size="small"
)} title={`Generated Descriptions (${descriptions.length})`}
style={{ ...cardStyle, flex: 1, overflow: 'hidden', display: 'flex', flexDirection: 'column' }}
bodyStyle={{ flex: 1, overflow: 'auto', padding: 8 }}
>
<Space direction="vertical" style={{ width: '100%' }} size={8}>
{descriptions.map((desc, idx) => (
<div
key={idx}
onClick={() => handleSelectDescription(desc)}
style={{
padding: 8,
borderRadius: 4,
cursor: 'pointer',
background: isDark ? '#141414' : '#fafafa',
border: `1px solid ${isDark ? '#303030' : '#f0f0f0'}`,
transition: 'all 0.2s',
}}
onMouseEnter={e => {
e.currentTarget.style.borderColor = isDark ? '#177ddc' : '#1890ff';
e.currentTarget.style.background = isDark ? '#177ddc22' : '#1890ff11';
}}
onMouseLeave={e => {
e.currentTarget.style.borderColor = isDark ? '#303030' : '#f0f0f0';
e.currentTarget.style.background = isDark ? '#141414' : '#fafafa';
}}
>
<Space size={4} style={{ marginBottom: 4 }}>
<Tag color="blue" style={{ fontSize: 10 }}>{desc.expert_name}</Tag>
<Tag style={{ fontSize: 10 }}>{desc.keyword}</Tag>
<EditOutlined style={{ fontSize: 10, color: isDark ? '#177ddc' : '#1890ff' }} />
</Space>
<Paragraph
ellipsis={{ rows: 2 }}
style={{ marginBottom: 0, fontSize: 12 }}
>
{desc.description}
</Paragraph>
</div>
))}
</Space>
</Card>
)}
{/* Empty state when no descriptions */}
{(!descriptions || descriptions.length === 0) && (
<Card style={{ ...cardStyle, flex: 1 }}>
<Empty
image={Empty.PRESENTED_IMAGE_SIMPLE}
description={
<Space direction="vertical">
<Text>No generated descriptions available</Text>
<Text type="secondary" style={{ fontSize: 12 }}>
Run expert transformation first to generate descriptions
</Text>
</Space> </Space>
} }
/>
</Card>
)}
</div>
{/* Right Column - Search Results */}
<div style={{
flex: 1,
minWidth: 0,
display: 'flex',
flexDirection: 'column',
overflow: 'auto',
}}>
<Card
size="small"
title={`Search Results (${searchResults.length} queries)`}
style={{ ...cardStyle, flex: 1, overflow: 'hidden', display: 'flex', flexDirection: 'column' }}
bodyStyle={{ flex: 1, overflow: 'auto', padding: 8 }}
extra={
searchResults.length > 0 && (
<Button size="small" danger onClick={handleClearAll}>
Clear All
</Button>
)
}
>
{searchResults.length === 0 ? (
<Empty
image={Empty.PRESENTED_IMAGE_SIMPLE}
description={ description={
<Paragraph <Space direction="vertical">
ellipsis={{ rows: 2 }} <Text>No search results yet</Text>
style={{ marginBottom: 0, fontSize: 12 }} <Text type="secondary" style={{ fontSize: 12 }}>
> Enter a query or click a description to search
{item.query} </Text>
</Paragraph> </Space>
} }
/> />
</List.Item> ) : (
)} <Space direction="vertical" style={{ width: '100%' }} size={8}>
/> {searchResults.map(result => (
</Card> <Card
)} key={result.id}
size="small"
{/* Empty state */} style={{ background: isDark ? '#141414' : '#fafafa' }}
{searchItems.length === 0 && (!descriptions || descriptions.length === 0) && ( title={
<Card style={cardStyle}> <Space>
<Empty <Text style={{ fontSize: 12, maxWidth: 300 }} ellipsis>
description={ {result.query.substring(0, 60)}{result.query.length > 60 ? '...' : ''}
<Space direction="vertical"> </Text>
<Text>Enter a description or run transformations first</Text> <Badge
<Text type="secondary"> count={result.totalResults.toLocaleString()}
Search links will open in Google Patents or Lens.org style={{ backgroundColor: result.error ? '#ff4d4f' : '#52c41a' }}
</Text> overflowCount={999999}
</Space> />
} </Space>
/> }
</Card> extra={
)} <Button
type="text"
{/* Empty state with descriptions but no search items */} size="small"
{searchItems.length === 0 && descriptions && descriptions.length > 0 && ( danger
<Card style={cardStyle}> icon={<DeleteOutlined />}
<Empty onClick={() => handleRemoveResult(result.id)}
image={Empty.PRESENTED_IMAGE_SIMPLE} />
description={ }
<Space direction="vertical"> >
<Text>Select descriptions above to add to search list</Text> {result.error ? (
<Text type="secondary"> <Text type="danger">{result.error}</Text>
Then click the links to search on Google Patents or Lens.org ) : result.patents.length === 0 ? (
</Text> <Empty
</Space> image={Empty.PRESENTED_IMAGE_SIMPLE}
} description={
/> <Space direction="vertical" size={4}>
</Card> <Text strong>No matching patents found</Text>
)} <Text type="secondary" style={{ fontSize: 12 }}>
This may indicate a novel concept with no existing prior art.
</Text>
</Space>
}
/>
) : (
<List
size="small"
dataSource={result.patents}
renderItem={(patent) => {
const status = getStatusDisplay(patent.legal_status);
return (
<List.Item
style={{
padding: '8px',
borderBottom: `1px solid ${isDark ? '#303030' : '#f0f0f0'}`,
}}
actions={[
<Button
key="open"
type="link"
size="small"
icon={<LinkOutlined />}
href={patent.url}
target="_blank"
/>,
<Button
key="copy"
type="text"
size="small"
icon={<CopyOutlined />}
onClick={() => handleCopyPatent(patent)}
/>,
]}
>
<List.Item.Meta
title={
<Space direction="vertical" size={2}>
<Text strong style={{ fontSize: 13 }}>{patent.title || 'Untitled'}</Text>
<Space size={4} wrap>
<Tag>{patent.jurisdiction}-{patent.doc_number}</Tag>
<Tag color={status.color}>{status.text}</Tag>
{patent.date_published && (
<Text type="secondary" style={{ fontSize: 11 }}>
{patent.date_published}
</Text>
)}
</Space>
</Space>
}
description={
patent.abstract && (
<Paragraph
ellipsis={{ rows: 2, expandable: true, symbol: 'more' }}
style={{ marginBottom: 0, marginTop: 4, fontSize: 12 }}
>
{patent.abstract}
</Paragraph>
)
}
/>
</List.Item>
);
}}
/>
)}
</Card>
))}
</Space>
)}
</Card>
</div>
</div> </div>
); );
} }

View File

@@ -402,18 +402,22 @@ export interface CrossoverTransformationResult {
   transformedIdeas: ExpertTransformationDescription[];
 }

-// ===== Patent Search types =====
+// ===== Patent Search types (Lens.org API) =====

 export interface PatentResult {
-  publication_number: string;
+  lens_id: string;
+  doc_number: string;
+  jurisdiction: string;
+  kind: string;
   title: string;
-  snippet: string;
-  publication_date: string | null;
-  assignee: string | null;
-  inventor: string | null;
-  status: 'ACTIVE' | 'NOT_ACTIVE' | 'UNKNOWN';
-  pdf_url: string | null;
-  thumbnail_url: string | null;
+  abstract: string | null;
+  date_published: string | null;
+  applicants: string[];
+  inventors: string[];
+  legal_status: string | null;
+  classifications_cpc: string[];
+  families_simple: string[];
+  url: string;
 }

 export interface PatentSearchRequest {

View File

@@ -10,29 +10,47 @@ This document outlines a comprehensive experimental design to test the hypothesi
 | ID | Research Question |
 |----|-------------------|
-| **RQ1** | Does multi-expert generation produce higher semantic diversity than direct LLM generation? |
-| **RQ2** | Does multi-expert generation produce ideas with lower patent overlap (higher novelty)? |
-| **RQ3** | What is the optimal number of experts for maximizing diversity? |
-| **RQ4** | How do different expert sources (LLM vs Curated vs DBpedia) affect idea quality? |
-| **RQ5** | Does structured attribute decomposition enhance the multi-expert effect? |
+| **RQ1** | Does attribute decomposition improve semantic diversity of generated ideas? |
+| **RQ2** | Does expert perspective transformation improve semantic diversity of generated ideas? |
+| **RQ3** | Is there an interaction effect between attribute decomposition and expert perspectives? |
+| **RQ4** | Which combination produces the highest patent novelty (lowest overlap)? |
+| **RQ5** | How do different expert sources (LLM vs Curated vs External) affect idea quality? |
+| **RQ6** | Does context-free keyword generation (current design) increase hallucination/nonsense rate? |
+
+### Design Note: Context-Free Keyword Generation
+
+Our system intentionally excludes the original query during keyword generation (Stage 1):
+
+```
+Stage 1 (Keyword): Expert sees "木質" (wood) + "會計師" (accountant)
+                   Expert does NOT see "椅子" (chair)
+                   → Generates: "資金流動" (cash flow)
+
+Stage 2 (Description): Expert sees "椅子" + "資金流動"
+                       → Applies keyword to original query
+```
+
+**Rationale**: This forces maximum semantic distance in keyword generation.
+**Risk**: Some keywords may be too distant, resulting in nonsensical or unusable ideas.
+**RQ6 investigates**: What is the hallucination/nonsense rate, and is the tradeoff worthwhile?
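To make the two-stage flow concrete, here is a hedged sketch; the function name and prompt wording are illustrative, not the system's actual prompts:

```python
# Sketch of context-free keyword generation followed by re-grounding in the query.
# `llm_complete` stands in for whatever chat-completion call the system uses.
def generate_idea(query: str, attribute: str, expert: str, llm_complete) -> dict:
    # Stage 1: the expert sees only the attribute and their occupation,
    # NOT the original query, forcing a semantically distant keyword.
    keyword = llm_complete(
        f"You are a {expert}. Given the attribute '{attribute}', "
        f"name one concept from your professional domain (one or two words)."
    )
    # Stage 2: the keyword is brought back and applied to the original query.
    description = llm_complete(
        f"You are a {expert}. Apply the concept '{keyword}' to redesign '{query}'. "
        f"Describe the resulting idea in one sentence."
    )
    return {"expert": expert, "attribute": attribute,
            "keyword": keyword, "description": description}
```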
 ---

 ## 2. Experimental Design Overview

 ### 2.1 Design Type

-**Mixed Design**: Between-subjects for main conditions × Within-subjects for queries
+**2×2 Factorial Design**: Attribute Decomposition (With/Without) × Expert Perspectives (With/Without)
+- Within-subjects for queries (all queries tested across all conditions)

 ### 2.2 Variables

 #### Independent Variables (Manipulated)

-| Variable | Levels | Your System Parameter |
-|----------|--------|----------------------|
-| **Generation Method** | 5 levels (see conditions) | Condition-dependent |
-| **Expert Count** | 1, 2, 4, 6, 8 | `expert_count` |
-| **Expert Source** | LLM, Curated, DBpedia | `expert_source` |
-| **Attribute Structure** | With/Without decomposition | Pipeline inclusion |
+| Variable | Levels | Description |
+|----------|--------|-------------|
+| **Attribute Decomposition** | 2 levels: With / Without | Whether to decompose query into structured attributes |
+| **Expert Perspectives** | 2 levels: With / Without | Whether to use expert personas for idea generation |
+| **Expert Source** (secondary) | LLM, Curated, External | Source of expert occupations (tested within Expert=With conditions) |

 #### Dependent Variables (Measured)
@@ -61,34 +79,28 @@ This document outlines a comprehensive experimental design to test the hypothesi
 ## 3. Experimental Conditions

-### 3.1 Main Study: Generation Method Comparison
+### 3.1 Main Study: 2×2 Factorial Design

-| Condition | Description | Implementation |
-|-----------|-------------|----------------|
-| **C1: Direct** | Direct LLM generation | Prompt: "Generate 20 creative ideas for [query]" |
-| **C2: Single-Expert** | 1 expert × 20 ideas | `expert_count=1`, `keywords_per_expert=20` |
-| **C3: Multi-Expert-4** | 4 experts × 5 ideas each | `expert_count=4`, `keywords_per_expert=5` |
-| **C4: Multi-Expert-8** | 8 experts × 2-3 ideas each | `expert_count=8`, `keywords_per_expert=2-3` |
-| **C5: Random-Perspective** | 4 random words as "perspectives" | Custom prompt with random nouns |
+| Condition | Attributes | Experts | Description |
+|-----------|------------|---------|-------------|
+| **C1: Direct** | ❌ Without | ❌ Without | Baseline: "Generate 20 creative ideas for [query]" |
+| **C2: Expert-Only** | ❌ Without | ✅ With | Expert personas generate for whole query |
+| **C3: Attribute-Only** | ✅ With | ❌ Without | Decompose query, direct generate per attribute |
+| **C4: Full Pipeline** | ✅ With | ✅ With | Decompose query, experts generate per attribute |

-### 3.2 Expert Count Study
+### 3.2 Control Condition

-| Condition | Expert Count | Ideas per Expert |
-|-----------|--------------|------------------|
-| **E1** | 1 | 20 |
-| **E2** | 2 | 10 |
-| **E4** | 4 | 5 |
-| **E6** | 6 | 3-4 |
-| **E8** | 8 | 2-3 |
+| Condition | Description | Purpose |
+|-----------|-------------|---------|
+| **C5: Random-Perspective** | 4 random words as "perspectives" | Tests if ANY perspective shift helps, or if EXPERT knowledge specifically matters |

-### 3.3 Expert Source Study
+### 3.3 Expert Source Study (Secondary, within Expert=With conditions)

 | Condition | Source | Implementation |
 |-----------|--------|----------------|
-| **S-LLM** | LLM-generated | `expert_source=ExpertSource.LLM` |
-| **S-Curated** | Curated 210 occupations | `expert_source=ExpertSource.CURATED` |
-| **S-DBpedia** | DBpedia 2164 occupations | `expert_source=ExpertSource.DBPEDIA` |
-| **S-Random** | Random word "experts" | Custom implementation |
+| **S-LLM** | LLM-generated | Query-specific experts generated by LLM |
+| **S-Curated** | Curated occupations | Pre-selected high-quality occupations |
+| **S-External** | External sources | Wikidata/ConceptNet occupations |

 ---
@@ -251,7 +263,69 @@ def compute_patent_novelty(ideas: List[str], query: str) -> dict:
     }
 ```

-### 5.3 Metrics Summary Table
+### 5.3 Hallucination/Nonsense Metrics (RQ6)
+
+Since our design intentionally excludes the original query during keyword generation, we need to measure the "cost" of this approach.
+
+#### 5.3.1 LLM-as-Judge for Relevance
+
+```python
+def compute_relevance_score(query: str, ideas: List[str], judge_model: str) -> dict:
+    """
+    Use LLM to judge if each idea is relevant/applicable to the original query.
+    """
+    relevant_count = 0
+    nonsense_count = 0
+    results = []
+
+    for idea in ideas:
+        prompt = f"""
+        Original query: {query}
+        Generated idea: {idea}
+
+        Is this idea relevant and applicable to the original query?
+        Rate: 1 (nonsense/irrelevant), 2 (weak connection), 3 (relevant)
+        Return JSON: {{"score": N, "reason": "brief explanation"}}
+        """
+        result = llm_judge(prompt, model=judge_model)
+        results.append(result)
+
+        if result['score'] == 1:
+            nonsense_count += 1
+        elif result['score'] >= 2:
+            relevant_count += 1
+
+    return {
+        'relevance_rate': relevant_count / len(ideas),
+        'nonsense_rate': nonsense_count / len(ideas),
+        'details': results
+    }
+```
+
+#### 5.3.2 Semantic Distance Threshold Analysis
+
+```python
+def analyze_distance_threshold(query: str, ideas: List[str], embedding_model: str) -> dict:
+    """
+    Analyze which ideas exceed a "too far" semantic distance threshold.
+    Ideas beyond threshold may be creative OR nonsensical.
+    """
+    query_emb = get_embedding(query, model=embedding_model)
+    idea_embs = get_embeddings(ideas, model=embedding_model)
+
+    distances = [1 - cosine_similarity(query_emb, e) for e in idea_embs]
+
+    # Define thresholds (to be calibrated)
+    CREATIVE_THRESHOLD = 0.6   # Ideas this far are "creative"
+    NONSENSE_THRESHOLD = 0.85  # Ideas this far may be "nonsense"
+
+    return {
+        'creative_zone': sum(1 for d in distances if CREATIVE_THRESHOLD <= d < NONSENSE_THRESHOLD),
+        'potential_nonsense': sum(1 for d in distances if d >= NONSENSE_THRESHOLD),
+        'safe_zone': sum(1 for d in distances if d < CREATIVE_THRESHOLD),
+        'distance_distribution': distances
+    }
+```
+
+### 5.4 Metrics Summary Table

 | Metric | Formula | Interpretation |
 |--------|---------|----------------|
@@ -261,6 +335,18 @@ def compute_patent_novelty(ideas: List[str], query: str) -> dict:
 | **Query Distance** | 1 - cos_sim(query, idea) | Higher = farther from original |
 | **Patent Novelty Rate** | 1 - (matches / total) | Higher = more novel |
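As a concrete reference for the distance-based metrics in the table, a minimal sketch follows; the embedding backend and model choice are assumptions, not specified by the protocol:

```python
# Sketch: mean pairwise distance (diversity) and mean query distance, using
# sentence-transformers embeddings (model name is illustrative).
import numpy as np
from sentence_transformers import SentenceTransformer

_model = SentenceTransformer("all-MiniLM-L6-v2")

def _cosine(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def diversity_metrics(query: str, ideas: list[str]) -> dict:
    query_emb = _model.encode(query)
    idea_embs = _model.encode(ideas)
    # Mean pairwise distance between ideas (higher = more diverse)
    pair_dists = [
        1 - _cosine(idea_embs[i], idea_embs[j])
        for i in range(len(ideas)) for j in range(i + 1, len(ideas))
    ]
    # Distance of each idea from the original query (higher = farther from original)
    query_dists = [1 - _cosine(query_emb, e) for e in idea_embs]
    return {
        "mean_pairwise_distance": float(np.mean(pair_dists)),
        "mean_query_distance": float(np.mean(query_dists)),
    }
```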
+### 5.5 Nonsense/Hallucination Analysis (RQ6) - Three Methods
+
+| Method | Metric | How it works | Pros/Cons |
+|--------|--------|--------------|-----------|
+| **Automatic** | Semantic Distance Threshold | Ideas with distance > 0.85 flagged as "potential nonsense" | Fast, cheap; may miss contextual nonsense |
+| **LLM-as-Judge** | Relevance Score (1-3) | GPT-4 rates if idea is relevant to original query | Moderate cost; good balance |
+| **Human Evaluation** | Relevance Rating (1-7 Likert) | Humans rate coherence/relevance | Gold standard; most expensive |
+
+**Triangulation**: Compare all three methods to validate findings:
+- If automatic + LLM + human agree → high confidence
+- If they disagree → investigate why (interesting edge cases)

 ---

 ## 6. Human Evaluation Protocol
@@ -306,6 +392,22 @@ How creative is this idea overall?
 7 = Extremely creative
 ```

+#### 6.2.4 Relevance/Coherence (7-point Likert) - For RQ6
+
+```
+How relevant and coherent is this idea to the original query?
+1 = Nonsense/completely irrelevant (no logical connection)
+2 = Very weak connection (hard to see relevance)
+3 = Weak connection (requires stretch to see relevance)
+4 = Moderate connection (somewhat relevant)
+5 = Good connection (clearly relevant)
+6 = Strong connection (directly applicable)
+7 = Perfect fit (highly relevant and coherent)
+```
+
+**Note**: This scale specifically measures the "cost" of context-free generation.
+- Ideas with high novelty but low relevance (1-3) = potential hallucination
+- Ideas with high novelty AND high relevance (5-7) = successful creative leap

 ### 6.3 Procedure

 1. **Introduction** (5 min)
@@ -361,21 +463,27 @@ For each query Q in QuerySet:
     For each condition C in Conditions:

         If C == "Direct":
+            # No attributes, no experts
             ideas = direct_llm_generation(Q, n=20)

-        Elif C == "Single-Expert":
-            expert = generate_expert(Q, n=1)
-            ideas = expert_transformation(Q, expert, ideas_per_expert=20)
-
-        Elif C == "Multi-Expert-4":
-            experts = generate_experts(Q, n=4)
-            ideas = expert_transformation(Q, experts, ideas_per_expert=5)
-
-        Elif C == "Multi-Expert-8":
-            experts = generate_experts(Q, n=8)
-            ideas = expert_transformation(Q, experts, ideas_per_expert=2-3)
+        Elif C == "Expert-Only":
+            # No attributes, with experts
+            experts = generate_experts(Q, n=4)
+            ideas = expert_generation_whole_query(Q, experts, ideas_per_expert=5)
+
+        Elif C == "Attribute-Only":
+            # With attributes, no experts
+            attributes = decompose_attributes(Q)
+            ideas = direct_generation_per_attribute(Q, attributes, ideas_per_attr=5)
+
+        Elif C == "Full-Pipeline":
+            # With attributes, with experts
+            attributes = decompose_attributes(Q)
+            experts = generate_experts(Q, n=4)
+            ideas = expert_transformation(Q, attributes, experts, ideas_per_combo=1-2)

         Elif C == "Random-Perspective":
+            # Control: random words instead of experts
             perspectives = random.sample(RANDOM_WORDS, 4)
             ideas = perspective_generation(Q, perspectives, ideas_per=5)
@@ -469,20 +577,34 @@ Plot: Expert count vs diversity curve
 ## 9. Expected Results & Hypotheses

-### 9.1 Primary Hypotheses
+### 9.1 Primary Hypotheses (2×2 Factorial)

 | Hypothesis | Prediction | Metric |
 |------------|------------|--------|
-| **H1** | Multi-Expert-4 > Single-Expert > Direct | Semantic diversity |
-| **H2** | Multi-Expert-8 ≈ Multi-Expert-4 (diminishing returns) | Semantic diversity |
-| **H3** | Multi-Expert > Direct | Patent novelty rate |
-| **H4** | LLM experts > Curated > DBpedia | Unconventionality |
-| **H5** | With attributes > Without attributes | Overall diversity |
+| **H1: Main Effect of Attributes** | Attribute-Only > Direct | Semantic diversity |
+| **H2: Main Effect of Experts** | Expert-Only > Direct | Semantic diversity |
+| **H3: Interaction Effect** | Full Pipeline > (Attribute-Only + Expert-Only - Direct) | Semantic diversity |
+| **H4: Novelty** | Full Pipeline > all other conditions | Patent novelty rate |
+| **H5: Expert vs Random** | Expert-Only > Random-Perspective | Validates that expert knowledge matters |
+| **H6: Novelty-Usefulness Tradeoff** | Full Pipeline has a higher nonsense rate than Direct, but an acceptable one (<20%) | Nonsense rate |

-### 9.2 Expected Effect Sizes
+### 9.2 Expected Pattern
+
+```
+                     Without Experts      With Experts
+                     ---------------      ------------
+Without Attributes   Direct (low)         Expert-Only (medium)
+With Attributes      Attr-Only (medium)   Full Pipeline (high)
+```
+
+**Expected interaction**: The combination (Full Pipeline) should produce super-additive effects - the benefit of experts is amplified when combined with structured attributes.
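One way the H3 interaction could be tested once per-condition diversity scores are collected, sketched with illustrative column and file names:

```python
# Sketch: two-way ANOVA on diversity scores; the interaction term tests H3.
# Assumes a results table with columns: query_id, attributes, experts, diversity.
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

df = pd.read_csv("condition_scores.csv")  # hypothetical results file

model = ols("diversity ~ C(attributes) * C(experts)", data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)  # the C(attributes):C(experts) row is the interaction effect
```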
+### 9.3 Expected Effect Sizes

 Based on related work:
-- Diversity increase: d = 0.5-0.8 (medium to large)
+- Main effect of attributes: d = 0.3-0.5 (small to medium)
+- Main effect of experts: d = 0.4-0.6 (medium)
+- Interaction effect: d = 0.2-0.4 (small)
 - Patent novelty increase: 20-40% improvement
 - Human creativity rating: d = 0.3-0.5 (small to medium)

View File

@@ -14,7 +14,26 @@ Groups of people tend to generate more diverse ideas than individuals because ea
 PersonaFlow provides multiple perspectives by using LLMs to simulate domain-specific experts. User studies showed it increased the perceived relevance and creativity of ideated research directions and promoted users' critical thinking activities without increasing perceived cognitive load.

-**Gap for our work**: PersonaFlow focuses on research ideation. Our system applies to product/innovation ideation with structured attribute decomposition.
+**Critical Gap - Our Key Differentiation**:
+
+```
+PersonaFlow approach:
+    Query → Experts → Ideas
+    (Experts see the WHOLE query, no problem structure)
+
+Our approach:
+    Query → Attribute Decomposition → (Attributes × Experts) → Ideas
+    (Experts see SPECIFIC attributes, systematic coverage)
+```
+
+| Limitation of PersonaFlow | Our Solution |
+|---------------------------|--------------|
+| No problem structure | Attribute decomposition structures the problem space |
+| Experts applied to the whole query | Experts applied to specific attributes |
+| Cannot test what helps (experts vs structure) | 2×2 factorial isolates each contribution |
+| Implicit/random coverage of idea space | Systematic coverage via attribute × expert matrix |
+
+**Our unique contribution**: We hypothesize that attribute decomposition **amplifies** expert effectiveness (an interaction effect). PersonaFlow cannot test this because it never decomposes the problem.

 ### 1.3 PopBlends: Conceptual Blending with LLMs

 **PopBlends: Strategies for Conceptual Blending with Large Language Models** (CHI 2023)

View File

@@ -11,7 +11,7 @@
## Abstract (Draft) ## Abstract (Draft)
Large Language Models (LLMs) are increasingly used for creative ideation, yet they exhibit a phenomenon we term "semantic gravity" - the tendency to generate outputs clustered around high-probability regions of their training distribution. This limits the novelty and diversity of generated ideas. We propose a multi-expert transformation framework that systematically activates diverse semantic regions by conditioning LLM generation on simulated expert perspectives. Our system decomposes concepts into structured attributes, generates ideas through multiple domain-expert viewpoints, and employs semantic deduplication to ensure genuine diversity. Through experiments comparing multi-expert generation against direct LLM generation and single-expert baselines, we demonstrate that our approach produces ideas with [X]% higher semantic diversity and [Y]% lower patent overlap. We contribute a theoretical framework explaining LLM creativity limitations and an open-source system for innovation ideation. Large Language Models (LLMs) are increasingly used for creative ideation, yet they exhibit a phenomenon we term "semantic gravity" - the tendency to generate outputs clustered around high-probability regions of their training distribution. This limits the novelty and diversity of generated ideas. We investigate two complementary strategies to overcome this limitation: (1) **attribute decomposition**, which structures the problem space before creative exploration, and (2) **expert perspective transformation**, which conditions LLM generation on simulated domain-expert viewpoints. Through a 2×2 factorial experiment comparing Direct generation, Expert-Only, Attribute-Only, and Full Pipeline (both factors combined), we demonstrate that each factor independently improves semantic diversity, with the combination producing super-additive effects. Our Full Pipeline achieves [X]% higher semantic diversity and [Y]% lower patent overlap compared to direct generation. We contribute a theoretical framework explaining LLM creativity limitations and an open-source system for innovation ideation.
--- ---
@@ -61,8 +61,17 @@ Large Language Models (LLMs) are increasingly used for creative ideation, yet th
- Evaluation methods (CAT, semantic distance) - Evaluation methods (CAT, semantic distance)
### 2.5 Positioning Our Work ### 2.5 Positioning Our Work
- Gap: No end-to-end system combining structured decomposition + multi-expert transformation + deduplication
- Distinction from PersonaFlow: product innovation focus, attribute structure **Key distinction from PersonaFlow (closest related work)**:
```
PersonaFlow: Query → Experts → Ideas (no problem structure)
Our approach: Query → Attributes → (Attributes × Experts) → Ideas
```
- PersonaFlow applies experts to whole query; we apply experts to decomposed attributes
- PersonaFlow cannot isolate what helps; our 2×2 factorial design tests each factor
- We hypothesize attribute decomposition **amplifies** expert effectiveness (interaction effect)
- PersonaFlow showed experts help; we test whether **structuring the problem first** makes experts more effective
--- ---
@@ -102,30 +111,41 @@ Large Language Models (LLMs) are increasingly used for creative ideation, yet th
## 4. Experiments ## 4. Experiments
### 4.1 Research Questions ### 4.1 Research Questions
- RQ1: Does multi-expert generation increase semantic diversity? - RQ1: Does attribute decomposition improve semantic diversity?
- RQ2: Does multi-expert generation reduce patent overlap? - RQ2: Does expert perspective transformation improve semantic diversity?
- RQ3: What is the optimal number of experts? - RQ3: Is there an interaction effect between the two factors?
- RQ4: How do expert sources affect output quality? - RQ4: Which combination produces the highest patent novelty?
- RQ5: How do expert sources (LLM vs Curated vs External) affect quality?
- RQ6: What is the hallucination/nonsense rate of context-free keyword generation?
### 4.1.1 Design Note: Context-Free Keyword Generation
Our system intentionally excludes the original query during keyword generation:
- Stage 1: Expert sees attribute only (e.g., "wood" + "accountant"), NOT the query ("chair")
- Stage 2: Expert applies keyword to original query with context
- Rationale: Maximize semantic distance for novelty
- Risk: Some ideas may be too distant (nonsense/hallucination)
- RQ6 investigates this tradeoff
### 4.2 Experimental Setup ### 4.2 Experimental Setup
#### 4.2.1 Dataset #### 4.2.1 Dataset
- N concepts/queries for ideation - 30 queries for ideation (see experimental_protocol.md)
- Selection criteria (diverse domains, complexity levels) - Selection criteria: diverse domains, complexity levels
- Categories: everyday objects, technology/tools, services/systems
#### 4.2.2 Conditions #### 4.2.2 Conditions (2×2 Factorial Design)
| Condition | Description | | Condition | Attributes | Experts | Description |
|-----------|-------------| |-----------|------------|---------|-------------|
| Baseline | Direct LLM: "Generate 20 creative ideas for X" | | **C1: Direct** | ❌ | ❌ | Baseline: "Generate 20 creative ideas for [query]" |
| Single-Expert | 1 expert × 20 ideas | | **C2: Expert-Only** | ❌ | ✅ | Expert personas generate for whole query |
| Multi-Expert-4 | 4 experts × 5 ideas each | | **C3: Attribute-Only** | ✅ | ❌ | Decompose query, direct generate per attribute |
| Multi-Expert-8 | 8 experts × 2-3 ideas each | | **C4: Full Pipeline** | ✅ | ✅ | Decompose query, experts generate per attribute |
| Random-Perspective | 4 random words as "perspectives" | | **C5: Random-Perspective** | ❌ | (random) | Control: 4 random words as "perspectives" |
#### 4.2.3 Controls #### 4.2.3 Controls
- Same LLM model (specify version) - Same LLM model (specify version)
- Same temperature settings - Same temperature settings
- Same total idea count per condition - Same total idea count per condition (20 ideas)
### 4.3 Metrics ### 4.3 Metrics
@@ -142,8 +162,18 @@ Large Language Models (LLMs) are increasingly used for creative ideation, yet th
- Novelty rating (1-7 Likert) - Novelty rating (1-7 Likert)
- Usefulness rating (1-7 Likert) - Usefulness rating (1-7 Likert)
- Creativity rating (1-7 Likert) - Creativity rating (1-7 Likert)
- **Relevance rating (1-7 Likert) - for RQ6**
- Interrater reliability (Cronbach's alpha) - Interrater reliability (Cronbach's alpha)
#### 4.3.4 Nonsense/Hallucination Analysis (RQ6) - Three Methods
| Method | Metric | Purpose |
|--------|--------|---------|
| Automatic | Semantic distance threshold (>0.85) | Fast screening |
| LLM-as-Judge | GPT-4 relevance score (1-3) | Scalable evaluation |
| Human | Relevance rating (1-7 Likert) | Gold standard validation |
Triangulate all three to validate findings
### 4.4 Procedure ### 4.4 Procedure
- Idea generation process - Idea generation process
- Evaluation process - Evaluation process
@@ -153,27 +183,44 @@ Large Language Models (LLMs) are increasingly used for creative ideation, yet th
## 5. Results ## 5. Results
### 5.1 Semantic Diversity (RQ1) ### 5.1 Main Effect of Attribute Decomposition (RQ1)
- Compare: (Attribute-Only + Full Pipeline) vs (Direct + Expert-Only)
- Quantitative results - Quantitative results
- Visualization (t-SNE/UMAP of idea embeddings) - Statistical significance (ANOVA main effect)
- Statistical significance tests
### 5.2 Patent Novelty (RQ2) ### 5.2 Main Effect of Expert Perspectives (RQ2)
- Compare: (Expert-Only + Full Pipeline) vs (Direct + Attribute-Only)
- Quantitative results
- Statistical significance (ANOVA main effect)
### 5.3 Interaction Effect (RQ3)
- 2×2 interaction analysis
- Visualization: interaction plot
- Evidence for super-additive vs additive effects
### 5.4 Patent Novelty (RQ4)
- Overlap rates by condition - Overlap rates by condition
- Full Pipeline vs other conditions
- Examples of high-novelty ideas - Examples of high-novelty ideas
### 5.3 Expert Count Analysis (RQ3) ### 5.5 Expert Source Comparison (RQ5)
- Diversity vs. expert count curve - LLM-generated vs curated vs external
- Diminishing returns analysis
- Optimal expert count recommendation
### 5.4 Expert Source Comparison (RQ4)
- LLM-generated vs. curated vs. random
- Unconventionality metrics - Unconventionality metrics
- Within Expert=With conditions only
### 5.5 Human Evaluation Results ### 5.6 Control Condition Analysis
- Rating distributions - Expert-Only vs Random-Perspective
- Condition comparisons - Validates expert knowledge matters
### 5.7 Hallucination/Nonsense Analysis (RQ6)
- Nonsense rate by condition (LLM-as-judge)
- Semantic distance threshold analysis
- Novelty-usefulness tradeoff visualization
- Is the context-free design worth the hallucination cost?
### 5.8 Human Evaluation Results
- Rating distributions by condition
- 2×2 pattern in human judgments
- Correlation with automatic metrics - Correlation with automatic metrics
--- ---
@@ -181,14 +228,14 @@ Large Language Models (LLMs) are increasingly used for creative ideation, yet th
## 6. Discussion ## 6. Discussion
### 6.1 Interpreting the Results ### 6.1 Interpreting the Results
- Why multi-expert works - Why each factor contributes independently
- The role of structured decomposition - The interaction: why attributes amplify expert effectiveness
- Deduplication importance - Theoretical explanation via conceptual blending
### 6.2 Theoretical Implications ### 6.2 Theoretical Implications
- Semantic gravity as framework for LLM creativity - Semantic gravity as framework for LLM creativity
- Expert perspectives as productive constraints - Two complementary escape mechanisms
- Inner crowd wisdom - Structured decomposition as "scaffolding" for creative exploration
### 6.3 Practical Implications ### 6.3 Practical Implications
- When to use multi-expert approach - When to use multi-expert approach

472
research/research_report.md Normal file
View File

@@ -0,0 +1,472 @@
---
marp: true
theme: default
paginate: true
size: 16:9
style: |
section {
font-size: 24px;
}
h1 {
color: #2563eb;
}
h2 {
color: #1e40af;
}
table {
font-size: 20px;
}
.columns {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 1rem;
}
---
# Breaking Semantic Gravity
## Expert-Augmented LLM Ideation for Enhanced Creativity
**Research Progress Report**
January 2026
---
# Agenda
1. Research Problem & Motivation
2. Theoretical Framework: "Semantic Gravity"
3. Proposed Solution: Expert-Augmented Ideation
4. Experimental Design
5. Implementation Progress
6. Timeline & Next Steps
---
# 1. Research Problem
## The Myth and the Problem of LLM Creativity
**Myth**: LLMs enable infinite idea generation for creative tasks
**Problem**: Generated ideas lack **diversity** and **novelty**
- Ideas cluster around high-probability regions of the training distribution
- Limited exploration of distant conceptual spaces
- "Creative" outputs are **interpolations**, not **extrapolations**
---
# The "Semantic Gravity" Phenomenon
```
Direct LLM Generation:
Input: "Generate creative ideas for a chair"
Result:
- "Ergonomic office chair" (high probability)
- "Foldable portable chair" (high probability)
- "Eco-friendly bamboo chair" (moderate probability)
Problem:
→ Ideas cluster in predictable semantic neighborhoods
→ Limited exploration of distant conceptual spaces
```
---
# Why Does Semantic Gravity Occur?
| Factor | Description |
|--------|-------------|
| **Statistical Pattern Learning** | LLMs learn co-occurrence patterns from training data |
| **Model Collapse** (to revisit) | Sampling from "creative ideas" distribution seen in training |
| **Relevance Trap** (to revisit) | Strong associations dominate weak ones |
| **Domain Bias** | Outputs gravitate toward category prototypes |
---
# 2. Theoretical Framework
## Three Key Foundations
1. **Semantic Distance Theory** (Mednick, 1962)
- Creativity correlates with conceptual "jump" distance
2. **Conceptual Blending Theory** (Fauconnier & Turner, 2002)
- Creative products emerge from blending input spaces
3. **Design Fixation** (Jansson & Smith, 1991)
- Blind adherence to initial ideas limits creativity
---
# Semantic Distance in Action
```
Without Expert:
"Chair" → furniture, sitting, comfort, design
Semantic distance: SHORT
With Marine Biologist Expert:
"Chair" → underwater pressure, coral structure, buoyancy
Semantic distance: LONG
Result: Novel ideas like "pressure-adaptive seating"
```
**Key Insight**: Expert perspectives force semantic jumps that LLMs wouldn't naturally make.
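As a rough illustration of how such jumps could be quantified (embedding model choice is an assumption, not the project's fixed stack):

```python
# Illustrative only: semantic distance between the query and two candidate ideas.
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model
query = "chair"
ideas = ["ergonomic office chair",                      # semantic-gravity idea
         "pressure-adaptive seating for deep-sea use"]  # expert-induced idea

emb = model.encode([query] + ideas)
for idea, sim in zip(ideas, cosine_similarity(emb[:1], emb[1:])[0]):
    print(f"{idea}: semantic distance = {1 - sim:.2f}")  # larger = longer jump
```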
---
# 3. Proposed Solution
## Expert-Augmented LLM Ideation Pipeline
```
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│ Attribute │ → │ Expert │ → │ Expert │
│ Decomposition│ │ Generation │ │Transformation│
└──────────────┘ └──────────────┘ └──────────────┘
┌──────────────┐ ┌──────────────┐
│ Novelty │ ← │ Deduplication│
│ Validation │ │ │
└──────────────┘ └──────────────┘
```
---
# From "Wisdom of Crowds" to "Inner Crowd"
**Traditional Crowd**:
- Person 1 → Ideas from perspective 1
- Person 2 → Ideas from perspective 2
- Aggregation → Diverse idea pool
**Our "Inner Crowd"**:
- LLM + Expert 1 Persona → Ideas from perspective 1
- LLM + Expert 2 Persona → Ideas from perspective 2
- Aggregation → Diverse idea pool (simulated crowd)
---
# Expert Sources
| Source | Description | Coverage |
|--------|-------------|----------|
| **LLM-Generated** | Query-specific, prioritizes unconventional | Flexible |
| **Curated** | 210 pre-selected high-quality occupations | Controlled |
| **DBpedia** | 2,164 occupations from database | Broad |
Note: use the domain list (try adding two levels of the Dewey Decimal Classification? Future work?)
---
# 4. Research Questions (2×2 Factorial Design)
| ID | Research Question |
|----|-------------------|
| **RQ1** | Does attribute decomposition improve semantic diversity? |
| **RQ2** | Does expert perspective transformation improve semantic diversity? |
| **RQ3** | Is there an interaction effect between the two factors? |
| **RQ4** | Which combination produces the highest patent novelty? |
| **RQ5** | How do expert sources (LLM vs Curated vs External) affect quality? |
| **RQ6** | What is the hallucination/nonsense rate of context-free generation? |
---
# Design Choice: Context-Free Keyword Generation
Our system intentionally excludes the original query during keyword generation:
```
Stage 1 (Keyword):     Expert sees "wood" (attribute) + "accountant" (expert)
                       Expert does NOT see "chair" (the query)
                       → Generates: "cash flow"
Stage 2 (Description): Expert sees "chair" + "cash flow"
                       → Applies keyword to original query
```
**Rationale**: Forces maximum semantic distance for novelty
**Risk**: Some keywords may be too distant → nonsense/hallucination
**RQ6**: Measure this tradeoff
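For concreteness, a minimal sketch of the two-stage prompting against a local Ollama server (prompt wording, model name, and endpoint are illustrative assumptions, not the system's exact implementation):

```python
# Illustrative two-stage prompting sketch; prompts and model name are assumptions.
import requests

def ask_llm(prompt: str, model: str = "llama3") -> str:  # model name is a placeholder
    # Assumes a local Ollama server exposing its default /api/generate endpoint.
    r = requests.post("http://localhost:11434/api/generate",
                      json={"model": model, "prompt": prompt, "stream": False})
    return r.json()["response"]

query, attribute, expert = "chair", "wood", "accountant"

# Stage 1: context-free -- the expert sees the attribute only, NOT the query.
keyword = ask_llm(f"You are a domain expert: {expert}. Name one concept from your "
                  f"field related to '{attribute}'. Reply with a short phrase only.")

# Stage 2: the keyword is applied back to the original query, with context restored.
idea = ask_llm(f"You are a domain expert: {expert}. Combine the concept "
               f"'{keyword.strip()}' with a '{query}' to propose one novel product idea.")
print(idea)
```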
---
# The Semantic Distance Tradeoff
```
Too Close Optimal Zone Too Far
(Semantic Gravity) (Creative) (Hallucination)
├─────────────────────────┼──────────────────────────────┼─────────────────────────┤
"Ergonomic office chair" "Pressure-adaptive seating" "Quantum chair consciousness"
High usefulness High novelty + useful High novelty, nonsense
Low novelty Low usefulness
```
**H6**: Full Pipeline has higher nonsense rate than Direct, but acceptable (<20%)
---
# Measuring Nonsense/Hallucination (RQ6) - Three Methods
| Method | Metric | Pros | Cons |
|--------|--------|------|------|
| **Automatic** | Semantic distance > 0.85 | Fast, cheap | May miss contextual nonsense |
| **LLM-as-Judge** | GPT-4 relevance score (1-3) | Moderate cost, scalable | Potential LLM bias |
| **Human Evaluation** | Relevance rating (1-7 Likert) | Gold standard | Expensive, slow |
**Triangulation**: Compare all three methods
- Agreement → high confidence in nonsense detection
- Disagreement → interesting edge cases to analyze
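The automatic screening row above could be implemented roughly as follows (embedding model is an assumption; the 0.85 cutoff is the one from the table):

```python
# Automatic nonsense screening: flag ideas whose embedding is too far from the query.
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model

def flag_nonsense(query: str, ideas: list[str], threshold: float = 0.85) -> list[bool]:
    emb = model.encode([query] + ideas)
    sims = cosine_similarity(emb[:1], emb[1:])[0]
    # True = semantic distance exceeds the cutoff -> forward to LLM/human judges.
    return [(1 - s) > threshold for s in sims]

print(flag_nonsense("chair", ["ergonomic office chair", "quantum chair consciousness"]))
```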
---
# Core Hypotheses (2×2 Factorial)
| Hypothesis | Prediction | Metric |
|------------|------------|--------|
| **H1: Attributes** | (Attr-Only + Full) > (Direct + Expert-Only) | Semantic diversity |
| **H2: Experts** | (Expert-Only + Full) > (Direct + Attr-Only) | Semantic diversity |
| **H3: Interaction** | Full > (Attr-Only + Expert-Only - Direct) | Super-additive effect |
| **H4: Novelty** | Full Pipeline > all others | Patent novelty rate |
| **H5: Control** | Expert-Only > Random-Perspective | Validates expert knowledge |
| **H6: Tradeoff** | Full Pipeline nonsense rate < 20% | Nonsense rate |
---
# Experimental Conditions (2×2 Factorial)
| Condition | Attributes | Experts | Description |
|-----------|------------|---------|-------------|
| **C1: Direct** | ❌ | ❌ | Baseline: "Generate 20 ideas for [query]" |
| **C2: Expert-Only** | ❌ | ✅ | Expert personas generate for whole query |
| **C3: Attribute-Only** | ✅ | ❌ | Decompose query, direct generate per attribute |
| **C4: Full Pipeline** | ✅ | ✅ | Decompose query, experts generate per attribute |
| **C5: Random-Perspective** | ❌ | (random) | Control: random words as "perspectives" |
---
# Expected 2×2 Pattern
```
Without Experts With Experts
--------------- ------------
Without Attributes Direct (low) Expert-Only (medium)
With Attributes Attr-Only (medium) Full Pipeline (high)
```
**Key prediction**: The combination (Full Pipeline) produces **super-additive** effects
- Experts are more effective when given structured attributes to transform
- The interaction term should be statistically significant
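Once per-condition scores exist, the predicted pattern can be checked visually with a simple interaction plot (file path and column names are hypothetical):

```python
# Sketch: interaction plot for the expected 2x2 pattern; columns are hypothetical.
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("diversity_scores.csv")  # columns: attributes, experts, diversity
means = df.groupby(["attributes", "experts"])["diversity"].mean().unstack("experts")
means.plot(marker="o")  # one line per expert level; non-parallel lines suggest interaction
plt.ylabel("semantic diversity")
plt.savefig("interaction_plot.png")
```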
---
# Query Dataset (30 Queries)
**Category A: Everyday Objects (10)**
- Chair, Umbrella, Backpack, Coffee mug, Bicycle...
**Category B: Technology & Tools (10)**
- Solar panel, Electric vehicle, 3D printer, Drone...
**Category C: Services & Systems (10)**
- Food delivery, Online education, Healthcare appointment...
**Total**: 30 queries × 5 conditions (4 factorial + 1 control) × 20 ideas = **3,000 ideas**
---
# Metrics: Statistical Evaluation
| Metric | Formula | Interpretation |
|--------|---------|----------------|
| **Mean Pairwise Distance** | avg(1 - cos_sim(i, j)) | Higher = more diverse |
| **Silhouette Score** | Cluster cohesion vs separation | Higher = clearer clusters |
| **Query Distance** | 1 - cos_sim(query, idea) | Higher = farther from original |
| **Patent Novelty Rate** | 1 - (matches / total) | Higher = more novel |
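A minimal sketch of how the first and third rows could be computed from idea embeddings (embedding model is an assumption):

```python
# Sketch: mean pairwise distance and mean query distance from idea embeddings.
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model

def diversity_metrics(query: str, ideas: list[str]) -> dict:
    emb = model.encode(ideas)
    sims = cosine_similarity(emb)                 # n x n cosine similarities
    iu = np.triu_indices(len(ideas), k=1)         # unique pairs only
    mean_pairwise = float(np.mean(1 - sims[iu]))
    q_sims = cosine_similarity(model.encode([query]), emb)[0]
    mean_query_dist = float(np.mean(1 - q_sims))
    return {"mean_pairwise_distance": mean_pairwise,
            "mean_query_distance": mean_query_dist}
```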
---
# Metrics: Human Evaluation
**Participants**: 60 evaluators (Prolific/MTurk)
**Rating Scales** (7-point Likert):
- **Novelty**: How novel/surprising is this idea?
- **Usefulness**: How practical is this idea?
- **Creativity**: How creative is this idea overall?
- **Relevance**: How relevant/coherent is this idea to the query? **(RQ6)**
- Nonsense flag? (open question)
**Quality Control**:
- Attention checks, completion time monitoring
- Inter-rater reliability (Cronbach's α > 0.7)
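As a reference for the reliability check, a minimal Cronbach's α sketch (toy numbers; assumes each rater scores the same set of ideas):

```python
# Sketch: Cronbach's alpha for inter-rater reliability (raters treated as "items").
import numpy as np

def cronbach_alpha(ratings: np.ndarray) -> float:
    """ratings: shape (n_ideas, n_raters), one column per rater."""
    k = ratings.shape[1]
    item_vars = ratings.var(axis=0, ddof=1).sum()   # per-rater variance, summed
    total_var = ratings.sum(axis=1).var(ddof=1)     # variance of idea-level totals
    return (k / (k - 1)) * (1 - item_vars / total_var)

# Toy example: 5 ideas rated by 3 raters on a 7-point scale.
print(cronbach_alpha(np.array([[6, 5, 6], [2, 3, 2], [4, 4, 5], [7, 6, 6], [3, 2, 3]])))
```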
---
# What is Prolific/MTurk?
Online platforms for recruiting human participants for research studies.
| Platform | Description | Best For |
|----------|-------------|----------|
| **Prolific** | Academic-focused crowdsourcing | Research studies (higher quality) |
| **MTurk** | Amazon Mechanical Turk | Large-scale tasks (lower cost) |
**How it works for our study**:
1. Upload 600 ideas to evaluate (subset of generated ideas)
2. Recruit 60 participants (~$8-15/hour compensation)
3. Each participant rates ~30 ideas (novelty, usefulness, creativity)
4. Download ratings → statistical analysis
**Cost estimate**: 60 participants × 30 min × $12/hr = ~$360
---
# Alternative: LLM-as-Judge
If human evaluation is too expensive or time-consuming:
| Approach | Pros | Cons |
|----------|------|------|
| **Human (Prolific/MTurk)** | Gold standard, publishable | Cost, time, IRB approval |
| **LLM-as-Judge (GPT-4)** | Fast, cheap, reproducible | Less rigorous, potential bias |
| **Automatic metrics only** | No human cost | Missing subjective quality |
**Recommendation**: Start with automatic metrics, add human evaluation for final paper submission.
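If the LLM-as-Judge route is used, a minimal sketch with the OpenAI Python client (prompt wording is an assumption; GPT-4 as named above):

```python
# Sketch: LLM-as-judge relevance scoring on the 1-3 scale; prompt is an assumption.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def judge_relevance(query: str, idea: str, model: str = "gpt-4") -> int:
    prompt = (f"Query: {query}\nIdea: {idea}\n"
              "Rate how relevant/coherent the idea is to the query.\n"
              "Answer with a single digit: 1 = nonsense, 2 = borderline, 3 = relevant.")
    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    return int(resp.choices[0].message.content.strip()[0])

print(judge_relevance("chair", "quantum chair consciousness"))
```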
---
# 5. Implementation Status
## System Components (Implemented)
- Attribute decomposition pipeline
- Expert team generation (LLM, Curated, DBpedia sources)
- Expert transformation with parallel processing
- Semantic deduplication (embedding + LLM methods)
- Patent search integration
- Web-based visualization interface
---
# Implementation Checklist
### Experiment Scripts (To Do)
- [ ] `experiments/generate_ideas.py` - Idea generation
- [ ] `experiments/compute_metrics.py` - Automatic metrics
- [ ] `experiments/export_for_evaluation.py` - Human evaluation prep
- [ ] `experiments/analyze_results.py` - Statistical analysis
- [ ] `experiments/visualize.py` - Generate figures
---
# 6. Timeline
| Phase | Activity |
|-------|----------|
| **Phase 1** | Implement idea generation scripts |
| **Phase 2** | Generate all ideas (5 conditions × 30 queries) |
| **Phase 3** | Compute automatic metrics |
| **Phase 4** | Design and pilot human evaluation |
| **Phase 5** | Run human evaluation (60 participants) |
| **Phase 6** | Analyze results and write paper |
---
# Target Venues
### Tier 1 (Recommended)
- **CHI** - ACM Conference on Human Factors (Sept deadline)
- **CSCW** - Computer-Supported Cooperative Work (Apr/Jan deadline)
- **Creativity & Cognition** - Specialized computational creativity
### Journal Options
- **IJHCS** - International Journal of Human-Computer Studies
- **TOCHI** - ACM Transactions on Computer-Human Interaction
---
# Key Contributions
1. **Theoretical**: "Semantic gravity" framework + two-factor solution
2. **Methodological**: 2×2 factorial design isolates attribute vs expert contributions
3. **Empirical**: Quantitative evidence for interaction effects in LLM creativity
4. **Practical**: Open-source system with both factors for maximum diversity
---
# Key Differentiator vs PersonaFlow
```
PersonaFlow (2024): Query → Experts → Ideas
(Experts see WHOLE query, no structure)
Our Approach: Query → Attributes → (Attributes × Experts) → Ideas
(Experts see SPECIFIC attributes, systematic)
```
**What we can answer that PersonaFlow cannot:**
1. Does problem structure alone help? (Attribute-Only vs Direct)
2. Do experts help beyond structure? (Full vs Attribute-Only)
3. Is there an interaction effect? (amplification hypothesis)
---
# Related Work Comparison
| Approach | Limitation | Our Advantage |
|----------|------------|---------------|
| Direct LLM | Semantic gravity | Two-factor enhancement |
| **PersonaFlow** | **No problem structure** | **Attribute decomposition amplifies experts** |
| PopBlends | Two-concept only | Systematic attribute × expert matrix |
| BILLY | Cannot isolate factors | 2×2 factorial isolates contributions |
---
# References (Key Papers)
1. Siangliulue et al. (2017) - Wisdom of Crowds via Role Assumption
2. Liu et al. (2024) - PersonaFlow: LLM-Simulated Expert Perspectives
3. Wang et al. (2023) - PopBlends: Conceptual Blending with LLMs
4. Wadinambiarachchi et al. (2024) - Effects of Generative AI on Design Fixation
5. Mednick (1962) - Semantic Distance Theory
6. Fauconnier & Turner (2002) - Conceptual Blending Theory
*Full reference list: 55+ papers in `research/references.md`*
---
# Questions & Discussion
## Next Steps
1. Finalize experimental design details
2. Implement experiment scripts
3. Collect pilot data for validation
4. Submit IRB for human evaluation (if needed)
---
# Thank You
**Project Repository**: novelty-seeking
**Research Materials**:
- `research/literature_review.md`
- `research/theoretical_framework.md`
- `research/experimental_protocol.md`
- `research/paper_outline.md`
- `research/references.md`

View File

@@ -59,6 +59,27 @@ With Marine Biologist Expert:
Result: Novel ideas like "pressure-adaptive seating" or "coral-inspired structural support" Result: Novel ideas like "pressure-adaptive seating" or "coral-inspired structural support"
``` ```
#### The Semantic Distance Tradeoff
However, semantic distance is not always beneficial. There exists a tradeoff:
```
Semantic Distance Spectrum:
Too Close Optimal Zone Too Far
(Semantic Gravity) (Creative) (Hallucination)
├────────────────────────────┼────────────────────────────────┼────────────────────────────┤
"Ergonomic office chair" "Pressure-adaptive seating" "Quantum-entangled
"Coral-inspired support" chair consciousness"
High usefulness High novelty + useful High novelty, nonsense
Low novelty Low usefulness
```
**Our Design Choice**: Context-free keyword generation (Stage 1 excludes original query) intentionally pushes toward the "far" end to maximize novelty. Stage 2 re-introduces query context to ground the ideas.
**Research Question**: What is the hallucination/nonsense rate of this approach, and is the tradeoff worthwhile?
#### 2. Conceptual Blending Theory (Fauconnier & Turner, 2002) #### 2. Conceptual Blending Theory (Fauconnier & Turner, 2002)
> "Creative products emerge from blending elements of two input spaces into a novel integrated space." > "Creative products emerge from blending elements of two input spaces into a novel integrated space."
@@ -136,12 +157,22 @@ Our "Inner Crowd":
Aggregation → Diverse idea pool (simulated crowd) Aggregation → Diverse idea pool (simulated crowd)
``` ```
### Why Multiple Experts Work ### Why This Approach Works: Two Complementary Mechanisms
1. **Coverage**: Different experts activate different semantic regions **Factor 1: Attribute Decomposition**
2. **Redundancy Reduction**: Deduplication removes overlapping ideas - Structures the problem space before creative exploration
3. **Diversity by Design**: Expert selection can be optimized for maximum diversity - Prevents premature fixation on holistic solutions
4. **Diminishing Returns**: Beyond ~4-6 experts, marginal diversity gains decrease - Ensures coverage across different aspects of the target concept
**Factor 2: Expert Perspectives**
- Different experts activate different semantic regions
- Forces semantic jumps that LLMs wouldn't naturally make
- Each expert provides a distinct input space for conceptual blending
**Combined Effect (Interaction)**
- Experts are more effective when given structured attributes to transform
- Attributes without expert perspectives still generate predictable ideas
- The combination creates systematic exploration of remote conceptual spaces
--- ---
@@ -231,32 +262,43 @@ Output:
--- ---
## Testable Hypotheses ## Testable Hypotheses (2×2 Factorial Design)
### H1: Semantic Diversity Our experimental design manipulates two independent factors:
> Multi-expert generation produces higher semantic diversity than single-expert or direct generation. 1. **Attribute Decomposition**: With / Without
2. **Expert Perspectives**: With / Without
### H1: Main Effect of Attribute Decomposition
> Conditions with attribute decomposition produce higher semantic diversity than those without.
**Prediction**: (Attribute-Only + Full Pipeline) > (Direct + Expert-Only)
**Measurement**: Mean pairwise cosine distance between idea embeddings **Measurement**: Mean pairwise cosine distance between idea embeddings
### H2: Novelty ### H2: Main Effect of Expert Perspectives
> Ideas from multi-expert generation have lower patent overlap than direct generation. > Conditions with expert perspectives produce higher semantic diversity than those without.
**Measurement**: Percentage of ideas with existing patent matches **Prediction**: (Expert-Only + Full Pipeline) > (Direct + Attribute-Only)
**Measurement**: Mean pairwise cosine distance between idea embeddings
### H3: Expert Count Effect ### H3: Interaction Effect
> Semantic diversity increases with expert count, with diminishing returns beyond 4-6 experts. > The combination of attributes and experts produces super-additive benefits.
**Measurement**: Diversity vs. expert count curve **Prediction**: Full Pipeline > (Attribute-Only + Expert-Only - Direct)
**Rationale**: Experts are more effective when given structured problem decomposition to work with.
**Measurement**: Interaction term in 2×2 ANOVA
### H4: Expert Source Effect ### H4: Novelty
> LLM-generated experts produce more unconventional ideas than curated/database experts. > The Full Pipeline produces ideas with lowest patent overlap.
**Measurement**: Semantic distance from query centroid **Prediction**: Full Pipeline has highest novelty rate across all conditions
**Measurement**: Percentage of ideas without existing patent matches
### H5: Fixation Breaking ### H5: Expert vs Random Control
> Multi-expert approach produces more ideas outside the top-3 semantic clusters than direct generation. > Expert perspectives outperform random word perspectives.
**Measurement**: Cluster distribution analysis **Prediction**: Expert-Only > Random-Perspective
**Rationale**: Validates that domain knowledge (not just any perspective shift) drives improvement
**Measurement**: Semantic diversity and human creativity ratings
--- ---
@@ -271,10 +313,29 @@ Output:
## Positioning Against Related Work ## Positioning Against Related Work
### Key Differentiator: Attribute Decomposition
```
PersonaFlow (2024): Query → Experts → Ideas
Our Approach: Query → Attributes → (Attributes × Experts) → Ideas
```
**Why this matters**: Attribute decomposition provides **scaffolding** that makes expert perspectives more effective. An expert seeing "chair materials" generates more focused ideas than an expert seeing just "chair."
### Comparison Table
| Approach | Limitation | Our Advantage | | Approach | Limitation | Our Advantage |
|----------|------------|---------------| |----------|------------|---------------|
| Direct LLM generation | Semantic gravity, fixation | Expert-forced semantic jumps | | Direct LLM generation | Semantic gravity, fixation | Two-factor enhancement (attributes + experts) |
| Human brainstorming | Cognitive fatigue, social dynamics | Tireless LLM generation | | **PersonaFlow (2024)** | **No problem structure, experts see whole query** | **Attribute decomposition amplifies expert effect** |
| PersonaFlow (2024) | Research-focused, no attribute structure | Product innovation, structured decomposition | | PopBlends (2023) | Two-concept blending only | Systematic attribute × expert exploration |
| PopBlends (2023) | Two-concept blending only | Multi-expert, multi-attribute blending | | BILLY (2025) | Cannot isolate what helps | 2×2 factorial design isolates contributions |
| BILLY (2025) | Vector fusion less interpretable | Sequential generation, explicit control | | Persona prompting alone | Random coverage | Systematic coverage via attribute × expert matrix |
### What We Can Answer That PersonaFlow Cannot
1. **Does problem structure alone help?** (Attribute-Only vs Direct)
2. **Do experts help beyond structure?** (Full Pipeline vs Attribute-Only)
3. **Is there an interaction effect?** (Full Pipeline > Attribute-Only + Expert-Only - Direct)
PersonaFlow showed experts help, but never tested whether **structuring the problem first** makes experts more effective.

View File

@@ -37,7 +37,7 @@ source venv/bin/activate
pip install -r requirements.txt -q pip install -r requirements.txt -q
# Start uvicorn in background # Start uvicorn in background
uvicorn app.main:app --host 0.0.0.0 --port 8000 & uvicorn app.main:app --host 0.0.0.0 --port 8001 &
BACKEND_PID=$! BACKEND_PID=$!
echo "Backend PID: $BACKEND_PID" echo "Backend PID: $BACKEND_PID"
@@ -65,8 +65,8 @@ echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}Attribute Agent is running!${NC}" echo -e "${GREEN}Attribute Agent is running!${NC}"
echo -e "${GREEN}========================================${NC}" echo -e "${GREEN}========================================${NC}"
echo "" echo ""
echo -e "Backend: ${YELLOW}http://localhost:8000${NC}" echo -e "Backend: ${YELLOW}http://localhost:8001${NC}"
echo -e "Frontend: ${YELLOW}http://localhost:5173${NC}" echo -e "Frontend: ${YELLOW}http://localhost:5173${NC}"
echo -e "API Docs: ${YELLOW}http://localhost:8000/docs${NC}" echo -e "API Docs: ${YELLOW}http://localhost:8001/docs${NC}"
echo "" echo ""
echo -e "Run ${YELLOW}./stop.sh${NC} to stop all services" echo -e "Run ${YELLOW}./stop.sh${NC} to stop all services"