- Improve patent search service with expanded functionality
- Update PatentSearchPanel UI component
- Add new research_report.md
- Update experimental protocol, literature review, paper outline, and theoretical framework

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
265 lines
8.9 KiB
Python
265 lines
8.9 KiB
Python
"""Patent Search Service using Lens.org API"""
|
|
|
|
import httpx
|
|
import logging
|
|
from typing import List, Optional, Dict, Any
|
|
from dataclasses import dataclass, asdict
|
|
|
|
from app.config import settings
|
|
|
|
# Module-level logger, named after this module so log records can be filtered by origin.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class PatentSearchResult:
    """Single patent search result from Lens.org.

    A flat, serialisable record describing one patent document. All fields
    are required positional/keyword arguments; the ``Optional`` ones accept
    ``None`` but have no default value.
    """

    # Identification of the patent document.
    lens_id: str
    doc_number: str
    jurisdiction: str
    kind: str

    # Descriptive text.
    title: str
    abstract: Optional[str]

    # Publication date exactly as supplied by the caller (not parsed here).
    date_published: Optional[str]

    # Names of the parties attached to the document.
    applicants: List[str]
    inventors: List[str]

    legal_status: Optional[str]

    # CPC classification symbols.
    classifications_cpc: List[str]

    # Identifiers of the members of the document's simple family.
    families_simple: List[str]

    # Link to the document's page.
    url: str

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a plain dict.

        Uses ``dataclasses.asdict``, so list fields are copied rather than
        shared with this instance.
        """
        return asdict(self)
|
|
|
|
|
|
class PatentSearchService:
    """Service for searching patents using the Lens.org API.

    Holds a lazily created ``httpx.AsyncClient`` that is reused across
    searches. ``search`` never raises for API/transport problems; it reports
    them through an ``"error"`` key in the returned dict instead.
    """

    LENS_API_URL = "https://api.lens.org/patent/search"

    def __init__(self):
        # Created on first use by _get_client(); None until then.
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> "httpx.AsyncClient":
        """Return the shared HTTP client, creating it if missing or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=30.0,
                follow_redirects=True,
            )
        return self._client

    async def close(self):
        """Close the shared HTTP client if one is currently open."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()

    def _get_headers(self) -> Dict[str, str]:
        """Get headers with authorization token.

        Raises:
            ValueError: if ``settings.lens_api_token`` is empty.
        """
        token = settings.lens_api_token
        if not token:
            raise ValueError("LENS_API_TOKEN environment variable is not set")
        return {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

    @staticmethod
    def _error_response(message: str) -> Dict[str, Any]:
        """Build the empty result payload used for every failure path."""
        return {
            "total_results": 0,
            "patents": [],
            "error": message,
        }

    @staticmethod
    def _as_dict(value: Any) -> Dict[str, Any]:
        """Return *value* if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _extract_party_names(entries: Any) -> List[str]:
        """Collect non-empty ``extracted_name.value`` strings from a party list."""
        if not isinstance(entries, list):
            return []
        names: List[str] = []
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            extracted = entry.get("extracted_name")
            # A JSON null here must not abort parsing of the whole document.
            name = extracted.get("value") if isinstance(extracted, dict) else None
            if name:
                names.append(name)
        return names

    async def search(
        self,
        query: str,
        max_results: int = 10,
    ) -> dict:
        """
        Search Lens.org for relevant patents

        Args:
            query: Search query (searches title, abstract, and claims)
            max_results: Maximum number of results to return

        Returns:
            Dict with ``total_results`` (count reported by the API) and
            ``patents`` (list of result dicts). On any failure the dict has
            ``total_results`` 0, an empty ``patents`` list and an ``error``
            message; no exception is propagated.
        """
        try:
            # Only a missing token is a configuration error. Keeping this
            # handler narrow stops other ValueErrors (notably JSON decode
            # failures, which subclass ValueError) from being mislabelled.
            try:
                headers = self._get_headers()
            except ValueError as e:
                logger.error("Configuration error: %s", e)
                return self._error_response(str(e))

            client = await self._get_client()

            # Build Lens.org query using query string format for full-text search
            request_body = {
                "query": query,
                "size": max_results,
                "sort": [{"_score": "desc"}],
            }

            logger.info("Searching Lens.org patents with query: %s...", query[:100])

            response = await client.post(
                self.LENS_API_URL,
                json=request_body,
                headers=headers,
            )

            if response.status_code == 401:
                logger.error("Lens.org API authentication failed - check LENS_API_TOKEN")
                return self._error_response("Authentication failed - invalid API token")

            if response.status_code == 429:
                logger.warning("Lens.org API rate limit exceeded")
                return self._error_response("Rate limit exceeded - please try again later")

            if response.status_code != 200:
                logger.error(
                    "Lens.org API returned status %s: %s",
                    response.status_code,
                    response.text,
                )
                return self._error_response(f"API returned status {response.status_code}")

            try:
                data = response.json()
            except ValueError as e:
                logger.error("Lens.org API returned invalid JSON: %s", e)
                return self._error_response(f"Invalid response from API: {e}")

            total_results = data.get("total", 0)
            # "data" may be absent or null; non-dict entries cannot be parsed.
            results = data.get("data") or []
            patents = [
                self._parse_patent(item)
                for item in results
                if isinstance(item, dict)
            ]

            logger.info(
                "Found %s total patents, returning %s", total_results, len(patents)
            )

            return {
                "total_results": total_results,
                "patents": [p.to_dict() for p in patents],
            }

        except httpx.HTTPError as e:
            logger.error("HTTP error searching patents: %s", e)
            return self._error_response(str(e))
        except Exception as e:
            # Last-resort boundary: keep the traceback in the log.
            logger.exception("Error searching patents: %s", e)
            return self._error_response(str(e))

    def _parse_patent(self, item: Dict[str, Any]) -> "PatentSearchResult":
        """Parse a single patent result from Lens.org response.

        Missing, null or wrongly typed sections are treated as empty rather
        than raising, so one malformed record cannot fail a whole search.
        """
        lens_id = item.get("lens_id") or ""

        # Get biblio section (contains title, parties, classifications)
        biblio = self._as_dict(item.get("biblio"))
        parties = self._as_dict(biblio.get("parties"))

        legal_status_data = item.get("legal_status")
        legal_status = (
            legal_status_data.get("patent_status")
            if isinstance(legal_status_data, dict)
            else None
        )

        # CPC symbols: accept a bare list of {"symbol": ...} entries, or an
        # object wrapping that list under "classifications".
        # NOTE(review): the wrapped form is believed to match the Lens
        # response schema - confirm against the API documentation.
        cpc_data = biblio.get("classifications_cpc")
        if isinstance(cpc_data, dict):
            cpc_data = cpc_data.get("classifications")
        classifications_cpc: List[str] = []
        if isinstance(cpc_data, list):
            classifications_cpc = [
                cpc["symbol"]
                for cpc in cpc_data
                if isinstance(cpc, dict) and cpc.get("symbol")
            ]

        # Simple family members: accept either "simple_family" or "simple".
        # NOTE(review): "simple_family" is believed to be the documented key
        # - confirm against the API documentation.
        families_data = self._as_dict(item.get("families"))
        simple_family = self._as_dict(
            families_data.get("simple_family") or families_data.get("simple")
        )
        members = simple_family.get("members")
        families_simple: List[str] = []
        if isinstance(members, list):
            families_simple = [
                member["lens_id"]
                for member in members
                if isinstance(member, dict) and member.get("lens_id")
            ]

        return PatentSearchResult(
            lens_id=lens_id,
            doc_number=item.get("doc_number") or "",
            jurisdiction=item.get("jurisdiction") or "",
            kind=item.get("kind") or "",
            # Title lives in biblio.invention_title; abstract is top-level.
            title=self._extract_text_with_lang(biblio.get("invention_title")),
            abstract=self._extract_text_with_lang(item.get("abstract")),
            date_published=item.get("date_published"),
            applicants=self._extract_party_names(parties.get("applicants")),
            inventors=self._extract_party_names(parties.get("inventors")),
            legal_status=legal_status,
            classifications_cpc=classifications_cpc,
            families_simple=families_simple,
            # Build URL to Lens.org patent page
            url=f"https://www.lens.org/lens/patent/{lens_id}" if lens_id else "",
        )

    def _extract_text_with_lang(self, data: Any, prefer_lang: str = "en") -> str:
        """Extract text from Lens.org language-tagged data, preferring a language.

        Args:
            data: A plain string, a single ``{"lang", "text"}`` dict, or a
                list of such dicts. Anything else (or an empty value) yields
                an empty string.
            prefer_lang: Language code to pick when several entries exist.

        Returns:
            The text of the first entry in ``prefer_lang``, else the text of
            the first entry; always a string (a null ``text`` becomes "").
        """
        if not data:
            return ""
        if isinstance(data, str):
            return data
        if isinstance(data, dict):
            return data.get("text") or ""
        if isinstance(data, list):
            # Prefer specified language
            for entry in data:
                if isinstance(entry, dict) and entry.get("lang") == prefer_lang:
                    return entry.get("text") or ""
            # Fall back to first item
            first = data[0]
            if isinstance(first, dict):
                return first.get("text") or ""
            return str(first)
        return ""
|
|
|
|
|
|
# Shared module-level instance, created once when this module is imported.
patent_search_service = PatentSearchService()
|