"""Patent Search Service using Lens.org API"""

import logging
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Optional

import httpx

from app.config import settings

logger = logging.getLogger(__name__)


@dataclass
class PatentSearchResult:
    """Single patent search result from Lens.org"""

    lens_id: str
    doc_number: str
    jurisdiction: str
    kind: str
    title: str
    abstract: Optional[str]
    date_published: Optional[str]
    applicants: List[str]
    inventors: List[str]
    legal_status: Optional[str]
    classifications_cpc: List[str]
    families_simple: List[str]
    url: str

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dict (recursively, via ``asdict``)."""
        return asdict(self)


class PatentSearchService:
    """Service for searching patents using Lens.org API

    ``search`` never raises: every failure is reported through an ``"error"``
    key in the returned dict, alongside an empty ``"patents"`` list.
    """

    LENS_API_URL = "https://api.lens.org/patent/search"

    # User-facing messages for the status codes that get dedicated handling.
    _STATUS_ERRORS = {
        401: "Authentication failed - invalid API token",
        429: "Rate limit exceeded - please try again later",
    }

    def __init__(self):
        # Created lazily on first use so that constructing the service (done
        # at import time for the module-level singleton) performs no I/O.
        self._client: Optional[httpx.AsyncClient] = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared async client, (re)creating it if missing or closed."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=30.0,
                follow_redirects=True,
            )
        return self._client

    async def close(self):
        """Close the underlying HTTP client if one is open."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()

    def _get_headers(self) -> Dict[str, str]:
        """Get headers with authorization token

        Raises:
            ValueError: if ``settings.lens_api_token`` is empty or unset.
        """
        token = settings.lens_api_token
        if not token:
            raise ValueError("LENS_API_TOKEN environment variable is not set")
        return {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

    @staticmethod
    def _error_result(message: str) -> Dict[str, Any]:
        """Build the empty result dict used to report a failure."""
        return {"total_results": 0, "patents": [], "error": message}

    @staticmethod
    def _describe(exc: BaseException) -> str:
        """Return a non-empty description of *exc*.

        ``str(exc)`` can be an empty string for some exceptions; an empty
        ``"error"`` value would be falsy and easy for callers to miss, so fall
        back to the exception class name.
        """
        return str(exc) or type(exc).__name__

    async def search(
        self,
        query: str,
        max_results: int = 10,
    ) -> dict:
        """
        Search Lens.org for relevant patents

        Args:
            query: Search query (searches title, abstract, and claims)
            max_results: Maximum number of results to return

        Returns:
            Dict with ``total_results`` count and ``patents`` list (each patent
            as a dict). On failure ``total_results`` is 0, ``patents`` is empty
            and an ``error`` message is included.
        """
        try:
            # Handled on its own so that only a missing token is reported as a
            # configuration error. A JSON decode failure is also a ValueError
            # and must not be mislabelled.
            try:
                headers = self._get_headers()
            except ValueError as e:
                logger.error("Configuration error: %s", e)
                return self._error_result(str(e))

            client = await self._get_client()

            # Build Lens.org query using query string format for full-text search
            request_body = {
                "query": query,
                "size": max_results,
                "sort": [{"_score": "desc"}],
            }

            logger.info(
                "Searching Lens.org patents with query: %s...", query[:100]
            )

            response = await client.post(
                self.LENS_API_URL,
                json=request_body,
                headers=headers,
            )
            return self._handle_response(response)

        except httpx.HTTPError as e:
            message = self._describe(e)
            logger.error("HTTP error searching patents: %s", message)
            return self._error_result(message)
        except Exception as e:  # boundary: search() must never raise
            logger.exception("Error searching patents: %s", e)
            return self._error_result(self._describe(e))

    def _handle_response(self, response: httpx.Response) -> Dict[str, Any]:
        """Turn an HTTP response from Lens.org into the ``search`` result dict."""
        status = response.status_code

        if status == 401:
            logger.error("Lens.org API authentication failed - check LENS_API_TOKEN")
            return self._error_result(self._STATUS_ERRORS[401])

        if status == 429:
            logger.warning("Lens.org API rate limit exceeded")
            return self._error_result(self._STATUS_ERRORS[429])

        if status != 200:
            logger.error(
                "Lens.org API returned status %s: %s", status, response.text
            )
            return self._error_result(f"API returned status {status}")

        try:
            data = response.json()
        except ValueError as e:  # json.JSONDecodeError subclasses ValueError
            logger.error("Lens.org API returned invalid JSON: %s", e)
            return self._error_result("Invalid JSON response from Lens.org API")

        if not isinstance(data, dict):
            logger.error(
                "Lens.org API returned unexpected payload type: %s",
                type(data).__name__,
            )
            return self._error_result("Unexpected response format from Lens.org API")

        total_results = data.get("total", 0)
        results = data.get("data")
        if not isinstance(results, list):
            results = []

        patents: List[PatentSearchResult] = []
        for item in results:
            if not isinstance(item, dict):
                # One malformed entry should not discard the whole result set.
                logger.warning(
                    "Skipping malformed patent entry of type %s",
                    type(item).__name__,
                )
                continue
            patents.append(self._parse_patent(item))

        logger.info(
            "Found %s total patents, returning %s", total_results, len(patents)
        )

        return {
            "total_results": total_results,
            "patents": [p.to_dict() for p in patents],
        }

    @staticmethod
    def _as_dict(value: Any) -> Dict[str, Any]:
        """Return *value* if it is a dict, otherwise an empty dict.

        ``dict.get(key, {})`` only applies its default when the key is absent;
        a key present with a JSON ``null`` yields ``None``. This helper makes
        chained lookups safe in both cases.
        """
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _extract_party_names(parties: Dict[str, Any], role: str) -> List[str]:
        """Collect non-empty ``extracted_name.value`` strings from ``parties[role]``."""
        entries = parties.get(role)
        if not isinstance(entries, list):
            return []

        names: List[str] = []
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            extracted = entry.get("extracted_name")
            name = extracted.get("value") if isinstance(extracted, dict) else None
            if name:
                names.append(name)
        return names

    def _parse_patent(self, item: Dict[str, Any]) -> PatentSearchResult:
        """Parse a single patent result from Lens.org response

        Missing, ``null`` or wrongly typed sections are treated as absent, so
        a partially populated record still produces a result.
        """
        lens_id = item.get("lens_id") or ""

        # Get biblio section (contains title, parties, classifications)
        biblio = self._as_dict(item.get("biblio"))
        parties = self._as_dict(biblio.get("parties"))

        # Extract legal status
        legal_status_data = item.get("legal_status")
        legal_status = (
            legal_status_data.get("patent_status")
            if isinstance(legal_status_data, dict)
            else None
        )

        # Extract CPC classifications from biblio.classifications_cpc
        cpc_data = biblio.get("classifications_cpc")
        classifications_cpc: List[str] = []
        if isinstance(cpc_data, list):
            classifications_cpc = [
                cpc["symbol"]
                for cpc in cpc_data
                if isinstance(cpc, dict) and cpc.get("symbol")
            ]

        # Extract simple family members (families.simple.members[].lens_id)
        families = self._as_dict(item.get("families"))
        members = self._as_dict(families.get("simple")).get("members")
        families_simple: List[str] = []
        if isinstance(members, list):
            families_simple = [
                member["lens_id"]
                for member in members
                if isinstance(member, dict) and member.get("lens_id")
            ]

        return PatentSearchResult(
            lens_id=lens_id,
            doc_number=item.get("doc_number") or "",
            jurisdiction=item.get("jurisdiction") or "",
            kind=item.get("kind") or "",
            # Title lives in biblio.invention_title, abstract at the top level.
            # Both are handled as language-tagged lists.
            title=self._extract_text_with_lang(biblio.get("invention_title")),
            abstract=self._extract_text_with_lang(item.get("abstract")),
            date_published=item.get("date_published"),
            applicants=self._extract_party_names(parties, "applicants"),
            inventors=self._extract_party_names(parties, "inventors"),
            legal_status=legal_status,
            classifications_cpc=classifications_cpc,
            families_simple=families_simple,
            # Build URL to Lens.org patent page
            url=f"https://www.lens.org/lens/patent/{lens_id}" if lens_id else "",
        )

    def _extract_text_with_lang(self, data: Any, prefer_lang: str = "en") -> str:
        """Extract text from Lens.org language-tagged list, preferring specified language

        Args:
            data: A plain string, or a list of ``{"lang": ..., "text": ...}``
                dicts. Anything else (including empty values) yields ``""``.
            prefer_lang: Language code to look for first.

        Returns:
            The text of the first entry tagged ``prefer_lang``; otherwise the
            text of the first entry; otherwise ``""``.
        """
        if not data:
            return ""

        if isinstance(data, str):
            return data

        if not isinstance(data, list):
            return ""

        # Prefer specified language
        for entry in data:
            if isinstance(entry, dict) and entry.get("lang") == prefer_lang:
                # `or ""` keeps the declared str return type for a null text.
                return entry.get("text") or ""

        # Fall back to first item
        first = data[0]
        if isinstance(first, dict):
            return first.get("text") or ""
        return str(first)


# Singleton instance
patent_search_service = PatentSearchService()