"""
USPTO Patent Search

Uses the USPTO PatentsView API for patent searches.
Free, no API key required.

API Documentation: https://patentsview.org/apis/api-endpoints
"""

import logging
from datetime import datetime
from typing import List, Optional, Dict, Any

import requests

from ..search.base import BaseSearcher, SearchResult

logger = logging.getLogger(__name__)


class PatentSearcher(BaseSearcher):
    """
    Search USPTO patent database via PatentsView API.

    Patents indicate active R&D and can reveal technical approaches
    to solving capability gaps.

    All network failures are logged and swallowed: the public methods
    return an empty list / ``None`` rather than raising.
    """

    # NOTE(review): this is the legacy PatentsView endpoint; PatentsView has
    # been migrating to a keyed "PatentSearch" API - confirm this URL is
    # still reachable before relying on it.
    BASE_URL = "https://api.patentsview.org/patents/query"

    # Upper bound sent as ``per_page``; larger ``max_results`` values are capped.
    MAX_PER_PAGE = 100

    # Assignee names queried by ``search_defense_contractors``.
    DEFENSE_CONTRACTORS = (
        "Lockheed Martin",
        "Raytheon",
        "Northrop Grumman",
        "Boeing",
        "General Dynamics",
        "L3Harris",
        "BAE Systems",
    )

    def __init__(self, timeout: int = 30):
        """
        Args:
            timeout: Per-request timeout in seconds, passed to ``requests.post``.
        """
        self.timeout = timeout

    @property
    def name(self) -> str:
        """Human-readable source name stored on every ``SearchResult``."""
        return "USPTO Patents"

    @property
    def source_type(self) -> str:
        """Source category stored on every ``SearchResult``."""
        return "patent"

    def _fetch_patents(self, payload: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        POST ``payload`` to ``BASE_URL`` and return the ``patents`` list.

        Args:
            payload: JSON-serialisable query body (``q``, ``f``, ...).

        Returns:
            The value of the response's ``patents`` key, or ``[]`` when the
            key is missing or null.

        Raises:
            requests.exceptions.RequestException: On transport failure or a
                non-2xx status (via ``raise_for_status``).
        """
        response = requests.post(
            self.BASE_URL,
            json=payload,
            timeout=self.timeout,
            headers={"Content-Type": "application/json"},
        )
        response.raise_for_status()
        data = response.json()
        # ``dict.get``'s default only applies when the key is absent. A JSON
        # ``null`` (reportedly what the API sends for zero matches - verify)
        # would come back as None and break iteration, so coerce it to [].
        return data.get("patents") or []

    def _to_result(self, patent: Dict[str, Any], rank: int) -> SearchResult:
        """Convert one raw patent record into a ``SearchResult``."""
        # ``or ""`` also covers fields that are present but null.
        patent_number = patent.get("patent_number") or ""
        url = (
            f"https://patents.google.com/patent/US{patent_number}"
            if patent_number
            else ""
        )

        # Only the first listed assignee is reported as the organization.
        assignees = patent.get("assignees") or []
        org = (assignees[0].get("assignee_organization") or "") if assignees else ""

        return SearchResult(
            title=patent.get("patent_title") or "",
            url=url,
            snippet=(patent.get("patent_abstract") or "")[:500],
            source=self.name,
            source_type=self.source_type,
            rank=rank,
            published_date=patent.get("patent_date"),
            organization=org,
            patent_number=patent_number,
            trl_estimate=5,  # Patents typically represent mid-TRL tech
            raw_data=patent,
        )

    def search(
        self,
        query: str,
        max_results: int = 20,
        year_start: Optional[int] = None,
        assignee: Optional[str] = None
    ) -> List[SearchResult]:
        """
        Search USPTO patents.

        Args:
            query: Text matched against the patent abstract (``_text_any``
                filter on ``patent_abstract``).
            max_results: Maximum results; capped at ``MAX_PER_PAGE`` because
                only a single page is requested. Non-positive values return
                an empty list without contacting the API.
            year_start: Only patents dated January 1st of this year or later
            assignee: Filter by assignee/company name

        Returns:
            List of SearchResult objects, most recent first. Empty on any
            request or parsing failure (the error is logged, not raised).
        """
        if max_results <= 0:
            return []

        # Build query filter
        conditions: List[Dict[str, Any]] = [
            {"_text_any": {"patent_abstract": query}}
        ]

        if year_start:
            conditions.append({
                "_gte": {"patent_date": f"{year_start}-01-01"}
            })

        if assignee:
            conditions.append({
                "_text_any": {"assignee_organization": assignee}
            })

        # A single condition is sent bare; several are combined with AND.
        query_filter = {"_and": conditions} if len(conditions) > 1 else conditions[0]

        payload = {
            "q": query_filter,
            "f": [
                "patent_number",
                "patent_title",
                "patent_abstract",
                "patent_date",
                "assignee_organization",
                "inventor_first_name",
                "inventor_last_name"
            ],
            "o": {"per_page": min(max_results, self.MAX_PER_PAGE)},
            "s": [{"patent_date": "desc"}]  # Most recent first
        }

        results: List[SearchResult] = []

        try:
            patents = self._fetch_patents(payload)

            # Append one at a time so that results converted before an
            # unexpected record error are still returned (best effort).
            for rank, patent in enumerate(patents[:max_results], 1):
                results.append(self._to_result(patent, rank))

            logger.info(f"Patent search for '{query}' returned {len(results)} results")

        except requests.exceptions.RequestException as e:
            logger.error(f"USPTO API request failed: {e}")
        except Exception as e:
            logger.error(f"Patent search error: {e}")

        return results

    def search_defense_contractors(self, query: str, max_results: int = 20) -> List[SearchResult]:
        """
        Search patents from major defense contractors.

        Runs one ``search`` per entry in ``DEFENSE_CONTRACTORS`` (sequential
        requests), merges the hits, and keeps the most recent ones.

        Args:
            query: Text search forwarded to ``search``.
            max_results: Maximum number of merged results. Non-positive
                values return an empty list without contacting the API.

        Returns:
            Up to ``max_results`` SearchResult objects sorted by
            ``published_date`` descending; undated results sort last.
        """
        # Guard first: a negative value would otherwise make the final slice
        # drop items from the tail instead of returning nothing.
        if max_results <= 0:
            return []

        contractors = self.DEFENSE_CONTRACTORS
        # Ask each contractor for at least two hits so small budgets still
        # sample every company; the merged list is trimmed afterwards.
        per_contractor = max(2, max_results // len(contractors))

        all_results: List[SearchResult] = []
        for contractor in contractors:
            all_results.extend(
                self.search(query, max_results=per_contractor, assignee=contractor)
            )

        # Sort by date and limit
        all_results.sort(key=lambda x: x.published_date or "", reverse=True)
        return all_results[:max_results]

    def get_patent_details(self, patent_number: str) -> Optional[Dict[str, Any]]:
        """
        Get detailed information about a specific patent.

        Args:
            patent_number: Patent number used as an exact-match query.

        Returns:
            The first matching raw patent record, or ``None`` when nothing
            matches or the request fails (the error is logged, not raised).
        """
        payload = {
            "q": {"patent_number": patent_number},
            "f": [
                "patent_number",
                "patent_title",
                "patent_abstract",
                "patent_date",
                "patent_type",
                "assignee_organization",
                "assignee_type",
                "inventor_first_name",
                "inventor_last_name",
                "inventor_city",
                "inventor_state",
                "cpc_section_id",
                "cpc_subsection_title"
            ]
        }

        try:
            patents = self._fetch_patents(payload)
            return patents[0] if patents else None
        except Exception as e:
            logger.error(f"Failed to get patent details: {e}")
            return None