204 lines
6.1 KiB
Python
204 lines
6.1 KiB
Python
"""
USPTO Patent Search

Uses the USPTO PatentsView API for patent searches.
Free, no API key required.

API Documentation: https://patentsview.org/apis/api-endpoints
"""
|
|
|
|
import logging
|
|
import requests
|
|
from typing import List, Optional, Dict, Any
|
|
from datetime import datetime
|
|
|
|
from ..search.base import BaseSearcher, SearchResult
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class PatentSearcher(BaseSearcher):
    """
    Search USPTO patent database via PatentsView API.

    Patents indicate active R&D and can reveal technical approaches
    to solving capability gaps.

    Every request is POSTed as JSON to ``BASE_URL``. Failures are logged
    and never raised to the caller: ``search`` returns whatever it has
    collected so far and ``get_patent_details`` returns ``None``.
    """

    BASE_URL = "https://api.patentsview.org/patents/query"

    # Upper bound for the "per_page" option sent with a search request.
    MAX_PER_PAGE = 100

    # Number of abstract characters kept in a result's snippet.
    SNIPPET_LENGTH = 500

    # Assignee names queried by search_defense_contractors().
    DEFENSE_CONTRACTORS = (
        "Lockheed Martin",
        "Raytheon",
        "Northrop Grumman",
        "Boeing",
        "General Dynamics",
        "L3Harris",
        "BAE Systems",
    )

    def __init__(self, timeout: int = 30):
        """
        Args:
            timeout: Timeout (seconds) handed to every ``requests.post`` call.
        """
        self.timeout = timeout

    @property
    def name(self) -> str:
        """Human-readable source name stored on every result."""
        return "USPTO Patents"

    @property
    def source_type(self) -> str:
        """Source category stored on every result."""
        return "patent"

    def _post_query(self, payload: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        POST a query payload to ``BASE_URL`` and return its "patents" list.

        Args:
            payload: JSON-serialisable request body ("q", "f", "o", "s" keys).

        Returns:
            The list stored under "patents", or an empty list when that key
            is missing or null.

        Raises:
            requests.exceptions.RequestException: On connection failure,
                timeout, or a non-success HTTP status.
            ValueError: If the response body is not valid JSON.
        """
        response = requests.post(
            self.BASE_URL,
            json=payload,
            timeout=self.timeout,
            headers={"Content-Type": "application/json"},
        )
        response.raise_for_status()
        data = response.json()
        # `.get("patents", [])` would return None when the key is present
        # with a JSON null value (reportedly what the API sends for queries
        # with zero matches - confirm against the API docs), so normalise
        # explicitly instead of relying on the default.
        return data.get("patents") or []

    @staticmethod
    def _build_query_filter(
        query: str,
        year_start: Optional[int] = None,
        assignee: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Build the "q" filter: abstract text match plus optional criteria."""
        query_parts: List[Dict[str, Any]] = [
            {"_text_any": {"patent_abstract": query}}
        ]

        if year_start:
            query_parts.append(
                {"_gte": {"patent_date": f"{year_start}-01-01"}}
            )

        if assignee:
            # NOTE(review): "_text_any" presumably matches on ANY word, so a
            # multi-word name such as "Lockheed Martin" may also match
            # organisations containing only "Martin" - confirm whether a
            # phrase/contains operator is intended here.
            query_parts.append(
                {"_text_any": {"assignee_organization": assignee}}
            )

        # A single criterion is sent bare; several are combined with AND.
        if len(query_parts) > 1:
            return {"_and": query_parts}
        return query_parts[0]

    def _to_search_result(self, patent: Dict[str, Any], rank: int) -> SearchResult:
        """Convert one raw patent record into a SearchResult."""
        # `or ""` also covers keys that are present but null, which a
        # `.get(key, "")` default does not.
        patent_number = patent.get("patent_number") or ""
        url = (
            f"https://patents.google.com/patent/US{patent_number}"
            if patent_number
            else ""
        )

        # Only the first listed assignee is reported as the organization.
        assignees = patent.get("assignees") or []
        org = (assignees[0].get("assignee_organization") or "") if assignees else ""

        abstract = patent.get("patent_abstract") or ""

        return SearchResult(
            title=patent.get("patent_title") or "",
            url=url,
            snippet=abstract[:self.SNIPPET_LENGTH],
            source=self.name,
            source_type=self.source_type,
            rank=rank,
            published_date=patent.get("patent_date"),
            organization=org,
            patent_number=patent_number,
            trl_estimate=5,  # Patents typically represent mid-TRL tech
            raw_data=patent,
        )

    def search(
        self,
        query: str,
        max_results: int = 20,
        year_start: Optional[int] = None,
        assignee: Optional[str] = None
    ) -> List[SearchResult]:
        """
        Search USPTO patents.

        Args:
            query: Text search (matched against the patent abstract)
            max_results: Maximum results; the request page size is capped
                at ``MAX_PER_PAGE``
            year_start: Only patents from this year forward
            assignee: Filter by assignee/company name

        Returns:
            List of SearchResult objects, most recent first. Empty when
            ``max_results`` is not positive or when the request fails
            (failures are logged, not raised).
        """
        results: List[SearchResult] = []

        # Nothing can be returned, so do not spend a network round-trip.
        if max_results <= 0:
            return results

        payload = {
            "q": self._build_query_filter(query, year_start, assignee),
            "f": [
                "patent_number",
                "patent_title",
                "patent_abstract",
                "patent_date",
                "assignee_organization",
                "inventor_first_name",
                "inventor_last_name",
            ],
            "o": {"per_page": min(max_results, self.MAX_PER_PAGE)},
            "s": [{"patent_date": "desc"}],  # Most recent first
        }

        try:
            patents = self._post_query(payload)

            # Slice defensively in case more than max_results rows come back.
            for rank, patent in enumerate(patents[:max_results], 1):
                results.append(self._to_search_result(patent, rank))

            logger.info(
                "Patent search for '%s' returned %d results",
                query,
                len(results),
            )

        except requests.exceptions.RequestException as e:
            logger.error("USPTO API request failed: %s", e)
        except Exception as e:
            # Best-effort boundary: malformed payloads must not crash callers;
            # results converted before the failure are still returned.
            logger.error("Patent search error: %s", e)

        return results

    def search_defense_contractors(self, query: str, max_results: int = 20) -> List[SearchResult]:
        """
        Search patents from major defense contractors.

        Runs one ``search`` per entry in ``DEFENSE_CONTRACTORS`` and merges
        the results.

        Args:
            query: Text search forwarded to ``search``
            max_results: Maximum number of merged results to return

        Returns:
            Merged results sorted by ``published_date`` (newest first),
            truncated to ``max_results``. Each result keeps the rank it had
            within its own contractor query.
        """
        # A negative value would make the final slice drop items from the
        # end instead of limiting the list, so treat it like zero.
        if max_results <= 0:
            return []

        # Ask for at least two per contractor so small limits still sample
        # more than one patent from each company.
        per_contractor = max(2, max_results // len(self.DEFENSE_CONTRACTORS))

        all_results: List[SearchResult] = []
        for contractor in self.DEFENSE_CONTRACTORS:
            all_results.extend(
                self.search(
                    query,
                    max_results=per_contractor,
                    assignee=contractor,
                )
            )

        # Sort by date and limit; missing dates sort last.
        all_results.sort(key=lambda x: x.published_date or "", reverse=True)
        return all_results[:max_results]

    def get_patent_details(self, patent_number: str) -> Optional[Dict[str, Any]]:
        """
        Get detailed information about a specific patent.

        Args:
            patent_number: Patent number to look up

        Returns:
            The first matching raw patent record, or ``None`` when the number
            is empty, nothing matches, or the request fails (failures are
            logged, not raised).
        """
        if not patent_number:
            return None

        payload = {
            "q": {"patent_number": patent_number},
            "f": [
                "patent_number",
                "patent_title",
                "patent_abstract",
                "patent_date",
                "patent_type",
                "assignee_organization",
                "assignee_type",
                "inventor_first_name",
                "inventor_last_name",
                "inventor_city",
                "inventor_state",
                "cpc_section_id",
                "cpc_subsection_title",
            ],
        }

        try:
            patents = self._post_query(payload)
            return patents[0] if patents else None
        except Exception as e:
            # Best-effort lookup: report the failure and signal "not found".
            logger.error("Failed to get patent details: %s", e)
            return None
|