# TechScout/techscout/sources/patents.py
#
# 204 lines
# 6.1 KiB
# Python

"""
USPTO Patent Search
Uses the USPTO PatentsView API for patent searches.
Free, no API key required.
API Documentation: https://patentsview.org/apis/api-endpoints
"""
import logging
import requests
from typing import List, Optional, Dict, Any
from datetime import datetime
from ..search.base import BaseSearcher, SearchResult
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class PatentSearcher(BaseSearcher):
    """
    Search the USPTO patent database via the PatentsView API.

    Patents indicate active R&D and can reveal technical approaches
    to solving capability gaps.
    """

    # NOTE(review): this is the legacy PatentsView query endpoint; confirm it
    # is still being served (and still key-less) before relying on it.
    BASE_URL = "https://api.patentsview.org/patents/query"

    # Largest page size this class asks for in a single request.
    MAX_PER_PAGE = 100

    # Assignee names queried by search_defense_contractors().
    DEFENSE_CONTRACTORS = (
        "Lockheed Martin",
        "Raytheon",
        "Northrop Grumman",
        "Boeing",
        "General Dynamics",
        "L3Harris",
        "BAE Systems",
    )

    def __init__(self, timeout: int = 30):
        """
        Args:
            timeout: Value forwarded as the ``timeout`` argument of every
                ``requests.post`` call made by this searcher.
        """
        self.timeout = timeout

    @property
    def name(self) -> str:
        """Human-readable source name stored on each SearchResult."""
        return "USPTO Patents"

    @property
    def source_type(self) -> str:
        """Source category stored on each SearchResult."""
        return "patent"

    def _post_query(self, payload: Dict[str, Any]) -> Any:
        """POST ``payload`` as JSON to BASE_URL and return the decoded body.

        Raises:
            requests.exceptions.RequestException: On transport errors or a
                non-2xx status (via ``raise_for_status``).
            ValueError: If the response body is not valid JSON.
        """
        response = requests.post(
            self.BASE_URL,
            json=payload,
            timeout=self.timeout,
            headers={"Content-Type": "application/json"},
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _build_query_filter(
        query: str,
        year_start: Optional[int] = None,
        assignee: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Build the ``q`` filter: abstract text match plus optional limits."""
        query_parts: List[Dict[str, Any]] = [
            {"_text_any": {"patent_abstract": query}}
        ]
        if year_start:
            query_parts.append(
                {"_gte": {"patent_date": f"{year_start}-01-01"}}
            )
        if assignee:
            # NOTE(review): "_text_any" presumably matches any single word of
            # the name, so multi-word names may over-match; verify whether
            # "_text_phrase" is the intended operator here.
            query_parts.append(
                {"_text_any": {"assignee_organization": assignee}}
            )

        # A single condition is sent bare; several are combined with AND.
        if len(query_parts) > 1:
            return {"_and": query_parts}
        return query_parts[0]

    def _to_result(self, patent: Dict[str, Any], rank: int) -> SearchResult:
        """Convert one patent record from the API into a SearchResult."""
        patent_number = patent.get("patent_number", "")
        url = (
            f"https://patents.google.com/patent/US{patent_number}"
            if patent_number
            else ""
        )

        # Use the first listed assignee as the organization; a missing or
        # null organization becomes "" rather than None.
        assignees = patent.get("assignees") or []
        org = (assignees[0].get("assignee_organization") or "") if assignees else ""

        abstract = patent.get("patent_abstract")
        return SearchResult(
            title=patent.get("patent_title", ""),
            url=url,
            snippet=abstract[:500] if abstract else "",
            source=self.name,
            source_type=self.source_type,
            rank=rank,
            published_date=patent.get("patent_date"),
            organization=org,
            patent_number=patent_number,
            trl_estimate=5,  # Patents typically represent mid-TRL tech
            raw_data=patent,
        )

    def search(
        self,
        query: str,
        max_results: int = 20,
        year_start: Optional[int] = None,
        assignee: Optional[str] = None
    ) -> List[SearchResult]:
        """
        Search USPTO patents, most recent first.

        Args:
            query: Text matched against the patent abstract
            max_results: Maximum results; at most MAX_PER_PAGE are requested
                from the API in the single call made here
            year_start: Only patents dated on or after January 1 of this year
            assignee: Filter by assignee/company name

        Returns:
            List of SearchResult objects. Failures are logged rather than
            raised, so an empty (or partial) list is returned on error.
        """
        # Nothing can be returned, so skip the network round trip.
        if max_results <= 0:
            return []

        payload = {
            "q": self._build_query_filter(query, year_start, assignee),
            "f": [
                "patent_number",
                "patent_title",
                "patent_abstract",
                "patent_date",
                "assignee_organization",
                "inventor_first_name",
                "inventor_last_name",
            ],
            "o": {"per_page": min(max_results, self.MAX_PER_PAGE)},
            "s": [{"patent_date": "desc"}],  # Most recent first
        }

        results: List[SearchResult] = []
        try:
            data = self._post_query(payload)
            # "patents" may be present but null (not just absent), in which
            # case .get()'s default would not apply.
            patents = data.get("patents") or []
            for rank, patent in enumerate(patents[:max_results], 1):
                results.append(self._to_result(patent, rank))
            logger.info(
                "Patent search for '%s' returned %d results",
                query,
                len(results),
            )
        except requests.exceptions.RequestException as e:
            logger.error("USPTO API request failed: %s", e)
        except Exception as e:
            # Unexpected failure (bad JSON, odd payload shape): keep the
            # traceback so it can be diagnosed.
            logger.exception("Patent search error: %s", e)
        return results

    def search_defense_contractors(self, query: str, max_results: int = 20) -> List[SearchResult]:
        """
        Search patents from major defense contractors.

        Runs one search() per name in DEFENSE_CONTRACTORS, merges the
        results, and returns the ones with the latest published_date.

        Args:
            query: Text search forwarded to search()
            max_results: Maximum number of merged results to return

        Returns:
            Up to ``max_results`` SearchResult objects, sorted by
            published_date descending. Each result keeps the rank it had
            within its own per-contractor search.
        """
        if max_results <= 0:
            return []

        contractors = self.DEFENSE_CONTRACTORS
        # Ceiling division, so the per-contractor quotas add up to at least
        # max_results (floor division could leave the total short).
        per_contractor = max(2, -(-max_results // len(contractors)))

        merged: List[SearchResult] = []
        seen_numbers = set()
        for contractor in contractors:
            for result in self.search(
                query,
                max_results=per_contractor,
                assignee=contractor,
            ):
                # A patent matching more than one contractor filter would
                # otherwise be listed twice.
                number = getattr(result, "patent_number", None)
                if number:
                    if number in seen_numbers:
                        continue
                    seen_numbers.add(number)
                merged.append(result)

        # Sort by date (missing dates last) and limit
        merged.sort(key=lambda r: r.published_date or "", reverse=True)
        return merged[:max_results]

    def get_patent_details(self, patent_number: str) -> Optional[Dict[str, Any]]:
        """
        Get detailed information about a specific patent.

        Args:
            patent_number: Patent number to look up

        Returns:
            The first matching patent record from the API response, or None
            if nothing matched or the request failed (failures are logged).
        """
        if not patent_number:
            return None

        payload = {
            "q": {"patent_number": patent_number},
            "f": [
                "patent_number",
                "patent_title",
                "patent_abstract",
                "patent_date",
                "patent_type",
                "assignee_organization",
                "assignee_type",
                "inventor_first_name",
                "inventor_last_name",
                "inventor_city",
                "inventor_state",
                "cpc_section_id",
                "cpc_subsection_title",
            ],
        }
        try:
            data = self._post_query(payload)
            patents = data.get("patents") or []
            return patents[0] if patents else None
        except Exception as e:
            logger.error("Failed to get patent details: %s", e)
            return None