# TechScout/techscout/search/base.py
#
# 112 lines
# 3.2 KiB
# Python
# Raw Normal View History
#
# 2026-01-22 13:02:09 -05:00
"""
Base search classes for TechScout.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
from datetime import datetime
@dataclass
class SearchResult:
    """One hit produced by a searcher, whatever the underlying source.

    ``title``, ``url``, ``snippet``, ``source`` and ``source_type`` are
    required; every other field has a default.
    """

    # --- Core fields ---
    title: str
    url: str
    snippet: str
    source: str  # Name of the searcher that found this hit
    source_type: str  # Source category: sbir, patent, contract, news, web
    rank: int = 0  # Position in the result list

    # --- Optional metadata ---
    published_date: Optional[str] = None
    organization: Optional[str] = None  # Company / institution name
    award_amount: Optional[float] = None  # Used for contracts and grants
    trl_estimate: Optional[int] = None  # Technology readiness level

    # --- Identifiers ---
    award_id: Optional[str] = None  # SBIR award ID or contract number
    patent_number: Optional[str] = None

    # --- Unprocessed payload kept for later processing ---
    raw_data: Dict[str, Any] = field(default_factory=dict)

    # --- Scores (populated later) ---
    relevance_score: float = 0.0
    final_score: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dictionary.

        ``raw_data`` is not part of the output; all other fields are
        emitted in declaration order.
        """
        exported = (
            "title",
            "url",
            "snippet",
            "source",
            "source_type",
            "rank",
            "published_date",
            "organization",
            "award_amount",
            "trl_estimate",
            "award_id",
            "patent_number",
            "relevance_score",
            "final_score",
        )
        return {key: getattr(self, key) for key in exported}
class BaseSearcher(ABC):
    """Abstract base class for all searchers.

    Concrete searchers implement ``name``, ``source_type`` and ``search``.
    ``search_multiple`` is provided here and is built on ``search``.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the name of this searcher."""

    @property
    @abstractmethod
    def source_type(self) -> str:
        """Return the type of source (sbir, patent, contract, news, web)."""

    @abstractmethod
    def search(self, query: str, max_results: int = 20, **kwargs) -> List[SearchResult]:
        """
        Execute a search.

        Args:
            query: Search query
            max_results: Maximum results to return
            **kwargs: Extra, searcher-specific options

        Returns:
            List of SearchResult objects
        """

    def search_multiple(
        self,
        queries: List[str],
        max_results_per_query: int = 10,
        **kwargs: Any,
    ) -> List[SearchResult]:
        """
        Execute multiple searches and deduplicate by URL.

        Queries run in the order given. When several results share a URL,
        only the first one encountered is kept.

        Args:
            queries: List of search queries
            max_results_per_query: Max results per query
            **kwargs: Extra options forwarded unchanged to every
                ``search`` call, so searcher-specific options are also
                usable on the multi-query path

        Returns:
            Deduplicated list of results, in first-seen order
        """
        all_results: List[SearchResult] = []
        seen_urls = set()

        for query in queries:
            results = self.search(
                query, max_results=max_results_per_query, **kwargs
            )
            for result in results:
                if result.url in seen_urls:
                    continue
                seen_urls.add(result.url)
                all_results.append(result)

        return all_results