"""
Base search classes for TechScout.
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
from datetime import datetime


@dataclass
class SearchResult:
    """A single search result from any source."""

    title: str
    url: str
    snippet: str
    source: str  # Which searcher found this
    source_type: str  # sbir, patent, contract, news, web
    rank: int = 0  # Position in results

    # Metadata
    published_date: Optional[str] = None
    organization: Optional[str] = None  # Company/institution name
    award_amount: Optional[float] = None  # For contracts/grants
    trl_estimate: Optional[int] = None  # Technology readiness level

    # Identifiers
    award_id: Optional[str] = None  # SBIR award ID, contract number
    patent_number: Optional[str] = None

    # Raw data for later processing
    raw_data: Dict[str, Any] = field(default_factory=dict)

    # Scoring (filled in later)
    relevance_score: float = 0.0
    final_score: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """
        Return the result as a plain dictionary.

        Every field except ``raw_data`` is included, keyed by its
        attribute name.
        """
        return {
            "title": self.title,
            "url": self.url,
            "snippet": self.snippet,
            "source": self.source,
            "source_type": self.source_type,
            "rank": self.rank,
            "published_date": self.published_date,
            "organization": self.organization,
            "award_amount": self.award_amount,
            "trl_estimate": self.trl_estimate,
            "award_id": self.award_id,
            "patent_number": self.patent_number,
            "relevance_score": self.relevance_score,
            "final_score": self.final_score,
        }


class BaseSearcher(ABC):
    """Abstract base class for all searchers."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the name of this searcher."""

    @property
    @abstractmethod
    def source_type(self) -> str:
        """Return the type of source (sbir, patent, contract, news, web)."""

    @abstractmethod
    def search(self, query: str, max_results: int = 20, **kwargs) -> List[SearchResult]:
        """
        Execute a search.

        Args:
            query: Search query
            max_results: Maximum results to return
            **kwargs: Searcher-specific options

        Returns:
            List of SearchResult objects
        """

    def search_multiple(
        self,
        queries: List[str],
        max_results_per_query: int = 10,
        **kwargs,
    ) -> List[SearchResult]:
        """
        Execute multiple searches and deduplicate.

        Results are returned in the order they were found. Two results are
        considered duplicates when they share the same non-empty URL; the
        first occurrence wins. Results without a URL are always kept, since
        an empty URL cannot identify them as the same item.

        Args:
            queries: List of search queries
            max_results_per_query: Max results per query
            **kwargs: Extra options forwarded unchanged to every ``search`` call

        Returns:
            Deduplicated list of results
        """
        unique_results: List[SearchResult] = []
        seen_urls = set()

        for query in queries:
            found = self.search(query, max_results=max_results_per_query, **kwargs)
            for result in found:
                url = result.url
                if url:
                    if url in seen_urls:
                        continue
                    seen_urls.add(url)
                # URL-less results fall through: there is no key to
                # deduplicate them on, so dropping them would lose data.
                unique_results.append(result)

        return unique_results