"""
TechScout API Server

Simple HTTP server that the dashboard calls for discovery/deep-dive/match
operations.
"""

import functools
import json
import logging
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path
from urllib.parse import parse_qs, urlparse
import threading

from techscout.pipeline.discovery import DiscoveryPipeline
from techscout.pipeline.deep_dive import DeepDivePipeline
from techscout.pipeline.capability_matcher import CapabilityMatcherPipeline

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Model name handed to every pipeline constructor.
DEFAULT_MODEL = 'mistral-nemo:12b'

# Directory (next to this file) that is scanned for discovery_*.json files.
ANALYSES_DIR = Path(__file__).parent / "analyses"

# Organization values (after strip + lower) that are rejected for deep dives.
_PLACEHOLDER_ORGANIZATIONS = frozenset({'unknown', 'n/a', 'none', ''})

# Global pipelines (initialized once, on first use).
# NOTE: HTTPServer handles one request at a time, so the lazy initialization
# below is not guarded by a lock; add one before switching to a threaded server.
discovery_pipeline = None
deepdive_pipeline = None
matcher_pipeline = None


def get_discovery_pipeline():
    """Return the shared DiscoveryPipeline, creating it on first call."""
    global discovery_pipeline
    if discovery_pipeline is None:
        logger.info("Initializing discovery pipeline...")
        discovery_pipeline = DiscoveryPipeline(model=DEFAULT_MODEL)
    return discovery_pipeline


def get_deepdive_pipeline():
    """Return the shared DeepDivePipeline, creating it on first call."""
    global deepdive_pipeline
    if deepdive_pipeline is None:
        logger.info("Initializing deep dive pipeline...")
        deepdive_pipeline = DeepDivePipeline(model=DEFAULT_MODEL)
    return deepdive_pipeline


def get_matcher_pipeline():
    """Return the shared CapabilityMatcherPipeline, creating it on first call.

    Raises:
        Exception: whatever the pipeline constructor raised; the failure is
            logged with its traceback and re-raised unchanged.
    """
    global matcher_pipeline
    if matcher_pipeline is None:
        logger.info("Initializing capability matcher pipeline...")
        try:
            matcher_pipeline = CapabilityMatcherPipeline(model=DEFAULT_MODEL)
        except Exception as e:
            logger.exception(f"Failed to initialize matcher pipeline: {e}")
            raise
        logger.info("Capability matcher pipeline initialized successfully")
    return matcher_pipeline


class _BadRequestError(Exception):
    """Raised by request validation; reported to the client as HTTP 400."""


def _require_text(data, key, missing_message):
    """Return ``data[key]`` if it is a non-empty string.

    Args:
        data: Parsed JSON request body (a dict).
        key: Field name to read.
        missing_message: Error text used when the field is absent or empty.

    Raises:
        _BadRequestError: if the field is missing/empty or is not a string.
    """
    value = data.get(key, '')
    if not value:
        raise _BadRequestError(missing_message)
    if not isinstance(value, str):
        raise _BadRequestError(f"{key} must be a string")
    return value


def _summarize_discovery(data):
    """Build the list-view summary dict for one parsed discovery file.

    Raises:
        ValueError: if ``data`` is not a JSON object.
    """
    if not isinstance(data, dict):
        raise ValueError("discovery file does not contain a JSON object")
    candidates = data.get("candidates")
    if not isinstance(candidates, list):
        candidates = []
    first = candidates[0] if candidates else None
    # The score of the first candidate is reported as the top score.
    top_score = first.get("score", 0) if isinstance(first, dict) else 0
    return {
        "id": data.get("id"),
        "capability_gap": data.get("capability_gap"),
        "timestamp": data.get("timestamp"),
        "candidate_count": len(candidates),
        "top_score": top_score,
    }


def _json_endpoint(label):
    """Decorator factory turning handler exceptions into JSON error replies.

    ``_BadRequestError`` becomes a 400 response; any other exception is logged
    with its traceback as "<label> failed: ..." and becomes a 500 response.
    """
    def decorate(method):
        @functools.wraps(method)
        def wrapper(self, *args, **kwargs):
            try:
                return method(self, *args, **kwargs)
            except _BadRequestError as e:
                self._send_json(400, {"error": str(e)})
            except Exception as e:
                logger.exception(f"{label} failed: {e}")
                self._send_json(500, {"error": str(e)})
        return wrapper
    return decorate


class TechScoutHandler(BaseHTTPRequestHandler):
    """Request handler exposing the TechScout pipelines as JSON endpoints."""

    # POST path -> name of the method that serves it.
    _POST_ROUTES = {
        '/api/discover': '_run_discovery',
        '/api/deepdive': '_run_deepdive',
        '/api/match': '_run_match',
        '/api/match/search': '_run_match_search',
        '/api/match/extract': '_run_match_extract',
        '/api/match/evaluate': '_run_match_evaluate',
    }

    def _set_cors_headers(self):
        """Emit the CORS headers sent with every response."""
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', 'Content-Type')

    def _send_json(self, status, payload):
        """Send ``payload`` as a JSON response with the given status code.

        The body is serialized BEFORE the status line is written, so a
        serialization error cannot leave a half-sent response behind.
        """
        body = json.dumps(payload).encode()
        self.send_response(status)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self._set_cors_headers()
        self.end_headers()
        self.wfile.write(body)

    def _read_json_body(self):
        """Read the request body and return it as a dict.

        Raises:
            _BadRequestError: if Content-Length is missing or invalid, or the
                body is not a valid JSON object.
        """
        raw_length = self.headers.get('Content-Length')
        if raw_length is None:
            raise _BadRequestError("Content-Length header is required")
        try:
            length = int(raw_length)
        except ValueError:
            raise _BadRequestError("Invalid Content-Length header") from None
        if length < 0:
            raise _BadRequestError("Invalid Content-Length header")

        raw_body = self.rfile.read(length)
        try:
            # UnicodeDecodeError and JSONDecodeError both derive from ValueError.
            data = json.loads(raw_body.decode())
        except ValueError:
            raise _BadRequestError("Request body must be valid JSON") from None
        if not isinstance(data, dict):
            raise _BadRequestError("Request body must be a JSON object")
        return data

    def do_OPTIONS(self):
        """Answer CORS preflight requests."""
        logger.debug("do_OPTIONS: path=%s", self.path)
        self.send_response(200)
        self._set_cors_headers()
        self.send_header('Content-Length', '0')
        self.end_headers()

    def do_GET(self):
        """Route GET requests: health check and saved discoveries."""
        path = urlparse(self.path).path
        if path == '/api/health':
            self._send_json(200, {"status": "ok"})
        elif path == '/api/discoveries':
            self._list_discoveries()
        elif path.startswith('/api/discoveries/'):
            self._get_discovery(path.split('/')[-1])
        else:
            self._send_json(404, {"error": "Not found"})

    def do_POST(self):
        """Route POST requests to the pipeline handlers in ``_POST_ROUTES``."""
        path = urlparse(self.path).path
        logger.debug("do_POST: path=%s", path)
        handler_name = self._POST_ROUTES.get(path)
        if handler_name is None:
            self._send_json(404, {"error": "Not found"})
            return
        getattr(self, handler_name)()

    @_json_endpoint("Discovery")
    def _run_discovery(self):
        """Run technology discovery for the ``query`` in the request body."""
        data = self._read_json_body()
        query = _require_text(data, 'query', "Query is required")

        logger.info(f"Starting discovery for: {query[:50]}...")
        pipeline = get_discovery_pipeline()
        result = pipeline.discover(
            capability_gap=query,
            max_results=50,
            use_llm_scoring=True
        )

        # Everything that can fail is evaluated before the response starts.
        payload = {"result": result.to_dict()}
        candidate_count = len(result.candidates)
        self._send_json(200, payload)
        logger.info(f"Discovery complete: {candidate_count} candidates")

    @_json_endpoint("Deep dive")
    def _run_deepdive(self):
        """Run a deep dive on the ``organization`` in the request body."""
        data = self._read_json_body()
        organization = _require_text(
            data, 'organization', "Organization is required"
        )
        technology = data.get('technology', '')
        gap = data.get('gap', '')

        # Validate organization is not "Unknown" or a similar placeholder.
        if organization.strip().lower() in _PLACEHOLDER_ORGANIZATIONS:
            raise _BadRequestError(
                "Cannot perform deep dive on unknown organization. "
                "Please select a candidate with a known company name."
            )

        logger.info(f"Starting deep dive for: {organization}")
        pipeline = get_deepdive_pipeline()
        result = pipeline.deep_dive(
            organization=organization,
            technology_context=technology,
            capability_gap=gap
        )

        self._send_json(200, {"result": result.to_dict()})
        logger.info(f"Deep dive complete for: {organization}")

    @_json_endpoint("Match")
    def _run_match(self):
        """Run capability-to-technology matching (new pipeline)."""
        data = self._read_json_body()
        query = _require_text(data, 'query', "Query is required")

        # Optional parameters
        max_technologies = data.get('max_technologies', 15)
        min_fit_score = data.get('min_fit_score', 25)
        sources = data.get('sources', None)

        logger.info(f"Starting capability match for: {query[:50]}...")
        pipeline = get_matcher_pipeline()
        result = pipeline.match(
            user_input=query,
            max_technologies=max_technologies,
            min_fit_score=min_fit_score,
            sources=sources
        )

        payload = {"result": result.to_dict()}
        technology_count = len(result.technologies)
        self._send_json(200, payload)
        logger.info(f"Match complete: {technology_count} technologies found")

    @_json_endpoint("Match search")
    def _run_match_search(self):
        """Step 1: Parse capability and search sources."""
        logger.debug("_run_match_search: Handler started")
        data = self._read_json_body()
        logger.debug("_run_match_search: Got data: %s", list(data.keys()))

        query = _require_text(data, 'query', "Query is required")
        sources = data.get('sources', None)

        logger.info(f"Step 1 - Search: {query[:50]}...")
        pipeline = get_matcher_pipeline()
        result = pipeline.step_search(
            user_input=query,
            sources=sources
        )
        logger.info(f"step_search returned: success={result.success}")

        payload = {"result": result.to_dict()}
        result_count = len(result.search_results)
        self._send_json(200, payload)
        logger.info(f"Step 1 complete: {result_count} results found")

    @_json_endpoint("Match extract")
    def _run_match_extract(self):
        """Step 2: Extract technologies from search results."""
        data = self._read_json_body()
        search_id = data.get('search_id', '')
        if not search_id:
            raise _BadRequestError("search_id is required")

        logger.info(f"Step 2 - Extract: search_id={search_id}")
        pipeline = get_matcher_pipeline()
        result = pipeline.step_extract(search_id=search_id)

        payload = {"result": result.to_dict()}
        technology_count = len(result.technologies)
        self._send_json(200, payload)
        logger.info(
            f"Step 2 complete: {technology_count} technologies extracted"
        )

    @_json_endpoint("Match evaluate")
    def _run_match_evaluate(self):
        """Step 3: Evaluate selected technologies."""
        data = self._read_json_body()
        extraction_id = data.get('extraction_id', '')
        technology_ids = data.get('technology_ids', [])
        if not extraction_id:
            raise _BadRequestError("extraction_id is required")
        if not technology_ids:
            raise _BadRequestError("technology_ids is required")

        logger.info(f"Step 3 - Evaluate: {len(technology_ids)} technologies")
        pipeline = get_matcher_pipeline()
        result = pipeline.step_evaluate(
            extraction_id=extraction_id,
            technology_ids=technology_ids
        )

        payload = {"result": result.to_dict()}
        technology_count = len(result.technologies)
        self._send_json(200, payload)
        logger.info(
            f"Step 3 complete: {technology_count} technologies evaluated"
        )

    @_json_endpoint("List discoveries")
    def _list_discoveries(self):
        """Return summaries of all discovery_*.json files, newest name first.

        Files that cannot be read or parsed are skipped with a warning so one
        bad file does not fail the whole listing.
        """
        discoveries = []
        if ANALYSES_DIR.exists():
            paths = sorted(ANALYSES_DIR.glob("discovery_*.json"), reverse=True)
            for path in paths:
                try:
                    with open(path) as fp:
                        data = json.load(fp)
                    discoveries.append(_summarize_discovery(data))
                except (OSError, ValueError) as e:
                    logger.warning(
                        "Skipping unreadable discovery file %s: %s",
                        path.name, e
                    )
        self._send_json(200, {"discoveries": discoveries})

    @_json_endpoint("Get discovery")
    def _get_discovery(self, discovery_id):
        """Return the stored JSON for one discovery, or 404 if absent.

        Args:
            discovery_id: Last segment of the request path.
        """
        file_name = f"discovery_{discovery_id}.json"
        file_path = ANALYSES_DIR / file_name
        # The id comes from the URL: refuse anything that the platform would
        # interpret as more than a single path component (e.g. a backslash
        # on Windows), so the lookup cannot leave ANALYSES_DIR.
        if Path(file_name).name != file_name or not file_path.exists():
            self._send_json(404, {"error": "Not found"})
            return

        with open(file_path) as f:
            data = json.load(f)
        self._send_json(200, data)

    def log_message(self, format, *args):
        """Route http.server's access log lines through the module logger."""
        logger.info("%s - %s", self.client_address[0], format % args)


def run_server(port=8000, host='localhost'):
    """Start the API server and block until it is interrupted.

    Args:
        port: TCP port to listen on.
        host: Interface to bind to (defaults to the loopback interface).
    """
    server = HTTPServer((host, port), TechScoutHandler)
    logger.info(f"TechScout API Server running on http://{host}:{port}")
    logger.info("Endpoints:")
    logger.info(" POST /api/discover - Run technology discovery (legacy)")
    logger.info(" POST /api/deepdive - Run deep dive analysis")
    logger.info("")
    logger.info(" Step-based Capability Matching (Guided Workflow):")
    logger.info(" POST /api/match/search - Step 1: Parse & Search")
    logger.info(" POST /api/match/extract - Step 2: Extract Technologies")
    logger.info(" POST /api/match/evaluate - Step 3: Evaluate Selected")
    logger.info("")
    logger.info(" POST /api/match - Run full matching pipeline (may timeout)")
    logger.info(" GET /api/discoveries - List saved discoveries")
    logger.info(" GET /api/discoveries/<id> - Get specific discovery")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        logger.info("Shutting down TechScout API Server")
    finally:
        # Release the listening socket even when serve_forever() is aborted.
        server.server_close()


if __name__ == '__main__':
    run_server(port=8000)