# TechScout/api_server - llama3-1-8b.py
# (Repository file view: 258 lines, 9.1 KiB, Python; revision 2026-01-22 13:02:09 -05:00)
"""
TechScout API Server
Simple HTTP server that the dashboard calls for discovery/deep-dive operations.
"""
import json
import logging
from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.parse import parse_qs, urlparse
import threading
from techscout.pipeline.discovery import DiscoveryPipeline
from techscout.pipeline.deep_dive import DeepDivePipeline
# Configure the root logger once, at import time: INFO level, timestamped lines.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Module-level pipeline singletons. They start out as None and are created on
# first use by get_discovery_pipeline() / get_deepdive_pipeline().
discovery_pipeline = None
deepdive_pipeline = None
def get_discovery_pipeline():
    """Return the shared DiscoveryPipeline, creating it on the first call.

    The instance is stored in the module-level ``discovery_pipeline`` global so
    later calls reuse it instead of constructing a new pipeline.
    """
    global discovery_pipeline
    # Fast path: already built by an earlier call.
    if discovery_pipeline is not None:
        return discovery_pipeline
    logger.info("Initializing discovery pipeline...")
    discovery_pipeline = DiscoveryPipeline(model='llama3:8b')
    return discovery_pipeline
def get_deepdive_pipeline():
    """Return the shared DeepDivePipeline, creating it on the first call.

    The instance is stored in the module-level ``deepdive_pipeline`` global so
    later calls reuse it instead of constructing a new pipeline.
    """
    global deepdive_pipeline
    # Fast path: already built by an earlier call.
    if deepdive_pipeline is not None:
        return deepdive_pipeline
    logger.info("Initializing deep dive pipeline...")
    deepdive_pipeline = DeepDivePipeline(model='llama3:8b')
    return deepdive_pipeline
class TechScoutHandler(BaseHTTPRequestHandler):
    """HTTP request handler for the TechScout dashboard API.

    Routes:
        GET  /api/health            - liveness probe, returns {"status": "ok"}
        GET  /api/discoveries       - list summaries of saved discovery files
        GET  /api/discoveries/<id>  - return one saved discovery file
        POST /api/discover          - run the discovery pipeline
        POST /api/deepdive          - run the deep-dive pipeline
        OPTIONS *                   - CORS preflight

    Every JSON response (including errors and 404s) carries the CORS headers
    so a browser-based dashboard can read the status and error message.
    Client mistakes (bad headers, malformed JSON, missing fields) are answered
    with 400; only failures inside the pipelines or the filesystem yield 500.
    """

    # Organization values that mean "no company identified"; compared
    # case-insensitively after stripping surrounding whitespace.
    _PLACEHOLDER_ORGANIZATIONS = ('unknown', 'n/a', 'none', '')

    # ------------------------------------------------------------------
    # Response / request helpers
    # ------------------------------------------------------------------
    def _set_cors_headers(self):
        """Add the permissive CORS headers to the response being built."""
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS')
        self.send_header('Access-Control-Allow-Headers', 'Content-Type')

    def _send_body(self, status, body):
        """Send a complete JSON response whose body is already encoded bytes."""
        self.send_response(status)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self._set_cors_headers()
        self.end_headers()
        self.wfile.write(body)

    def _send_json(self, status, payload):
        """Serialize ``payload`` to JSON and send it with the given status.

        Serialization happens before the status line is written, so a
        payload that cannot be encoded raises without leaving a half-sent
        response on the wire.
        """
        self._send_body(status, json.dumps(payload).encode())

    def _read_json_body(self):
        """Read the request body and decode it as a JSON object.

        Returns:
            The decoded dict, or ``None`` if the request was malformed. In the
            ``None`` case a 400 response has already been sent.
        """
        try:
            length = int(self.headers.get('Content-Length'))
        except (TypeError, ValueError):
            # Header missing (None -> TypeError) or not an integer.
            self._send_json(400, {"error": "A valid Content-Length header is required"})
            return None
        if length < 0:
            # read(-1) would read until EOF and block on a live socket.
            self._send_json(400, {"error": "A valid Content-Length header is required"})
            return None
        try:
            # UnicodeDecodeError and json.JSONDecodeError both derive from ValueError.
            data = json.loads(self.rfile.read(length).decode())
        except ValueError:
            self._send_json(400, {"error": "Request body must be valid JSON"})
            return None
        if not isinstance(data, dict):
            self._send_json(400, {"error": "Request body must be a JSON object"})
            return None
        return data

    @staticmethod
    def _analyses_dir():
        """Return the ``analyses`` directory that sits next to this file."""
        # Local import: pathlib is not imported at module level in this file.
        from pathlib import Path
        return Path(__file__).parent / "analyses"

    @staticmethod
    def _summarize_discovery(path):
        """Build the list-view summary for one saved discovery file.

        Returns:
            A dict with id, capability_gap, timestamp, candidate_count and
            top_score, or ``None`` if the file cannot be read or is not a JSON
            object (so one bad file does not fail the whole listing).
        """
        try:
            with open(path) as fp:
                data = json.load(fp)
        except (OSError, ValueError) as exc:
            logger.warning("Skipping unreadable discovery file %s: %s", path, exc)
            return None
        if not isinstance(data, dict):
            logger.warning("Skipping discovery file %s: not a JSON object", path)
            return None
        candidates = data.get("candidates")
        if not isinstance(candidates, list):
            candidates = []
        # The first candidate's score is reported as the top score; this
        # presumably relies on candidates being stored best-first.
        top_score = 0
        if candidates and isinstance(candidates[0], dict):
            top_score = candidates[0].get("score", 0)
        return {
            "id": data.get("id"),
            "capability_gap": data.get("capability_gap"),
            "timestamp": data.get("timestamp"),
            "candidate_count": len(candidates),
            "top_score": top_score,
        }

    # ------------------------------------------------------------------
    # HTTP verb dispatch
    # ------------------------------------------------------------------
    def do_OPTIONS(self):
        """Answer CORS preflight requests with 200 and the CORS headers."""
        self.send_response(200)
        self._set_cors_headers()
        self.end_headers()

    def do_GET(self):
        """Dispatch GET requests by path; unknown paths get a JSON 404."""
        route = urlparse(self.path).path
        if route == '/api/health':
            self._send_json(200, {"status": "ok"})
        elif route == '/api/discoveries':
            self._list_discoveries()
        elif route.startswith('/api/discoveries/'):
            # The id is the last path segment.
            self._get_discovery(route.split('/')[-1])
        else:
            self._send_json(404, {"error": "Not found"})

    def do_POST(self):
        """Dispatch POST requests by path; unknown paths get a JSON 404."""
        route = urlparse(self.path).path
        if route == '/api/discover':
            self._run_discovery()
        elif route == '/api/deepdive':
            self._run_deepdive()
        else:
            self._send_json(404, {"error": "Not found"})

    # ------------------------------------------------------------------
    # POST endpoints
    # ------------------------------------------------------------------
    def _run_discovery(self):
        """Handle POST /api/discover.

        Expects a JSON object with a non-empty string ``query``. Responds with
        ``{"result": ...}`` on success, 400 on a bad request, and 500 with the
        exception text if the pipeline fails.
        """
        data = self._read_json_body()
        if data is None:
            return
        query = data.get('query', '')
        if not isinstance(query, str) or not query:
            self._send_json(400, {"error": "Query is required"})
            return

        logger.info("Starting discovery for: %s...", query[:50])
        try:
            pipeline = get_discovery_pipeline()
            result = pipeline.discover(
                capability_gap=query,
                max_results=50,
                use_llm_scoring=True
            )
            candidate_count = len(result.candidates)
            # Encode before any header is sent so a serialization failure
            # still produces a clean 500 instead of a response-within-response.
            body = json.dumps({"result": result.to_dict()}).encode()
        except Exception as e:
            logger.exception("Discovery failed")
            self._send_json(500, {"error": str(e)})
            return
        self._send_body(200, body)
        logger.info("Discovery complete: %d candidates", candidate_count)

    def _run_deepdive(self):
        """Handle POST /api/deepdive.

        Expects a JSON object with a string ``organization`` (required, and
        not a placeholder such as "Unknown") plus optional ``technology`` and
        ``gap``. Responds with ``{"result": ...}`` on success, 400 on a bad
        request, and 500 with the exception text if the pipeline fails.
        """
        data = self._read_json_body()
        if data is None:
            return
        organization = data.get('organization', '')
        technology = data.get('technology', '')
        gap = data.get('gap', '')

        if not isinstance(organization, str) or not organization:
            self._send_json(400, {"error": "Organization is required"})
            return
        # Reject placeholder names that do not identify a real company.
        if organization.strip().lower() in self._PLACEHOLDER_ORGANIZATIONS:
            self._send_json(400, {"error": "Cannot perform deep dive on unknown organization. Please select a candidate with a known company name."})
            return

        logger.info("Starting deep dive for: %s", organization)
        try:
            pipeline = get_deepdive_pipeline()
            result = pipeline.deep_dive(
                organization=organization,
                technology_context=technology,
                capability_gap=gap
            )
            # Encode before any header is sent (see _run_discovery).
            body = json.dumps({"result": result.to_dict()}).encode()
        except Exception as e:
            logger.exception("Deep dive failed")
            self._send_json(500, {"error": str(e)})
            return
        self._send_body(200, body)
        logger.info("Deep dive complete for: %s", organization)

    # ------------------------------------------------------------------
    # GET endpoints
    # ------------------------------------------------------------------
    def _list_discoveries(self):
        """Handle GET /api/discoveries.

        Responds with ``{"discoveries": [...]}`` holding one summary per
        ``discovery_*.json`` file in the analyses directory, in reverse
        filename order. Unreadable files are skipped; a missing directory
        yields an empty list.
        """
        try:
            analyses_dir = self._analyses_dir()
            discoveries = []
            if analyses_dir.exists():
                for path in sorted(analyses_dir.glob("discovery_*.json"), reverse=True):
                    summary = self._summarize_discovery(path)
                    if summary is not None:
                        discoveries.append(summary)
            body = json.dumps({"discoveries": discoveries}).encode()
        except Exception as e:
            logger.exception("Failed to list discoveries")
            self._send_json(500, {"error": str(e)})
            return
        self._send_body(200, body)

    def _get_discovery(self, discovery_id):
        """Handle GET /api/discoveries/<id>.

        Args:
            discovery_id: Identifier taken from the URL; the file read is
                ``discovery_<id>.json`` in the analyses directory.

        Responds with the file's JSON content, 400 for an id containing path
        separators, 404 if the file does not exist, or 500 on a read or parse
        failure.
        """
        # The id is interpolated into a filename, so refuse anything that
        # could step outside the analyses directory (backslash matters on
        # Windows) or confuse the OS path APIs (NUL).
        if any(ch in discovery_id for ch in ('/', '\\', '\x00')):
            self._send_json(400, {"error": "Invalid discovery id"})
            return
        file_path = self._analyses_dir() / f"discovery_{discovery_id}.json"
        try:
            with open(file_path) as f:
                data = json.load(f)
            body = json.dumps(data).encode()
        except FileNotFoundError:
            self._send_json(404, {"error": "Not found"})
            return
        except Exception as e:
            logger.exception("Failed to load discovery %s", discovery_id)
            self._send_json(500, {"error": str(e)})
            return
        self._send_body(200, body)

    def log_message(self, format, *args):
        """Route http.server's access log lines through the module logger."""
        logger.info("%s - %s", self.client_address[0], format % args)
def run_server(port=8000):
    """Start the TechScout API server on localhost and serve until interrupted.

    Args:
        port: TCP port to listen on (bound to ``localhost`` only).

    This call blocks inside ``serve_forever()``. Any exception that stops the
    loop (including KeyboardInterrupt from Ctrl+C) still propagates to the
    caller, but the listening socket is closed first.
    """
    # Using the server as a context manager guarantees server_close() runs on
    # exit, so the port is released even when serve_forever() is interrupted.
    with HTTPServer(('localhost', port), TechScoutHandler) as server:
        logger.info(f"TechScout API Server running on http://localhost:{port}")
        logger.info("Endpoints:")
        logger.info(" GET /api/health - Health check")
        logger.info(" POST /api/discover - Run technology discovery")
        logger.info(" POST /api/deepdive - Run deep dive analysis")
        logger.info(" GET /api/discoveries - List saved discoveries")
        logger.info(" GET /api/discoveries/<id> - Get specific discovery")
        server.serve_forever()


if __name__ == '__main__':
    run_server(port=8000)