"""FastAPI MCP-compatible server exposing ContextForge tools. The server uses a FastAPI lifespan to construct the heavy components once (`ContextRegistry`, `ContextCompressor`, `CompressionCoordinator`, `MetricsCollector`, `VLLMClient`) and stores them on `app.state`. Endpoints read these via the dependency-getter functions defined below; tests override the same getters via `app.dependency_overrides` so endpoint logic runs against fakes without ever entering the lifespan. Important contracts: - /health returns the metrics-supplied GPU label, never the request body. - Endpoints log only metadata (agent_id, lengths) — never the raw context — so request payloads cannot leak via stdout/stderr. """ from __future__ import annotations import asyncio import logging from contextlib import asynccontextmanager from typing import Any, AsyncIterator from fastapi import Depends, FastAPI, Request from fastapi.responses import JSONResponse from apohara_context_forge.config import settings from apohara_context_forge.compression.compressor import ContextCompressor from apohara_context_forge.compression.coordinator import CompressionCoordinator from apohara_context_forge.metrics.collector import MetricsCollector from apohara_context_forge.models import ( CompressionDecision, ContextEntry, ContextMatch, ContextRegistration, Degradation, MetricsSnapshot, OptimizedContextRequest, ) from apohara_context_forge.registry.context_registry import ContextRegistry from apohara_context_forge.serving.vllm_client import VLLMClient logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Lifespan — constructs heavy components once and tears them down on shutdown. # --------------------------------------------------------------------------- @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncIterator[None]: """Build app.state.* once; release resources on shutdown. 
Tests bypass the production heavy path either by NOT entering the `with TestClient(app) as client:` context (so this lifespan never fires) or by monkeypatching the constructor classes referenced by name on this module before entering the context. """ app.state.registry = ContextRegistry() app.state.compressor = ContextCompressor() app.state.coordinator = CompressionCoordinator( registry=app.state.registry, compressor=app.state.compressor, ) app.state.metrics = MetricsCollector() app.state.vllm = VLLMClient() logger.info( "ContextForge started on %s:%s (vLLM %s, model %s)", settings.contextforge_host, settings.contextforge_port, settings.vllm_base_url, settings.vllm_model, ) try: yield finally: # Best-effort teardown — never let cleanup errors mask the original # request error during shutdown. clear = getattr(app.state.registry, "clear", None) if clear is not None: try: await clear() except Exception as exc: logger.warning("registry.clear() failed: %s", exc) aclose = getattr(app.state.vllm, "aclose", None) if aclose is not None: try: await aclose() except Exception as exc: logger.warning("vllm.aclose() failed: %s", exc) app = FastAPI(title="ContextForge", version="0.1.0", lifespan=lifespan) # Module-level globals kept for callers that import the server outside a # lifespan-managed TestClient (e.g. ad-hoc REPL probes). Endpoints prefer # `request.app.state.*` via the dependency getters below. registry = ContextRegistry() metrics = MetricsCollector() compressor: ContextCompressor | None = None coordinator: CompressionCoordinator | None = None # --------------------------------------------------------------------------- # Dependency getters — keys for app.dependency_overrides in tests. 
# ---------------------------------------------------------------------------


def _state_component(request: Request, attr: str, fallback: Any) -> Any:
    """Return ``request.app.state.<attr>``, or *fallback* if it is absent."""
    return getattr(request.app.state, attr, fallback)


def get_registry(request: Request) -> ContextRegistry:
    """Resolve the registry from ``app.state``, else the module-level one."""
    return _state_component(request, "registry", registry)


def get_metrics(request: Request) -> MetricsCollector:
    """Resolve the metrics collector from ``app.state``, else the module-level one."""
    return _state_component(request, "metrics", metrics)


def get_compressor(request: Request) -> Any:
    """Resolve the compressor from ``app.state``, else the module-level value."""
    return _state_component(request, "compressor", compressor)


def get_coordinator(request: Request) -> Any:
    """Resolve the coordinator from ``app.state``, else the module-level value."""
    return _state_component(request, "coordinator", coordinator)


# ---------------------------------------------------------------------------
# /health — never raises. Reports {"status": "ok"|"degraded", "gpu":