""" groundlens REST API Lightweight HTTP wrapper around the groundlens library. Deploy on Hugging Face Spaces (Docker SDK), Railway, Fly.io, or any container host. Endpoints: POST /v1/check — auto-selects SGI or DGI based on whether context is provided POST /v1/sgi — explicit context-based grounding check POST /v1/dgi — explicit context-free grounding check GET /health — liveness + model status """ from __future__ import annotations import time from contextlib import asynccontextmanager from typing import Optional from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel, Field, ConfigDict # ───────────────────────────────────────────────────────────────────────────── # Model preloading # ───────────────────────────────────────────────────────────────────────────── _model_ready = False _model_load_time: float = 0.0 def _load_model() -> None: """Import groundlens to trigger model download + warm the embedding cache.""" global _model_ready, _model_load_time if _model_ready: return t0 = time.monotonic() from groundlens import compute_dgi # noqa: F401 # Warm up — first call loads the sentence-transformer model compute_dgi(question="warmup", response="warmup") _model_load_time = round(time.monotonic() - t0, 2) _model_ready = True @asynccontextmanager async def lifespan(app: FastAPI): """Load model at startup so first request is fast.""" _load_model() yield # ───────────────────────────────────────────────────────────────────────────── # App # ───────────────────────────────────────────────────────────────────────────── app = FastAPI( title="groundlens API", description=( "LLM hallucination detection using embedding geometry. " "No second LLM. Deterministic. Same inputs → same scores." ), version="2026.5.12", docs_url="/docs", redoc_url="/redoc", lifespan=lifespan, ) app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=False, allow_methods=["GET", "POST", "OPTIONS"], allow_headers=["*"], ) # ───────────────────────────────────────────────────────────────────────────── # Request / Response models # ───────────────────────────────────────────────────────────────────────────── class CheckRequest(BaseModel): """Auto-select SGI or DGI based on whether context is provided.""" model_config = ConfigDict(str_strip_whitespace=True) question: str = Field( ..., description="The question asked to the LLM", min_length=1, max_length=10_000, ) response: str = Field( ..., description="The LLM's response to evaluate", min_length=1, max_length=50_000, ) context: Optional[str] = Field( default=None, description=( "Source material (document, RAG chunks, reference text). " "If provided → SGI. If omitted → DGI." 


class SGIRequest(BaseModel):
    """Explicit context-based grounding check."""

    model_config = ConfigDict(str_strip_whitespace=True)

    question: str = Field(..., min_length=1, max_length=10_000)
    context: str = Field(..., min_length=1, max_length=100_000)
    response: str = Field(..., min_length=1, max_length=50_000)


class DGIRequest(BaseModel):
    """Explicit context-free grounding check."""

    model_config = ConfigDict(str_strip_whitespace=True)

    question: str = Field(..., min_length=1, max_length=10_000)
    response: str = Field(..., min_length=1, max_length=50_000)


class SGIDetail(BaseModel):
    q_dist: float
    ctx_dist: float
    interpretation: str


class DGIDetail(BaseModel):
    interpretation: str


class GroundingResult(BaseModel):
    verdict: str = Field(description="GROUNDED or HALLUCINATION RISK")
    flagged: bool = Field(description="True if hallucination risk detected")
    method: str = Field(description="SGI or DGI")
    score: float = Field(description="Grounding score")
    threshold: float = Field(description="Score threshold for flagging")
    explanation: str = Field(description="Plain-language explanation")
    detail: SGIDetail | DGIDetail
    latency_ms: int = Field(description="Processing time in milliseconds")


class HealthResponse(BaseModel):
    status: str
    model_loaded: bool
    model_load_time_s: float
    version: str
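
# Example (illustrative) of a serialized GroundingResult as returned by the
# grounding endpoints below. All values are placeholders; whether a given
# score is flagged is decided inside the groundlens library, not by comparing
# score to threshold in this wrapper.
#
#   {
#     "verdict": "GROUNDED",
#     "flagged": false,
#     "method": "SGI (Semantic Grounding Index)",
#     "score": 0.9812,
#     "threshold": 0.95,
#     "explanation": "The response appears grounded in the source material.",
#     "detail": {"q_dist": 0.1432, "ctx_dist": 0.0821, "interpretation": "..."},
#     "latency_ms": 42
#   }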

# ─────────────────────────────────────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────────────────────────────────────


def _run_sgi(question: str, context: str, response: str) -> GroundingResult:
    from groundlens import compute_sgi

    t0 = time.monotonic()
    result = compute_sgi(question=question, context=context, response=response)
    latency = int((time.monotonic() - t0) * 1000)

    return GroundingResult(
        verdict="GROUNDED" if not result.flagged else "HALLUCINATION RISK",
        flagged=result.flagged,
        method="SGI (Semantic Grounding Index)",
        score=round(result.value, 4),
        threshold=0.95,
        explanation=(
            "The response appears grounded in the source material."
            if not result.flagged
            else "The response may not be based on the source material provided."
        ),
        detail=SGIDetail(
            q_dist=round(result.q_dist, 4),
            ctx_dist=round(result.ctx_dist, 4),
            interpretation=result.explanation,
        ),
        latency_ms=latency,
    )


def _run_dgi(question: str, response: str) -> GroundingResult:
    from groundlens import compute_dgi

    t0 = time.monotonic()
    result = compute_dgi(question=question, response=response)
    latency = int((time.monotonic() - t0) * 1000)

    return GroundingResult(
        verdict="GROUNDED" if not result.flagged else "HALLUCINATION RISK",
        flagged=result.flagged,
        method="DGI (Directional Grounding Index)",
        score=round(result.value, 4),
        threshold=0.30,
        explanation=(
            "The response follows patterns typical of grounded answers."
            if not result.flagged
            else "The response shows geometric patterns associated with hallucination."
        ),
        detail=DGIDetail(
            interpretation=result.explanation,
        ),
        latency_ms=latency,
    )


# ─────────────────────────────────────────────────────────────────────────────
# Endpoints
# ─────────────────────────────────────────────────────────────────────────────


@app.get("/health", response_model=HealthResponse, tags=["system"])
async def health():
    """Liveness check. Returns model load status."""
    return HealthResponse(
        status="ok" if _model_ready else "loading",
        model_loaded=_model_ready,
        model_load_time_s=_model_load_time,
        version="2026.5.12",
    )


@app.post("/v1/check", response_model=GroundingResult, tags=["grounding"])
async def check(req: CheckRequest):
    """Check whether an LLM response is hallucinated.

    Auto-selects the right method:
    - Context provided → SGI (checks if the response used the source material)
    - No context → DGI (checks geometric grounding patterns)
    """
    if not _model_ready:
        raise HTTPException(503, "Model is still loading. Try again in a few seconds.")

    has_context = req.context is not None and req.context.strip() != ""
    if has_context:
        return _run_sgi(req.question, req.context, req.response)
    else:
        return _run_dgi(req.question, req.response)


@app.post("/v1/sgi", response_model=GroundingResult, tags=["grounding"])
async def sgi(req: SGIRequest):
    """SGI — check if the response is grounded in a source document.

    Use for RAG pipelines, document Q&A, or any case where you have the
    source material the LLM was given.
    """
    if not _model_ready:
        raise HTTPException(503, "Model is still loading. Try again in a few seconds.")
    return _run_sgi(req.question, req.context, req.response)


@app.post("/v1/dgi", response_model=GroundingResult, tags=["grounding"])
async def dgi(req: DGIRequest):
    """DGI — check grounding patterns without source context.

    Use for open-ended chat, general Q&A, or any case where you just have a
    question and the LLM's answer.
    """
    if not _model_ready:
        raise HTTPException(503, "Model is still loading. Try again in a few seconds.")
    return _run_dgi(req.question, req.response)
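

# Optional local-run entry point (a sketch; not part of the original module).
# Lets you start the API with `python app.py` during development. The module
# name ("app") and the port (7860, the Hugging Face Spaces Docker convention)
# are assumptions here; container hosts usually invoke uvicorn directly, e.g.
# `uvicorn app:app --host 0.0.0.0 --port 7860`.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)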