# groundlens-api / app.py
# AI-that-works's picture
# Upload 4 files
# 6d74c84 verified
"""
groundlens REST API

Lightweight HTTP wrapper around the groundlens library.
Deploy on Hugging Face Spaces (Docker SDK), Railway, Fly.io, or any container host.

Endpoints:
    POST /v1/check — auto-selects SGI or DGI based on whether context is provided
    POST /v1/sgi   — explicit context-based grounding check
    POST /v1/dgi   — explicit context-free grounding check
    GET  /health   — liveness + model status
"""
from __future__ import annotations
import time
from contextlib import asynccontextmanager
from typing import Optional
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field, ConfigDict
# ─────────────────────────────────────────────────────────────────────────────
# Model preloading
# ─────────────────────────────────────────────────────────────────────────────
# Module-level warm-up state: written by _load_model(), read by the /health
# endpoint and by the readiness guard at the top of each grounding endpoint.
_model_ready: bool = False  # becomes True once the warm-up call has returned
_model_load_time: float = 0.0  # seconds spent in _load_model(), rounded to 2 decimals
def _load_model() -> None:
    """Import groundlens and issue one throwaway DGI call to warm it up.

    The function is idempotent: when ``_model_ready`` is already set it does
    nothing. Otherwise it times the import plus the warm-up call, stores the
    elapsed seconds (rounded to two decimals) in ``_model_load_time`` and
    then sets ``_model_ready``. If the import or the warm-up call raises,
    neither global is updated.
    """
    global _model_ready, _model_load_time

    if not _model_ready:
        started = time.monotonic()

        # Imported here rather than at module top, so the import itself is
        # included in the measured load time.
        from groundlens import compute_dgi

        # Warm up — first call loads the sentence-transformer model.
        compute_dgi(question="warmup", response="warmup")

        elapsed = time.monotonic() - started
        _model_load_time = round(elapsed, 2)
        _model_ready = True
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan hook: warm the model before the application starts serving.

    ``_load_model()`` is invoked synchronously ahead of the ``yield``, so
    startup does not complete until it returns. There is no teardown step
    after the ``yield``.
    """
    _load_model()  # blocking; returns (or raises) before control is handed back
    yield  # the application runs while this generator is suspended here
# ─────────────────────────────────────────────────────────────────────────────
# App
# ─────────────────────────────────────────────────────────────────────────────
# The single FastAPI application object. The lifespan hook preloads the model
# at startup; interactive docs are served at /docs and /redoc.
app = FastAPI(
    title="groundlens API",
    description=(
        "LLM hallucination detection using embedding geometry. "
        # The arrow was mis-encoded ("β†’") and showed up garbled in the
        # generated API docs.
        "No second LLM. Deterministic. Same inputs → same scores."
    ),
    version="2026.5.12",
    docs_url="/docs",
    redoc_url="/redoc",
    lifespan=lifespan,
)

# CORS policy: any origin and any request header are accepted, credentials are
# disabled, and only the HTTP methods this API actually uses are allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["GET", "POST", "OPTIONS"],
    allow_headers=["*"],
)
# ─────────────────────────────────────────────────────────────────────────────
# Request / Response models
# ─────────────────────────────────────────────────────────────────────────────
class CheckRequest(BaseModel):
    """Auto-select SGI or DGI based on whether context is provided."""

    # Leading/trailing whitespace is stripped from every string field before
    # the length constraints below are applied.
    model_config = ConfigDict(str_strip_whitespace=True)

    question: str = Field(
        ...,
        description="The question asked to the LLM",
        min_length=1,
        max_length=10_000,
    )
    response: str = Field(
        ...,
        description="The LLM's response to evaluate",
        min_length=1,
        max_length=50_000,
    )
    # Optional: has no min_length, so None and "" are both accepted here and
    # are treated as "no context" by the /v1/check handler.
    context: Optional[str] = Field(
        default=None,
        description=(
            "Source material (document, RAG chunks, reference text). "
            # Arrows were mis-encoded ("β†’") in the published schema text.
            "If provided → SGI. If omitted → DGI."
        ),
        max_length=100_000,
    )
class SGIRequest(BaseModel):
    """Explicit context-based grounding check."""

    # Strings are whitespace-stripped before the length limits are checked,
    # so a whitespace-only value fails min_length=1.
    model_config = ConfigDict(str_strip_whitespace=True)

    # Field descriptions mirror CheckRequest so the generated API schema
    # documents all three request bodies consistently. Limits are unchanged.
    question: str = Field(
        ...,
        description="The question asked to the LLM",
        min_length=1,
        max_length=10_000,
    )
    context: str = Field(
        ...,
        description="Source material (document, RAG chunks, reference text).",
        min_length=1,
        max_length=100_000,
    )
    response: str = Field(
        ...,
        description="The LLM's response to evaluate",
        min_length=1,
        max_length=50_000,
    )
class DGIRequest(BaseModel):
    """Explicit context-free grounding check."""

    # Strings are whitespace-stripped before the length limits are checked,
    # so a whitespace-only value fails min_length=1.
    model_config = ConfigDict(str_strip_whitespace=True)

    # Field descriptions mirror CheckRequest so the generated API schema
    # documents all three request bodies consistently. Limits are unchanged.
    question: str = Field(
        ...,
        description="The question asked to the LLM",
        min_length=1,
        max_length=10_000,
    )
    response: str = Field(
        ...,
        description="The LLM's response to evaluate",
        min_length=1,
        max_length=50_000,
    )
class SGIDetail(BaseModel):
    # Method-specific payload attached to a GroundingResult by _run_sgi().
    # Kept as comments (not a class docstring) so the published schema for
    # this model is unchanged.
    q_dist: float  # compute_sgi result's q_dist, rounded to 4 decimals
    ctx_dist: float  # compute_sgi result's ctx_dist, rounded to 4 decimals
    interpretation: str  # compute_sgi result's explanation, passed through
class DGIDetail(BaseModel):
    # Method-specific payload attached to a GroundingResult by _run_dgi().
    # Kept as comments (not a class docstring) so the published schema for
    # this model is unchanged.
    interpretation: str  # compute_dgi result's explanation, passed through
class GroundingResult(BaseModel):
    # Response body shared by /v1/check, /v1/sgi and /v1/dgi. Field order is
    # preserved because it determines the order of keys in the JSON output.
    verdict: str = Field(
        description="GROUNDED or HALLUCINATION RISK",
    )
    flagged: bool = Field(
        description="True if hallucination risk detected",
    )
    method: str = Field(
        description="SGI or DGI",
    )
    score: float = Field(
        description="Grounding score",
    )
    threshold: float = Field(
        description="Score threshold for flagging",
    )
    explanation: str = Field(
        description="Plain-language explanation",
    )
    # Which detail model is present depends on the method that produced the
    # result: _run_sgi() supplies SGIDetail, _run_dgi() supplies DGIDetail.
    detail: SGIDetail | DGIDetail
    latency_ms: int = Field(
        description="Processing time in milliseconds",
    )
class HealthResponse(BaseModel):
    # Body returned by GET /health.
    status: str  # "ok" once the model is ready, otherwise "loading"
    model_loaded: bool  # mirrors the module-level _model_ready flag
    model_load_time_s: float  # mirrors _model_load_time (seconds)
    version: str  # API version string
# ─────────────────────────────────────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────────────────────────────────────
def _run_sgi(question: str, context: str, response: str) -> GroundingResult:
    """Run the SGI check and wrap its outcome in a ``GroundingResult``.

    Only the ``compute_sgi`` call itself is timed; assembling the response
    model is not counted in ``latency_ms``. Scores and distances are rounded
    to four decimals for the response.
    """
    from groundlens import compute_sgi

    started = time.monotonic()
    outcome = compute_sgi(question=question, context=context, response=response)
    elapsed_ms = int((time.monotonic() - started) * 1000)

    # Verdict and plain-language summary both follow the library's flag.
    if outcome.flagged:
        verdict = "HALLUCINATION RISK"
        summary = "The response may not be based on the source material provided."
    else:
        verdict = "GROUNDED"
        summary = "The response appears grounded in the source material."

    sgi_detail = SGIDetail(
        q_dist=round(outcome.q_dist, 4),
        ctx_dist=round(outcome.ctx_dist, 4),
        interpretation=outcome.explanation,
    )

    return GroundingResult(
        verdict=verdict,
        flagged=outcome.flagged,
        method="SGI (Semantic Grounding Index)",
        score=round(outcome.value, 4),
        # Reported as a fixed constant; the flag itself comes from groundlens.
        # NOTE(review): confirm 0.95 matches the cutoff compute_sgi applies.
        threshold=0.95,
        explanation=summary,
        detail=sgi_detail,
        latency_ms=elapsed_ms,
    )
def _run_dgi(question: str, response: str) -> GroundingResult:
    """Run the DGI check and wrap its outcome in a ``GroundingResult``.

    Only the ``compute_dgi`` call itself is timed; assembling the response
    model is not counted in ``latency_ms``. The score is rounded to four
    decimals for the response.
    """
    from groundlens import compute_dgi

    started = time.monotonic()
    outcome = compute_dgi(question=question, response=response)
    elapsed_ms = int((time.monotonic() - started) * 1000)

    # Verdict and plain-language summary both follow the library's flag.
    if outcome.flagged:
        verdict = "HALLUCINATION RISK"
        summary = "The response shows geometric patterns associated with hallucination."
    else:
        verdict = "GROUNDED"
        summary = "The response follows patterns typical of grounded answers."

    dgi_detail = DGIDetail(interpretation=outcome.explanation)

    return GroundingResult(
        verdict=verdict,
        flagged=outcome.flagged,
        method="DGI (Directional Grounding Index)",
        score=round(outcome.value, 4),
        # Reported as a fixed constant; the flag itself comes from groundlens.
        # NOTE(review): confirm 0.30 matches the cutoff compute_dgi applies.
        threshold=0.30,
        explanation=summary,
        detail=dgi_detail,
        latency_ms=elapsed_ms,
    )
# ─────────────────────────────────────────────────────────────────────────────
# Endpoints
# ─────────────────────────────────────────────────────────────────────────────
@app.get("/health", response_model=HealthResponse, tags=["system"])
async def health():
    """Liveness check. Returns model load status."""
    return HealthResponse(
        status="ok" if _model_ready else "loading",
        model_loaded=_model_ready,
        model_load_time_s=_model_load_time,
        # Take the version from the application object instead of repeating
        # the literal, so /health cannot drift from the version passed to
        # FastAPI(...) and shown in the generated docs.
        version=app.version,
    )
@app.post("/v1/check", response_model=GroundingResult, tags=["grounding"])
async def check(req: CheckRequest):
    """Check whether an LLM response is hallucinated.

    Auto-selects the right method:

    - Context provided → SGI (checks if the response used the source material)
    - No context → DGI (checks geometric grounding patterns)
    """
    # Refuse work until the warm-up has completed.
    if not _model_ready:
        raise HTTPException(503, "Model is still loading. Try again in a few seconds.")

    # None, "" and whitespace-only context all count as "no context".
    # NOTE(review): the scoring helpers are synchronous and run directly in
    # this coroutine; confirm that is acceptable for the expected load.
    if req.context and req.context.strip():
        return _run_sgi(req.question, req.context, req.response)
    return _run_dgi(req.question, req.response)
@app.post("/v1/sgi", response_model=GroundingResult, tags=["grounding"])
async def sgi(req: SGIRequest):
    """SGI — check if the response is grounded in a source document.

    Use for RAG pipelines, document Q&A, or any case where you have
    the source material the LLM was given.
    """
    # Refuse work until the warm-up has completed.
    if not _model_ready:
        raise HTTPException(503, "Model is still loading. Try again in a few seconds.")

    # NOTE(review): _run_sgi is synchronous and runs directly in this
    # coroutine; confirm that is acceptable for the expected load.
    return _run_sgi(req.question, req.context, req.response)
@app.post("/v1/dgi", response_model=GroundingResult, tags=["grounding"])
async def dgi(req: DGIRequest):
    """DGI — check grounding patterns without source context.

    Use for open-ended chat, general Q&A, or any case where you just
    have a question and the LLM's answer.
    """
    # Refuse work until the warm-up has completed.
    if not _model_ready:
        raise HTTPException(503, "Model is still loading. Try again in a few seconds.")

    # NOTE(review): _run_dgi is synchronous and runs directly in this
    # coroutine; confirm that is acceptable for the expected load.
    return _run_dgi(req.question, req.response)