Spaces:

groundlens
/

groundlens-api

Sleeping

File size: 10,112 Bytes

6d74c84

"""
groundlens REST API

Lightweight HTTP wrapper around the groundlens library.
Deploy on Hugging Face Spaces (Docker SDK), Railway, Fly.io, or any container host.

Endpoints:
  POST /v1/check   — auto-selects SGI or DGI based on whether context is provided
  POST /v1/sgi     — explicit context-based grounding check
  POST /v1/dgi     — explicit context-free grounding check
  GET  /health     — liveness + model status
"""

from __future__ import annotations

import time
from contextlib import asynccontextmanager
from typing import Optional

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field, ConfigDict

# ─────────────────────────────────────────────────────────────────────────────
# Model preloading
# ─────────────────────────────────────────────────────────────────────────────

_model_ready = False
_model_load_time: float = 0.0


def _load_model() -> None:
    """Import groundlens to trigger model download + warm the embedding cache."""
    global _model_ready, _model_load_time
    if _model_ready:
        return
    t0 = time.monotonic()
    from groundlens import compute_dgi  # noqa: F401

    # Warm up — first call loads the sentence-transformer model
    compute_dgi(question="warmup", response="warmup")
    _model_load_time = round(time.monotonic() - t0, 2)
    _model_ready = True


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load model at startup so first request is fast."""
    _load_model()
    yield


# ─────────────────────────────────────────────────────────────────────────────
# App
# ─────────────────────────────────────────────────────────────────────────────

app = FastAPI(
    title="groundlens API",
    description=(
        "LLM hallucination detection using embedding geometry. "
        "No second LLM. Deterministic. Same inputs → same scores."
    ),
    version="2026.5.12",
    docs_url="/docs",
    redoc_url="/redoc",
    lifespan=lifespan,
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["GET", "POST", "OPTIONS"],
    allow_headers=["*"],
)


# ─────────────────────────────────────────────────────────────────────────────
# Request / Response models
# ─────────────────────────────────────────────────────────────────────────────

class CheckRequest(BaseModel):
    """Auto-select SGI or DGI based on whether context is provided."""

    model_config = ConfigDict(str_strip_whitespace=True)

    question: str = Field(
        ...,
        description="The question asked to the LLM",
        min_length=1,
        max_length=10_000,
    )
    response: str = Field(
        ...,
        description="The LLM's response to evaluate",
        min_length=1,
        max_length=50_000,
    )
    context: Optional[str] = Field(
        default=None,
        description=(
            "Source material (document, RAG chunks, reference text). "
            "If provided → SGI. If omitted → DGI."
        ),
        max_length=100_000,
    )


class SGIRequest(BaseModel):
    """Explicit context-based grounding check."""

    model_config = ConfigDict(str_strip_whitespace=True)

    question: str = Field(..., min_length=1, max_length=10_000)
    context: str = Field(..., min_length=1, max_length=100_000)
    response: str = Field(..., min_length=1, max_length=50_000)


class DGIRequest(BaseModel):
    """Explicit context-free grounding check."""

    model_config = ConfigDict(str_strip_whitespace=True)

    question: str = Field(..., min_length=1, max_length=10_000)
    response: str = Field(..., min_length=1, max_length=50_000)


class SGIDetail(BaseModel):
    q_dist: float
    ctx_dist: float
    interpretation: str


class DGIDetail(BaseModel):
    interpretation: str


class GroundingResult(BaseModel):
    verdict: str = Field(description="GROUNDED or HALLUCINATION RISK")
    flagged: bool = Field(description="True if hallucination risk detected")
    method: str = Field(description="SGI or DGI")
    score: float = Field(description="Grounding score")
    threshold: float = Field(description="Score threshold for flagging")
    explanation: str = Field(description="Plain-language explanation")
    detail: SGIDetail | DGIDetail
    latency_ms: int = Field(description="Processing time in milliseconds")


class HealthResponse(BaseModel):
    status: str
    model_loaded: bool
    model_load_time_s: float
    version: str


# ─────────────────────────────────────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────────────────────────────────────

def _run_sgi(question: str, context: str, response: str) -> GroundingResult:
    from groundlens import compute_sgi

    t0 = time.monotonic()
    result = compute_sgi(question=question, context=context, response=response)
    latency = int((time.monotonic() - t0) * 1000)

    return GroundingResult(
        verdict="GROUNDED" if not result.flagged else "HALLUCINATION RISK",
        flagged=result.flagged,
        method="SGI (Semantic Grounding Index)",
        score=round(result.value, 4),
        threshold=0.95,
        explanation=(
            "The response appears grounded in the source material."
            if not result.flagged
            else "The response may not be based on the source material provided."
        ),
        detail=SGIDetail(
            q_dist=round(result.q_dist, 4),
            ctx_dist=round(result.ctx_dist, 4),
            interpretation=result.explanation,
        ),
        latency_ms=latency,
    )


def _run_dgi(question: str, response: str) -> GroundingResult:
    from groundlens import compute_dgi

    t0 = time.monotonic()
    result = compute_dgi(question=question, response=response)
    latency = int((time.monotonic() - t0) * 1000)

    return GroundingResult(
        verdict="GROUNDED" if not result.flagged else "HALLUCINATION RISK",
        flagged=result.flagged,
        method="DGI (Directional Grounding Index)",
        score=round(result.value, 4),
        threshold=0.30,
        explanation=(
            "The response follows patterns typical of grounded answers."
            if not result.flagged
            else "The response shows geometric patterns associated with hallucination."
        ),
        detail=DGIDetail(
            interpretation=result.explanation,
        ),
        latency_ms=latency,
    )


# ─────────────────────────────────────────────────────────────────────────────
# Endpoints
# ─────────────────────────────────────────────────────────────────────────────

@app.get("/health", response_model=HealthResponse, tags=["system"])
async def health():
    """Liveness check. Returns model load status."""
    return HealthResponse(
        status="ok" if _model_ready else "loading",
        model_loaded=_model_ready,
        model_load_time_s=_model_load_time,
        version="2026.5.12",
    )


@app.post("/v1/check", response_model=GroundingResult, tags=["grounding"])
async def check(req: CheckRequest):
    """Check whether an LLM response is hallucinated.

    Auto-selects the right method:
    - Context provided → SGI (checks if the response used the source material)
    - No context → DGI (checks geometric grounding patterns)
    """
    if not _model_ready:
        raise HTTPException(503, "Model is still loading. Try again in a few seconds.")

    has_context = req.context is not None and req.context.strip() != ""

    if has_context:
        return _run_sgi(req.question, req.context, req.response)
    else:
        return _run_dgi(req.question, req.response)


@app.post("/v1/sgi", response_model=GroundingResult, tags=["grounding"])
async def sgi(req: SGIRequest):
    """SGI — check if the response is grounded in a source document.

    Use for RAG pipelines, document Q&A, or any case where you have
    the source material the LLM was given.
    """
    if not _model_ready:
        raise HTTPException(503, "Model is still loading. Try again in a few seconds.")

    return _run_sgi(req.question, req.context, req.response)


@app.post("/v1/dgi", response_model=GroundingResult, tags=["grounding"])
async def dgi(req: DGIRequest):
    """DGI — check grounding patterns without source context.

    Use for open-ended chat, general Q&A, or any case where you just
    have a question and the LLM's answer.
    """
    if not _model_ready:
        raise HTTPException(503, "Model is still loading. Try again in a few seconds.")

    return _run_dgi(req.question, req.response)