Spaces:
Sleeping
Sleeping
| """ | |
| groundlens — Geometric LLM Hallucination Detection Demo | |
| Plain-language interface: paste a question and the AI's answer, | |
| optionally upload context (PDF, Excel, or plain text). | |
| Compares groundlens (embedding geometry) vs Vectara HHEM-2.1-Open. | |
| Models load once at module level to avoid cold-start on Space wake. | |
| """ | |
| import logging | |
| import time | |
| import os | |
| import gradio as gr | |
| from groundlens import compute_sgi, compute_dgi | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
| # ───────────────────────────────────────────────────────────────────────────── | |
| # FILE EXTRACTION — PDF and Excel support | |
| # ───────────────────────────────────────────────────────────────────────────── | |
def extract_pdf_text(file_path: str, max_chars: int = 8000) -> str:
    """Pull plain text out of a PDF, capped at *max_chars* characters.

    Reads at most the first 20 pages. Any failure — missing pdfplumber,
    unreadable path, corrupt PDF — is returned as a bracketed message
    rather than raised, so the caller can show it directly in the UI.
    """
    try:
        import pdfplumber

        with pdfplumber.open(file_path) as pdf:
            extracted = (page.extract_text() for page in pdf.pages[:20])
            full_text = "\n\n".join(chunk for chunk in extracted if chunk)
        # Slicing is a no-op when the text is already short enough.
        return full_text[:max_chars]
    except Exception as e:
        return f"[Could not read PDF: {e}]"
def extract_excel_text(file_path: str, max_chars: int = 8000) -> str:
    """Extract a text rendering of an Excel workbook, capped at *max_chars*.

    Reads up to 5 sheets and 200 rows per sheet, rendering each row as
    " | "-separated cell values under a "--- <sheet> ---" banner. Rows
    whose cells are all empty are skipped. Any failure (missing openpyxl,
    unreadable file) is returned as a bracketed message instead of
    raising, so the UI can display it directly.
    """
    try:
        import openpyxl

        wb = openpyxl.load_workbook(file_path, data_only=True)
        text_parts = []
        for sheet_name in wb.sheetnames[:5]:
            ws = wb[sheet_name]
            text_parts.append(f"--- {sheet_name} ---")
            for row in ws.iter_rows(max_row=200, values_only=True):
                cells = [str(c) if c is not None else "" for c in row]
                # Skip rows with no real content. The previous check compared
                # the *stripped* line against an *unstripped* join of empty
                # cells, which could never match, so blank rows leaked through
                # as "|  |" separator junk.
                if any(c.strip() for c in cells):
                    text_parts.append(" | ".join(cells).strip())
        full_text = "\n".join(text_parts)
        # Slicing is a no-op when the text is already short enough.
        return full_text[:max_chars]
    except Exception as e:
        return f"[Could not read Excel file: {e}]"
def extract_file_to_text(file) -> str:
    """Turn an uploaded file into text destined for the context textbox.

    Dispatches on extension: PDF and Excel go through the dedicated
    extractors; txt/md/csv are read directly (first 8000 chars). Returns
    "" when no file was given and a bracketed "[...]" message on any
    failure. Successful extractions are prefixed with an
    "[Extracted from <name>]" header identifying the source file.
    """
    if file is None:
        return ""

    # Gradio may hand us a wrapper object with .name or a plain path string.
    path = getattr(file, "name", str(file))
    ext = os.path.splitext(path)[1].lower()

    if ext == ".pdf":
        text = extract_pdf_text(path)
    elif ext in (".xlsx", ".xls"):
        text = extract_excel_text(path)
    elif ext in (".txt", ".md", ".csv"):
        try:
            with open(path, "r", encoding="utf-8", errors="replace") as fh:
                text = fh.read(8000)
        except Exception as e:
            text = f"[Could not read file: {e}]"
    else:
        text = f"[Unsupported file type: {ext}. Use PDF, Excel, TXT, or CSV.]"

    # Bracketed messages are error reports — pass them through unchanged.
    if not text or text.startswith("["):
        return text
    return f"[Extracted from {os.path.basename(path)}]\n\n{text}"
| # ───────────────────────────────────────────────────────────────────────────── | |
| # HHEM-2.1-Open — baseline comparison | |
| # ───────────────────────────────────────────────────────────────────────────── | |
# Load the baseline comparison model once at import time so a waking Space
# does not pay the download/initialization cost on the first request (see
# the module docstring).
logger.info("Loading HHEM-2.1-Open...")
from transformers import AutoModelForSequenceClassification

# NOTE(review): trust_remote_code executes code shipped in the model repo.
# Acceptable for this pinned Vectara model, but worth keeping in mind if
# the model id ever changes.
_hhem = AutoModelForSequenceClassification.from_pretrained(
    "vectara/hallucination_evaluation_model",
    trust_remote_code=True,
)
logger.info("HHEM loaded.")

# Warm up groundlens embedding model — a throwaway compute_dgi call,
# presumably so that any lazy model loading inside groundlens happens now
# rather than on the first user request (TODO confirm against the library).
logger.info("Warming up groundlens...")
compute_dgi(question="warmup", response="warmup")
logger.info("groundlens ready.")
| # ───────────────────────────────────────────────────────────────────────────── | |
| # SCORING | |
| # ───────────────────────────────────────────────────────────────────────────── | |
def score_groundlens(question: str, response: str, context: str) -> dict:
    """Run the appropriate groundlens metric and package the result.

    Uses SGI when source context is available, DGI otherwise. Returns a
    dict with the method label, rounded raw score, grounded verdict, the
    threshold used, elapsed milliseconds, and a plain-language note
    explaining what was measured.
    """
    t0 = time.perf_counter()

    if context.strip():
        # Context available: measure grounding against the source document.
        result = compute_sgi(question=question, context=context, response=response)
        method, threshold = "SGI (with context)", 0.95
        mode_note = (
            "Measured how much the AI's answer used your source document "
            "vs. just rephrasing the question."
        )
    else:
        # No context: fall back to the question/response-only metric.
        result = compute_dgi(question=question, response=response)
        method, threshold = "DGI (without context)", 0.30
        mode_note = (
            "Measured whether the AI's answer follows patterns typical "
            "of grounded, factual responses."
        )

    return {
        "method": method,
        "raw_score": round(result.value, 4),
        "grounded": not result.flagged,
        "threshold": threshold,
        "elapsed_ms": round((time.perf_counter() - t0) * 1000, 1),
        "mode_note": mode_note,
    }
def score_hhem(question: str, response: str, context: str) -> dict:
    """Score the (premise, response) pair with HHEM-2.1-Open.

    The premise is the context (when provided) followed by the question,
    truncated to 1800 characters. Scores >= 0.5 count as "consistent".
    Returns a dict with method label, rounded score, grounded verdict,
    elapsed milliseconds, and a human-readable label.
    """
    stripped_ctx = context.strip()
    premise = f"{stripped_ctx}\n\n{question}".strip() if stripped_ctx else question
    # Truncation is a no-op for short premises.
    premise = premise[:1800]

    t0 = time.perf_counter()
    raw_score = float(_hhem.predict([(premise, response)])[0])
    elapsed_ms = round((time.perf_counter() - t0) * 1000, 1)

    consistent = raw_score >= 0.5
    return {
        "method": "HHEM-2.1-Open",
        "raw_score": round(raw_score, 4),
        "grounded": consistent,
        "elapsed_ms": elapsed_ms,
        "label": "consistent" if consistent else "hallucinated",
    }
| # ───────────────────────────────────────────────────────────────────────────── | |
| # MAIN COMPARISON — now takes only text inputs (no file object) | |
| # ───────────────────────────────────────────────────────────────────────────── | |
def _strip_extraction_header(context_text: str) -> str:
    """Drop the "[Extracted from ...]" banner the upload handler prepends."""
    context = context_text.strip()
    if context.startswith("[Extracted from "):
        newline_pos = context.find("\n")
        if newline_pos > 0:
            context = context[newline_pos:].strip()
    return context


def _groundlens_card(gl: dict) -> str:
    """Render the groundlens score dict as a markdown result card."""
    if gl["grounded"]:
        verdict = "🟢 Looks grounded"
        explain = "The AI's answer appears to be based on real information."
    else:
        verdict = "🔴 Possible hallucination"
        explain = "The AI's answer shows signs of being fabricated or not grounded in the source."
    return f"""### groundlens
**{verdict}**
{explain}
| | |
|---|---|
| **Method** | {gl["method"]} |
| **Score** | {gl["raw_score"]} (threshold: {gl["threshold"]}) |
| **Time** | {gl["elapsed_ms"]} ms |
*{gl["mode_note"]}*"""


def _hhem_card(hhem: dict) -> str:
    """Render the HHEM score dict as a markdown result card."""
    if hhem["grounded"]:
        verdict = "🟢 Looks consistent"
        explain = "The classifier considers this answer consistent with the input."
    else:
        verdict = "🔴 Possible hallucination"
        explain = "The classifier flagged this answer as potentially hallucinated."
    return f"""### Vectara HHEM-2.1-Open
**{verdict}**
{explain}
| | |
|---|---|
| **Method** | {hhem["method"]} |
| **Score** | {hhem["raw_score"]} ({hhem["label"]}) |
| **Time** | {hhem["elapsed_ms"]} ms |
*Fine-tuned flan-T5 classifier.*"""


def _agreement_summary(gl: dict, hhem: dict) -> str:
    """Summarize whether the two detectors reached the same verdict."""
    if gl["grounded"] == hhem["grounded"]:
        if gl["grounded"]:
            return "### 🔵 Both methods agree: the answer looks reliable."
        return "### 🔴 Both methods agree: this answer is likely hallucinated."
    return """### 🟠 The two methods disagree.
This often happens with **subtle factual errors** — the answer sounds right and
uses the correct vocabulary, but gets specific facts wrong. Embedding geometry
(groundlens) measures the shape of the answer; the classifier (HHEM) evaluates
its content differently. When they disagree, it's worth checking the facts manually.
[Learn more about hallucination types →](https://docs.groundlens.dev/theory/hallucination-taxonomy/)"""


def run_comparison(
    question: str, context_text: str, response: str
) -> tuple[str, str, str]:
    """Score a (question, context, response) triple with both detectors.

    Returns three markdown strings: the groundlens card, the HHEM card,
    and an agreement summary. A missing question or response
    short-circuits with a warning in the first slot and empty strings
    for the other two.
    """
    if not question.strip():
        return "⚠️ Enter the question you asked the AI.", "", ""
    if not response.strip():
        return "⚠️ Enter the AI's response.", "", ""

    context = _strip_extraction_header(context_text)
    gl = score_groundlens(question, response, context)
    hhem = score_hhem(question, response, context)
    return _groundlens_card(gl), _hhem_card(hhem), _agreement_summary(gl, hhem)
| # ───────────────────────────────────────────────────────────────────────────── | |
| # EXAMPLES | |
| # ───────────────────────────────────────────────────────────────────────────── | |
# Canned (question, context, response) triples for the Examples widget,
# covering the four interesting cases: grounded and hallucinated answers,
# each with and without source context.
EXAMPLES = [
    # 1. With context, answer faithful to the policy text.
    [
        "What does the water damage policy cover?",
        "Coverage includes burst pipes and sudden appliance failure up to "
        "$50,000. Flood damage requires a separate NFIP policy. "
        "Deductible is $1,500 per occurrence.",
        "The policy covers burst pipes and sudden appliance failure up to "
        "$50,000 per occurrence, with a $1,500 deductible.",
    ],
    # 2. With context, answer contradicts the source (floods covered,
    #    no deductible).
    [
        "What does the water damage policy cover?",
        "Coverage includes burst pipes and sudden appliance failure up to "
        "$50,000. Flood damage requires a separate NFIP policy. "
        "Deductible is $1,500 per occurrence.",
        "The policy covers all water damage including floods "
        "with no deductible required.",
    ],
    # 3. No context, correct general-knowledge answer.
    [
        "What causes seasons on Earth?",
        "",
        "Seasons are caused by Earth's 23.5-degree axial tilt, which "
        "changes how directly sunlight hits each hemisphere.",
    ],
    # 4. No context, fabricated institution and mechanism.
    [
        "What causes seasons on Earth?",
        "",
        "Seasons are regulated by the Atmospheric Regulation Committee, "
        "a UN body established in 1952 that adjusts global temperature "
        "through orbital satellites.",
    ],
]
| # ───────────────────────────────────────────────────────────────────────────── | |
| # THEME — dark, matching groundlens.dev | |
| # ───────────────────────────────────────────────────────────────────────────── | |
# Orange accent palette matching the groundlens.dev brand color (#fc7604).
# NOTE(review): _orange is never referenced below — the .set() overrides use
# raw hex values directly. Presumably it was meant to be passed as a
# primary_hue; confirm whether to wire it in or remove it.
_orange = gr.themes.Color(
    c50="#fff7ed",
    c100="#ffedd5",
    c200="#fed7aa",
    c300="#fdba74",
    c400="#fb923c",
    c500="#fc7604",
    c600="#ea580c",
    c700="#c2410c",
    c800="#9a3412",
    c900="#7c2d12",
    c950="#431407",
)
# Start from the dark CrimsonNight community theme and re-accent it.
theme = gr.Theme.from_hub("Bruhn/CrimsonNight").set(
    # Override crimson red → groundlens orange
    button_primary_background_fill="#fc7604",
    button_primary_background_fill_dark="#fc7604",
    button_primary_background_fill_hover="#fb923c",
    button_primary_background_fill_hover_dark="#fb923c",
    button_primary_text_color="#0a0a0a",
    button_primary_text_color_dark="#0a0a0a",
    border_color_primary="#fc7604",
    border_color_primary_dark="#fc7604",
)
| # ───────────────────────────────────────────────────────────────────────────── | |
| # INTERFACE | |
| # ───────────────────────────────────────────────────────────────────────────── | |
# Custom CSS injected via gr.Blocks(css=...): dark layout tweaks matching
# groundlens.dev — orange headings, muted link colors, a dashed secondary
# style for the upload button, a hidden Gradio footer, and a narrow-screen
# breakpoint.
css = """
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
    padding: 1.5rem !important;
}
h1 { color: #fc7604 !important; font-size: 2.2rem !important; font-weight: 700 !important; margin-bottom: 0.2rem !important; }
h3 { font-size: 1.15rem !important; }
.subtitle { color: #94a3b8 !important; font-size: 1.1rem !important; margin-top: 0 !important; }
a { color: #fd9a42 !important; }
a:hover { color: #fec08a !important; }
.step-label { color: #fc7604; font-weight: 600; font-size: 1.05rem; }
.links-bar { font-size: 0.9rem; color: #64748b; margin-top: 0.5rem; }
.links-bar a { color: #64748b !important; }
.links-bar a:hover { color: #fd9a42 !important; }
footer { display: none !important; }
/* Upload button — small, dashed secondary style */
.upload-btn { margin-top: 0.25rem !important; }
.upload-btn button {
    background: transparent !important;
    border: 1px dashed #475569 !important;
    color: #94a3b8 !important;
    font-size: 0.85rem !important;
    padding: 0.4rem 1rem !important;
    border-radius: 6px !important;
}
.upload-btn button:hover {
    border-color: #fc7604 !important;
    color: #fc7604 !important;
}
.upload-status p {
    color: #94a3b8 !important;
    font-size: 0.85rem !important;
    margin: 0.25rem 0 0 0 !important;
    font-style: italic;
}
@media (max-width: 768px) {
    .gradio-container { padding: 0.75rem !important; }
    h1 { font-size: 1.6rem !important; }
}
"""
# Build the Gradio UI. The Blocks context wires three text inputs
# (question, optional context, AI response) to run_comparison, with an
# upload button that extracts file text into the context textbox.
with gr.Blocks(
    title="groundlens — Check if your AI is hallucinating",
    theme=theme,
    css=css,
) as demo:
    # Page header and plain-language pitch.
    gr.Markdown("""
# groundlens
<p class="subtitle">Check if an AI gave you a real answer or made something up.</p>
""")
    gr.Markdown("""
You asked an AI a question and got an answer. Was it real or hallucinated?
Paste both below and we'll check using two independent methods: **groundlens**
(geometric analysis) and **Vectara HHEM** (neural classifier).
""")
    gr.Markdown("""<p class="links-bar">
<a href="https://github.com/groundlens-dev/groundlens">GitHub</a> ·
<a href="https://docs.groundlens.dev">Docs</a> ·
<a href="https://pypi.org/project/groundlens/">PyPI</a> ·
<a href="https://arxiv.org/abs/2512.13771">SGI paper</a> ·
<a href="https://arxiv.org/pdf/2602.13224v3">Taxonomy</a> ·
<a href="https://arxiv.org/abs/2603.13259">Mechanistic paper</a>
</p>""")
    # ── Step 1: Question ──
    gr.Markdown('<p class="step-label">1. What did you ask the AI?</p>')
    q_in = gr.Textbox(
        show_label=False,
        placeholder="e.g. What does our insurance policy cover for water damage?",
        lines=2,
    )
    # ── Step 2: Context ──
    gr.Markdown(
        '<p class="step-label">2. Did you give the AI any source material? (optional)</p>'
    )
    gr.Markdown(
        "If you gave the AI a document, a webpage, an Excel file, or any reference "
        "material to base its answer on, paste the text below. "
        "If you just asked a question with no source, skip this step.",
    )
    ctx_in = gr.Textbox(
        show_label=False,
        placeholder="Paste the source text here, or use the upload button below to extract text from a file...",
        lines=5,
    )
    # Hidden file input + visible upload button.
    # NOTE(review): file_in is created but never wired to any event — all
    # uploads flow through upload_btn below. Looks like dead code; confirm
    # before removing.
    file_in = gr.File(
        file_types=[".pdf", ".xlsx", ".xls", ".csv", ".txt"],
        file_count="single",
        visible=False,
    )
    # Status line under the upload button (styled via .upload-status CSS).
    upload_status = gr.Markdown("", elem_classes=["upload-status"])
    upload_btn = gr.UploadButton(
        "📄 Upload a file (PDF, Excel, CSV, TXT)",
        file_types=[".pdf", ".xlsx", ".xls", ".csv", ".txt"],
        file_count="single",
        elem_classes=["upload-btn"],
    )

    def handle_upload(file, existing_text):
        """Extract file text and append to context textbox.

        Returns (new textbox content, status markdown). Extraction errors
        leave the textbox untouched and surface a warning in the status.
        """
        extracted = extract_file_to_text(file)
        if not extracted:
            return existing_text, ""
        # Bracketed "[Could not ...]" / "[Unsupported ...]" messages are
        # error reports from the extractor, not content.
        if extracted.startswith("[Could not") or extracted.startswith("[Unsupported"):
            return existing_text, f"⚠️ {extracted}"
        basename = os.path.basename(file.name if hasattr(file, 'name') else str(file))
        # Append below any text the user already pasted; start fresh otherwise
        # (never overwrites what the user typed).
        if existing_text and existing_text.strip():
            new_text = existing_text.strip() + "\n\n" + extracted
        else:
            new_text = extracted
        return new_text, f"✓ Extracted text from **{basename}**"

    # upload_btn doubles as an input component: Gradio passes the uploaded
    # file object as handle_upload's first argument.
    upload_btn.upload(
        fn=handle_upload,
        inputs=[upload_btn, ctx_in],
        outputs=[ctx_in, upload_status],
    )
    # ── Step 3: Response ──
    gr.Markdown('<p class="step-label">3. What did the AI answer?</p>')
    r_in = gr.Textbox(
        show_label=False,
        placeholder="Paste the AI's response here...",
        lines=4,
    )
    # ── Evaluate button ──
    run_btn = gr.Button(
        "Check for hallucination",
        variant="primary",
        size="lg",
    )
    # ── Results: two side-by-side method cards, then the agreement note ──
    with gr.Row(equal_height=True):
        gl_out = gr.Markdown()
        hhem_out = gr.Markdown()
    agreement_out = gr.Markdown()
    # ── Examples ──
    gr.Markdown("---")
    gr.Markdown("### Try an example")
    gr.Examples(
        examples=EXAMPLES,
        inputs=[q_in, ctx_in, r_in],
        label="",
    )
    # ── Footer ──
    gr.Markdown("""
---
<p style="color:#475569; font-size:0.85rem; text-align:center;">
<strong>groundlens</strong> is open source (MIT). Built by
<a href="https://jmarin.info" style="color:#64748b !important;">Javier Marin</a>.
This demo runs the same library available via <code>pip install groundlens</code>.<br>
groundlens is verification triage, not a truth oracle. It tells you which answers
deserve trust and which need a closer look.
</p>
""")
    # ── Event binding: the main button drives the comparison ──
    run_btn.click(
        fn=run_comparison,
        inputs=[q_in, ctx_in, r_in],
        outputs=[gl_out, hhem_out, agreement_out],
    )

if __name__ == "__main__":
    demo.launch()