AI-that-works committed on
Commit
9f6ea26
·
verified ·
1 Parent(s): 5e8dbda

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +523 -0
app.py ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ groundlens — Geometric LLM Hallucination Detection Demo
3
+
4
+ Plain-language interface: paste a question and the AI's answer,
5
+ optionally upload context (PDF, Excel, or plain text).
6
+ Compares groundlens (embedding geometry) vs Vectara HHEM-2.1-Open.
7
+
8
+ Models load once at module level to avoid cold-start on Space wake.
9
+ """
10
+
11
+ import logging
12
+ import time
13
+ import os
14
+
15
+ import gradio as gr
16
+ from groundlens import compute_sgi, compute_dgi
17
+
18
# Root-level INFO logging so model-load and warmup progress is visible
# in the Space's runtime logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
20
+
21
+
22
+ # ─────────────────────────────────────────────────────────────────────────────
23
+ # FILE EXTRACTION — PDF and Excel support
24
+ # ─────────────────────────────────────────────────────────────────────────────
25
+
26
def extract_pdf_text(file_path: str, max_chars: int = 8000) -> str:
    """Extract text from a PDF file.

    Reads at most the first 20 pages, joins page texts with blank lines,
    and truncates the result to *max_chars*. Any failure (including a
    missing pdfplumber install) returns a bracketed error string instead
    of raising.
    """
    try:
        import pdfplumber  # lazy import: only needed when a PDF is uploaded

        with pdfplumber.open(file_path) as pdf:
            pages = [
                extracted
                for page in pdf.pages[:20]
                if (extracted := page.extract_text())
            ]
        joined = "\n\n".join(pages)
        if len(joined) > max_chars:
            return joined[:max_chars]
        return joined
    except Exception as e:
        return f"[Could not read PDF: {e}]"
40
+
41
+
42
def extract_excel_text(file_path: str, max_chars: int = 8000) -> str:
    """Extract text from an Excel file.

    Reads up to the first 5 sheets and 200 rows per sheet, rendering each
    row as " | "-separated cell values under a "--- <sheet> ---" banner.
    Output is truncated to *max_chars*. Any failure (including a missing
    openpyxl install) yields a bracketed error string instead of raising.
    """
    try:
        import openpyxl  # lazy import: only needed when a spreadsheet is uploaded

        wb = openpyxl.load_workbook(file_path, data_only=True)
        text_parts = []
        for sheet_name in wb.sheetnames[:5]:
            ws = wb[sheet_name]
            text_parts.append(f"--- {sheet_name} ---")
            for row in ws.iter_rows(max_row=200, values_only=True):
                cells = [str(c) if c is not None else "" for c in row]
                # Skip entirely empty rows. (The previous check compared the
                # *stripped* line against an *unstripped* join of empty
                # strings, which never matched, so separator-only lines like
                # "|  |" leaked into the output.)
                if not any(c.strip() for c in cells):
                    continue
                text_parts.append(" | ".join(cells).strip())
        full_text = "\n".join(text_parts)
        return full_text[:max_chars]
    except Exception as e:
        return f"[Could not read Excel file: {e}]"
60
+
61
+
62
def extract_file_to_text(file) -> str:
    """Extract text from an uploaded file and return it for the textbox.

    Dispatches on the file extension (PDF, Excel, or plain text) and
    prefixes a successful extraction with an "[Extracted from <name>]"
    header; bracketed error/unsupported messages pass through unchanged.
    """
    if file is None:
        return ""

    path = file.name if hasattr(file, 'name') else str(file)
    suffix = os.path.splitext(path)[1].lower()
    display_name = os.path.basename(path)

    if suffix == ".pdf":
        extracted = extract_pdf_text(path)
    elif suffix in (".xlsx", ".xls"):
        extracted = extract_excel_text(path)
    elif suffix in (".txt", ".md", ".csv"):
        try:
            with open(path, "r", encoding="utf-8", errors="replace") as handle:
                extracted = handle.read(8000)
        except Exception as e:
            extracted = f"[Could not read file: {e}]"
    else:
        extracted = f"[Unsupported file type: {suffix}. Use PDF, Excel, TXT, or CSV.]"

    # A leading "[" marks an error/status message from the extractors —
    # surface it as-is rather than wrapping it in the extraction header.
    if not extracted or extracted.startswith("["):
        return extracted
    return f"[Extracted from {display_name}]\n\n{extracted}"
87
+
88
+
89
# ─────────────────────────────────────────────────────────────────────────────
# HHEM-2.1-Open — baseline comparison
# ─────────────────────────────────────────────────────────────────────────────

# Both models are loaded eagerly at import time (see module docstring) so
# the first request after a Space wake does not pay the cold-start cost.
logger.info("Loading HHEM-2.1-Open...")
from transformers import AutoModelForSequenceClassification

# trust_remote_code=True: the HHEM repo ships its own model class (which
# provides the .predict API used below) rather than a stock architecture.
_hhem = AutoModelForSequenceClassification.from_pretrained(
    "vectara/hallucination_evaluation_model",
    trust_remote_code=True,
)
logger.info("HHEM loaded.")

# Warm up groundlens embedding model with a throwaway call so its encoder
# weights are resident before the first real request.
logger.info("Warming up groundlens...")
compute_dgi(question="warmup", response="warmup")
logger.info("groundlens ready.")
106
+
107
+
108
+ # ─────────────────────────────────────────────────────────────────────────────
109
+ # SCORING
110
+ # ─────────────────────────────────────────────────────────────────────────────
111
+
112
def score_groundlens(question: str, response: str, context: str) -> dict:
    """Score the Q/A pair with groundlens.

    Uses SGI when source context is supplied, DGI otherwise. Returns a
    dict with the method label, rounded raw score, grounded verdict,
    decision threshold, elapsed wall-clock time in ms, and a plain-
    language note describing what was measured.
    """
    t0 = time.perf_counter()

    if context.strip():
        outcome = compute_sgi(
            question=question,
            context=context,
            response=response,
        )
        method, threshold = "SGI (with context)", 0.95
        mode_note = (
            "Measured how much the AI's answer used your source document "
            "vs. just rephrasing the question."
        )
    else:
        outcome = compute_dgi(question=question, response=response)
        method, threshold = "DGI (without context)", 0.30
        mode_note = (
            "Measured whether the AI's answer follows patterns typical "
            "of grounded, factual responses."
        )

    took_ms = (time.perf_counter() - t0) * 1000

    return {
        "method": method,
        "raw_score": round(outcome.value, 4),
        # groundlens flags suspicious answers; "grounded" is the inverse.
        "grounded": not outcome.flagged,
        "threshold": threshold,
        "elapsed_ms": round(took_ms, 1),
        "mode_note": mode_note,
    }
154
+
155
+
156
def score_hhem(question: str, response: str, context: str) -> dict:
    """Score the Q/A pair with Vectara HHEM-2.1-Open (baseline).

    The premise is the context (when present) followed by the question,
    truncated to 1800 characters; the response is the hypothesis. Scores
    >= 0.5 count as "consistent".
    """
    if context.strip():
        premise = f"{context.strip()}\n\n{question}".strip()
    else:
        premise = question
    # Equivalent to the length-guarded slice: cap the premise at 1800 chars.
    premise = premise[:1800]

    t0 = time.perf_counter()
    score = float(_hhem.predict([(premise, response)])[0])
    took_ms = (time.perf_counter() - t0) * 1000

    consistent = score >= 0.5
    return {
        "method": "HHEM-2.1-Open",
        "raw_score": round(score, 4),
        "grounded": consistent,
        "elapsed_ms": round(took_ms, 1),
        "label": "consistent" if consistent else "hallucinated",
    }
178
+
179
+
180
+ # ─────────────────────────────────────────────────────────────────────────────
181
+ # MAIN COMPARISON — now takes only text inputs (no file object)
182
+ # ─────────────────────────────────────────────────────────────────────────────
183
+
184
def run_comparison(
    question: str, context_text: str, response: str
) -> tuple[str, str, str]:
    """Run both detectors on the same inputs and render three markdown panels.

    Returns (groundlens panel, HHEM panel, agreement panel). Input
    validation failures short-circuit with a warning in the first slot.
    """
    if not question.strip():
        return "⚠️ Enter the question you asked the AI.", "", ""
    if not response.strip():
        return "⚠️ Enter the AI's response.", "", ""

    # Drop the "[Extracted from ...]" header the upload helper prepends.
    context = context_text.strip()
    if context.startswith("[Extracted from "):
        header_end = context.find("\n")
        if header_end > 0:
            context = context[header_end:].strip()

    gl = score_groundlens(question, response, context)
    hhem = score_hhem(question, response, context)

    # groundlens panel
    if gl["grounded"]:
        gl_verdict, gl_explain = (
            "🟢 Looks grounded",
            "The AI's answer appears to be based on real information.",
        )
    else:
        gl_verdict, gl_explain = (
            "🔴 Possible hallucination",
            "The AI's answer shows signs of being fabricated or not grounded in the source.",
        )

    gl_md = f"""### groundlens

**{gl_verdict}**

{gl_explain}

| | |
|---|---|
| **Method** | {gl["method"]} |
| **Score** | {gl["raw_score"]} (threshold: {gl["threshold"]}) |
| **Time** | {gl["elapsed_ms"]} ms |

*{gl["mode_note"]}*"""

    # HHEM panel
    if hhem["grounded"]:
        hhem_verdict, hhem_explain = (
            "🟢 Looks consistent",
            "The classifier considers this answer consistent with the input.",
        )
    else:
        hhem_verdict, hhem_explain = (
            "🔴 Possible hallucination",
            "The classifier flagged this answer as potentially hallucinated.",
        )

    hhem_md = f"""### Vectara HHEM-2.1-Open

**{hhem_verdict}**

{hhem_explain}

| | |
|---|---|
| **Method** | {hhem["method"]} |
| **Score** | {hhem["raw_score"]} ({hhem["label"]}) |
| **Time** | {hhem["elapsed_ms"]} ms |

*Fine-tuned flan-T5 classifier.*"""

    # Agreement panel: compare the two boolean verdicts directly.
    verdicts = (gl["grounded"], hhem["grounded"])
    if verdicts == (True, True):
        agreement_md = "### 🔵 Both methods agree: the answer looks reliable."
    elif verdicts == (False, False):
        agreement_md = "### 🔴 Both methods agree: this answer is likely hallucinated."
    else:
        agreement_md = """### 🟠 The two methods disagree.

This often happens with **subtle factual errors** — the answer sounds right and
uses the correct vocabulary, but gets specific facts wrong. Embedding geometry
(groundlens) measures the shape of the answer; the classifier (HHEM) evaluates
its content differently. When they disagree, it's worth checking the facts manually.

[Learn more about hallucination types →](https://docs.groundlens.dev/theory/hallucination-taxonomy/)"""

    return gl_md, hhem_md, agreement_md
264
+
265
+
266
# ─────────────────────────────────────────────────────────────────────────────
# EXAMPLES
# ─────────────────────────────────────────────────────────────────────────────

# Rows for gr.Examples, matching the input order (question, context, response).
EXAMPLES = [
    # With context — grounded: the response restates facts present in the source.
    [
        "What does the water damage policy cover?",
        "Coverage includes burst pipes and sudden appliance failure up to "
        "$50,000. Flood damage requires a separate NFIP policy. "
        "Deductible is $1,500 per occurrence.",
        "The policy covers burst pipes and sudden appliance failure up to "
        "$50,000 per occurrence, with a $1,500 deductible.",
    ],
    # With context — contradicted: claims flood coverage and no deductible,
    # both at odds with the source text above.
    [
        "What does the water damage policy cover?",
        "Coverage includes burst pipes and sudden appliance failure up to "
        "$50,000. Flood damage requires a separate NFIP policy. "
        "Deductible is $1,500 per occurrence.",
        "The policy covers all water damage including floods "
        "with no deductible required.",
    ],
    # No context — a straightforward factual answer.
    [
        "What causes seasons on Earth?",
        "",
        "Seasons are caused by Earth's 23.5-degree axial tilt, which "
        "changes how directly sunlight hits each hemisphere.",
    ],
    # No context — an invented-institution answer (the hallucinated case).
    [
        "What causes seasons on Earth?",
        "",
        "Seasons are regulated by the Atmospheric Regulation Committee, "
        "a UN body established in 1952 that adjusts global temperature "
        "through orbital satellites.",
    ],
]
301
+
302
+
303
# ─────────────────────────────────────────────────────────────────────────────
# THEME — dark, matching groundlens.dev
# ─────────────────────────────────────────────────────────────────────────────

# Orange palette centered on the groundlens brand color (#fc7604 at c500).
# NOTE(review): _orange is not referenced anywhere else in this file — the
# theme below sets hex values directly. Confirm it is unused before removing.
_orange = gr.themes.Color(
    c50="#fff7ed",
    c100="#ffedd5",
    c200="#fed7aa",
    c300="#fdba74",
    c400="#fb923c",
    c500="#fc7604",
    c600="#ea580c",
    c700="#c2410c",
    c800="#9a3412",
    c900="#7c2d12",
    c950="#431407",
)
320
+
321
# Start from the CrimsonNight hub theme and re-color its crimson accents to
# the groundlens orange. from_hub downloads the theme from the Hugging Face
# Hub, so this line needs network access on first load.
theme = gr.Theme.from_hub("Bruhn/CrimsonNight").set(
    # Override crimson red → groundlens orange
    button_primary_background_fill="#fc7604",
    button_primary_background_fill_dark="#fc7604",
    button_primary_background_fill_hover="#fb923c",
    button_primary_background_fill_hover_dark="#fb923c",
    button_primary_text_color="#0a0a0a",
    button_primary_text_color_dark="#0a0a0a",
    border_color_primary="#fc7604",
    border_color_primary_dark="#fc7604",
)
332
+
333
+
334
# ─────────────────────────────────────────────────────────────────────────────
# INTERFACE
# ─────────────────────────────────────────────────────────────────────────────

# Page-level CSS injected via gr.Blocks(css=...). Colors echo the groundlens
# orange (#fc7604) used by the theme overrides above.
css = """
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
    padding: 1.5rem !important;
}
h1 { color: #fc7604 !important; font-size: 2.2rem !important; font-weight: 700 !important; margin-bottom: 0.2rem !important; }
h3 { font-size: 1.15rem !important; }
.subtitle { color: #94a3b8 !important; font-size: 1.1rem !important; margin-top: 0 !important; }
a { color: #fd9a42 !important; }
a:hover { color: #fec08a !important; }
.step-label { color: #fc7604; font-weight: 600; font-size: 1.05rem; }
.links-bar { font-size: 0.9rem; color: #64748b; margin-top: 0.5rem; }
.links-bar a { color: #64748b !important; }
.links-bar a:hover { color: #fd9a42 !important; }
footer { display: none !important; }

/* Upload button — small, dashed secondary style */
.upload-btn { margin-top: 0.25rem !important; }
.upload-btn button {
    background: transparent !important;
    border: 1px dashed #475569 !important;
    color: #94a3b8 !important;
    font-size: 0.85rem !important;
    padding: 0.4rem 1rem !important;
    border-radius: 6px !important;
}
.upload-btn button:hover {
    border-color: #fc7604 !important;
    color: #fc7604 !important;
}
.upload-status p {
    color: #94a3b8 !important;
    font-size: 0.85rem !important;
    margin: 0.25rem 0 0 0 !important;
    font-style: italic;
}
@media (max-width: 768px) {
    .gradio-container { padding: 0.75rem !important; }
    h1 { font-size: 1.6rem !important; }
}
"""
380
+
381
# Assemble the single-page Gradio UI. This runs at import time, so the
# Space can serve `demo` as soon as the models above have loaded.
with gr.Blocks(
    title="groundlens — Check if your AI is hallucinating",
    theme=theme,
    css=css,
) as demo:

    # Header and plain-language pitch.
    gr.Markdown("""
# groundlens

<p class="subtitle">Check if an AI gave you a real answer or made something up.</p>
""")

    gr.Markdown("""
You asked an AI a question and got an answer. Was it real or hallucinated?
Paste both below and we'll check using two independent methods: **groundlens**
(geometric analysis) and **Vectara HHEM** (neural classifier).
""")

    # External reference links (styled by the .links-bar CSS rules).
    gr.Markdown("""<p class="links-bar">
<a href="https://github.com/groundlens-dev/groundlens">GitHub</a> ·
<a href="https://docs.groundlens.dev">Docs</a> ·
<a href="https://pypi.org/project/groundlens/">PyPI</a> ·
<a href="https://arxiv.org/abs/2512.13771">SGI paper</a> ·
<a href="https://arxiv.org/pdf/2602.13224v3">Taxonomy</a> ·
<a href="https://arxiv.org/abs/2603.13259">Mechanistic paper</a>
</p>""")

    # ── Step 1: Question ──
    gr.Markdown('<p class="step-label">1. What did you ask the AI?</p>')
    q_in = gr.Textbox(
        show_label=False,
        placeholder="e.g. What does our insurance policy cover for water damage?",
        lines=2,
    )

    # ── Step 2: Context (optional source material) ──
    gr.Markdown(
        '<p class="step-label">2. Did you give the AI any source material? (optional)</p>'
    )
    gr.Markdown(
        "If you gave the AI a document, a webpage, an Excel file, or any reference "
        "material to base its answer on, paste the text below. "
        "If you just asked a question with no source, skip this step.",
    )

    ctx_in = gr.Textbox(
        show_label=False,
        placeholder="Paste the source text here, or use the upload button below to extract text from a file...",
        lines=5,
    )

    # Hidden file input + visible upload button.
    # NOTE(review): file_in is never referenced again — uploads flow through
    # upload_btn below. It looks removable; confirm before deleting.
    file_in = gr.File(
        file_types=[".pdf", ".xlsx", ".xls", ".csv", ".txt"],
        file_count="single",
        visible=False,
    )
    # Status line shown under the upload button after an extraction attempt.
    upload_status = gr.Markdown("", elem_classes=["upload-status"])

    upload_btn = gr.UploadButton(
        "📄 Upload a file (PDF, Excel, CSV, TXT)",
        file_types=[".pdf", ".xlsx", ".xls", ".csv", ".txt"],
        file_count="single",
        elem_classes=["upload-btn"],
    )

    def handle_upload(file, existing_text):
        """Extract text from the uploaded file and merge it into the context box.

        Returns (new textbox value, status markdown). On extraction errors
        the textbox is left unchanged and the error surfaces in the status
        line instead.
        """
        extracted = extract_file_to_text(file)
        if not extracted:
            return existing_text, ""
        # The extractors signal failures with bracketed prefixes.
        if extracted.startswith("[Could not") or extracted.startswith("[Unsupported"):
            return existing_text, f"⚠️ {extracted}"

        basename = os.path.basename(file.name if hasattr(file, 'name') else str(file))
        # Append below any context the user already pasted; otherwise replace.
        if existing_text and existing_text.strip():
            new_text = existing_text.strip() + "\n\n" + extracted
        else:
            new_text = extracted
        return new_text, f"✓ Extracted text from **{basename}**"

    # The UploadButton itself carries the uploaded file value.
    upload_btn.upload(
        fn=handle_upload,
        inputs=[upload_btn, ctx_in],
        outputs=[ctx_in, upload_status],
    )

    # ── Step 3: Response ──
    gr.Markdown('<p class="step-label">3. What did the AI answer?</p>')
    r_in = gr.Textbox(
        show_label=False,
        placeholder="Paste the AI's response here...",
        lines=4,
    )

    # ── Evaluate button ──
    run_btn = gr.Button(
        "Check for hallucination",
        variant="primary",
        size="lg",
    )

    # ── Results: the two method panels side by side, agreement verdict below ──
    with gr.Row(equal_height=True):
        gl_out = gr.Markdown()
        hhem_out = gr.Markdown()

    agreement_out = gr.Markdown()

    # ── Examples ──
    gr.Markdown("---")
    gr.Markdown("### Try an example")

    gr.Examples(
        examples=EXAMPLES,
        inputs=[q_in, ctx_in, r_in],
        label="",
    )

    # ── Footer ──
    gr.Markdown("""
---

<p style="color:#475569; font-size:0.85rem; text-align:center;">
<strong>groundlens</strong> is open source (MIT). Built by
<a href="https://jmarin.info" style="color:#64748b !important;">Javier Marin</a>.
This demo runs the same library available via <code>pip install groundlens</code>.<br>
groundlens is verification triage, not a truth oracle. It tells you which answers
deserve trust and which need a closer look.
</p>
""")

    # ── Event binding: run both detectors on click ──
    run_btn.click(
        fn=run_comparison,
        inputs=[q_in, ctx_in, r_in],
        outputs=[gl_out, hhem_out, agreement_out],
    )
520
+
521
+
522
if __name__ == "__main__":
    # Launch the Gradio app when run as a script.
    demo.launch()