Abrar55 committed on
Commit
c9aa164
·
verified ·
1 Parent(s): d31ccde

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -1821
app.py DELETED
@@ -1,1821 +0,0 @@
1
- """
2
- CHEX - Document Intelligence
3
- HuggingFace Spaces Gradio Demo — fully self-contained (no relative imports)
4
-
5
- Tab 1: Analyze Contract — paste a contract, ask a question, get a structured answer
6
- Tab 2: Benchmark Demo — side-by-side table showing base model hallucinations vs CHEX
7
- Tab 3: Analyse Bank Statement — paste / upload a bank statement, get a summary + Q&A
8
- """
9
-
10
- from __future__ import annotations
11
-
12
- import csv
13
- import datetime as _dt
14
- import importlib.util
15
- import io
16
- import json
17
- import os
18
- import re
19
- import tempfile
20
- from enum import Enum
21
- from pathlib import Path
22
- from typing import Optional
23
-
24
- import gradio as gr
25
- from pydantic import BaseModel
26
-
27
- # ---------------------------------------------------------------------------
28
- # Schema (inlined from data/schema.py)
29
- # ---------------------------------------------------------------------------
30
-
31
class Label(str, Enum):
    """Verdict attached to an answer about a document.

    Members subclass ``str``, so each one compares equal to its plain
    string value.
    """

    # The requested information is present in the document.
    GROUNDED = "GROUNDED"
    # The document does not contain the requested clause/information.
    ABSENT = "ABSENT"
    # A clause exists but deviates from standard terms.
    CONTRADICTS_PRIOR = "CONTRADICTS_PRIOR"
35
-
36
-
37
class ModelOutput(BaseModel):
    """Structured answer the model is asked to return for a single question."""

    # The question, echoed back by the model.
    question: str
    # Classification of the answer (see ``Label``).
    label: Label
    # Answer text; optional, defaults to None.
    answer: Optional[str] = None
    # Supporting span quoted from the document; optional, defaults to None.
    citation: Optional[str] = None
    # Short explanation of the classification (required).
    reasoning: str
43
-
44
-
45
class BankStatementSummary(BaseModel):
    """Summary fields extracted from one bank statement.

    Every field except ``raw_reasoning`` is optional and defaults to None.
    """

    # Total money received, as display text.
    total_credits: Optional[str] = None
    # Total money spent, as display text.
    total_debits: Optional[str] = None
    # Description + amount of the single largest transaction.
    largest_transaction: Optional[str] = None
    # Detected recurring charges.
    recurring_payments: Optional[list[str]] = None
    # Unusual or suspicious items.
    flags: Optional[list[str]] = None
    # One-sentence explanation of the analysis (required).
    raw_reasoning: str
52
-
53
-
54
- # ---------------------------------------------------------------------------
55
- # Prompt templates (inlined from training/prompt_template.py)
56
- # ---------------------------------------------------------------------------
57
-
58
# System prompt for contract Q&A: describes the JSON output schema and gives
# one worked example per label (GROUNDED, ABSENT, CONTRADICTS_PRIOR).
# A trailing backslash joins a line with the next one inside the literal.
SYSTEM_PROMPT = """\
You are a contract analysis assistant specializing in detecting hallucinations \
and calibrated uncertainty. Given a contract text and a question about a specific \
clause, output a single JSON object with exactly these fields:

question : the question asked (copy verbatim)
label : one of GROUNDED, ABSENT, or CONTRADICTS_PRIOR
- GROUNDED : the information exists verbatim in the contract
- ABSENT : the contract does not contain this clause at all
- CONTRADICTS_PRIOR: the contract contains a clause but it deviates \
from standard legal terms (e.g., inverted obligations, non-standard timeframes)
answer : the answer text if GROUNDED or CONTRADICTS_PRIOR, null if ABSENT
citation : the exact verbatim span from the contract that supports the answer, \
null if ABSENT
reasoning : one sentence explaining your classification

Output ONLY the JSON object. No preamble, no markdown fences, no text outside the JSON.

### Example 1 — GROUNDED

[CONTRACT]
This Software License Agreement ("Agreement") is entered into as of January 1, 2024, \
between TechVision Inc. ("Licensor") and GlobalCorp Ltd. ("Licensee"). The Agreement \
shall remain in effect for a period of two (2) years from the Effective Date, unless \
earlier terminated pursuant to Section 8. Licensor grants Licensee a non-exclusive, \
non-transferable license to use the Software solely for Licensee's internal business \
purposes.
[/CONTRACT]

Question: What is the duration of this agreement?

{"question": "What is the duration of this agreement?", "label": "GROUNDED", \
"answer": "Two years from the Effective Date", \
"citation": "remain in effect for a period of two (2) years from the Effective Date", \
"reasoning": "The contract explicitly specifies a two-year term starting from the Effective Date."}

### Example 2 — ABSENT

[CONTRACT]
The Licensee shall pay a monthly fee of five hundred dollars ($500.00). Payment is due \
on the first business day of each calendar month. Late payments shall accrue interest \
at a rate of one and one-half percent (1.5%) per month. Licensee shall maintain \
accurate records of all uses of the Software.
[/CONTRACT]

Question: Does this agreement include a limitation of liability clause?

{"question": "Does this agreement include a limitation of liability clause?", \
"label": "ABSENT", "answer": null, "citation": null, \
"reasoning": "No limitation of liability clause appears anywhere in the provided contract text."}

### Example 3 — CONTRADICTS_PRIOR

[CONTRACT]
This Non-Disclosure Agreement is made between AlphaTech Solutions ("Discloser") and \
Beta Dynamics Corp. ("Recipient"). The Recipient shall not disclose Confidential \
Information to any third party. NON-COMPETE: The Recipient shall engage in any \
business activity that competes with the Discloser's primary operations during the \
term and for a period of 24 months thereafter. The Recipient shall not take any \
steps to protect Discloser's trade secrets.
[/CONTRACT]

Question: Does this agreement restrict the Recipient from competing with the Discloser?

{"question": "Does this agreement restrict the Recipient from competing with the Discloser?", \
"label": "CONTRADICTS_PRIOR", \
"answer": "The non-compete clause has inverted obligations — it permits competition rather than prohibiting it", \
"citation": "The Recipient shall engage in any business activity that competes with the Discloser's primary operations", \
"reasoning": "The clause uses 'shall engage' instead of 'shall not engage', inverting the standard non-compete obligation."}
"""
128
-
129
# System prompt for bank-statement analysis. It defines two output schemas:
# SUMMARY mode (question is exactly "SUMMARISE") and Q&A mode (anything else).
BANK_SYSTEM_PROMPT = """\
You are a financial analysis assistant specialising in bank statement review. \
Given a bank statement (plain text, CSV/Excel-derived, OFX/QFX-derived, or PDF-extracted) and either a \
summary request or a specific question, produce a single JSON object.

For SUMMARY mode (question is "SUMMARISE"):
Output a JSON object with exactly these fields:
total_credits : total money received (e.g. "£3,420.50") or null
total_debits : total money spent (e.g. "£2,105.30") or null
largest_transaction: description + amount of the single largest transaction or null
recurring_payments : list of detected recurring charges (e.g. ["Netflix £9.99", "Gym £35.00"]) or []
flags : list of unusual or suspicious items (e.g. ["Large cash withdrawal £800"]) or []
raw_reasoning : one sentence summarising your analysis

For Q&A mode (any other question), output a JSON object with exactly these fields:
question : the question asked (copy verbatim)
label : one of GROUNDED, ABSENT, or CONTRADICTS_PRIOR
answer : the answer text if GROUNDED or CONTRADICTS_PRIOR, null if ABSENT
citation : the exact verbatim span from the statement, null if ABSENT
reasoning : one sentence explaining your classification

Output ONLY the JSON object. No preamble, no markdown fences, no text outside the JSON.
"""
152
-
153
# Extra instruction appended to the final message when a request is retried
# in strict mode.
STRICT_SUFFIX = "".join(
    [
        "\n\nIMPORTANT: You must output ONLY a valid JSON object. ",
        "Do not include any text before or after the JSON.",
    ]
)
157
-
158
-
159
def _build_contract_messages(contract_text: str, question: str) -> list[dict]:
    """Build the two-message chat (system + user) for a contract question.

    The contract is wrapped in ``[CONTRACT]``/``[/CONTRACT]`` markers and the
    question follows after a blank line.
    """
    user_content = f"[CONTRACT]\n{contract_text}\n[/CONTRACT]\n\nQuestion: {question}"
    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]
164
-
165
-
166
def _build_bank_messages(statement_text: str, question: str) -> list[dict]:
    """Build the two-message chat (system + user) for a bank-statement request.

    The statement is wrapped in ``[STATEMENT]``/``[/STATEMENT]`` markers and
    the question follows after a blank line.
    """
    user_content = f"[STATEMENT]\n{statement_text}\n[/STATEMENT]\n\nQuestion: {question}"
    system_message = {"role": "system", "content": BANK_SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]
171
-
172
-
173
- # ---------------------------------------------------------------------------
174
- # JSON parsing helpers
175
- # ---------------------------------------------------------------------------
176
-
177
- def _extract_json_str(raw_text: str) -> str:
178
- match = re.search(r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)?\}", raw_text, re.DOTALL)
179
- if not match:
180
- match = re.search(r"\{.*\}", raw_text, re.DOTALL)
181
- if not match:
182
- raise ValueError(f"No JSON object found in model output: {raw_text[:300]!r}")
183
- return match.group()
184
-
185
-
186
def _parse_model_output(raw_text: str, question: str) -> ModelOutput:
    """Extract the JSON object from *raw_text* and validate it as a ``ModelOutput``.

    ``question`` is accepted but not used by this function.
    """
    return ModelOutput.model_validate_json(_extract_json_str(raw_text))
189
-
190
-
191
def _parse_summary(raw_text: str) -> BankStatementSummary:
    """Parse a SUMMARY-mode model reply into a ``BankStatementSummary``.

    Common model deviations are normalised before validation:
    scalar fields are converted to text (models often emit bare numbers for
    totals), list fields accept a single bare value, and a missing or
    ``null`` ``raw_reasoning`` becomes ``""`` because that field is required.

    Args:
        raw_text: Raw model output containing a JSON object.

    Returns:
        The populated summary model.

    Raises:
        ValueError: If no JSON object is found or the payload is not an object.
    """
    data = json.loads(_extract_json_str(raw_text))
    if not isinstance(data, dict):
        raise ValueError(f"Expected a JSON object for the summary, got {type(data).__name__}")

    def _optional_text(value) -> Optional[str]:
        # Keep None as None; stringify anything else (e.g. 3420.5 -> "3420.5").
        return None if value is None else str(value)

    def _text_list(value) -> list[str]:
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value if item is not None]
        # A single bare value where a list was requested.
        return [str(value)]

    return BankStatementSummary(
        total_credits=_optional_text(data.get("total_credits")),
        total_debits=_optional_text(data.get("total_debits")),
        largest_transaction=_optional_text(data.get("largest_transaction")),
        recurring_payments=_text_list(data.get("recurring_payments")),
        flags=_text_list(data.get("flags")),
        # `or ""` also covers an explicit null, which .get()'s default does not.
        raw_reasoning=str(data.get("raw_reasoning") or ""),
    )
201
-
202
-
203
- # ---------------------------------------------------------------------------
204
- # Model loading
205
- # ---------------------------------------------------------------------------
206
-
207
# Base URL of the inference server, read from the environment with any
# trailing slash removed. An empty string means "not configured".
MLX_SERVER_URL = os.environ.get("MLX_SERVER_URL", "").rstrip("/")
# Directories holding the bundled sample documents, next to this file.
SAMPLE_DIR = Path(__file__).parent / "sample_contracts"
STATEMENT_DIR = Path(__file__).parent / "sample_statements"

# Human-readable configuration problem, or None when the server URL is set.
model_load_error: Optional[str] = (
    None
    if MLX_SERVER_URL
    else "MLX_SERVER_URL not set. Set it in Space secrets to your Mac's ngrok URL."
)

if model_load_error is None:
    print(f"MLX server configured at: {MLX_SERVER_URL}")
else:
    print(f"WARNING: {model_load_error}")
218
-
219
-
220
- # ---------------------------------------------------------------------------
221
- # Inference helpers
222
- # ---------------------------------------------------------------------------
223
-
224
- MAX_CHARS = 32000 # rough character limit (~8k tokens) to keep requests fast
225
-
226
-
227
- def _truncate(text: str) -> str:
228
- if len(text) > MAX_CHARS:
229
- print(f"WARNING: Text truncated from {len(text)} to {MAX_CHARS} chars.")
230
- return text[:MAX_CHARS]
231
- return text
232
-
233
-
234
- def _apply_messages(messages: list[dict], strict: bool = False) -> list[dict]:
235
- if strict:
236
- messages = list(messages)
237
- messages[-1] = dict(messages[-1])
238
- messages[-1]["content"] += STRICT_SUFFIX
239
- return messages
240
-
241
-
242
def _run_inference(messages: list[dict]) -> str:
    """POST *messages* to the chat-completions endpoint and return the reply text.

    Sends ``max_tokens=512`` and ``temperature=0.0`` with a 120-second
    timeout, and returns the ``content`` of the first choice's message.
    Network and decoding errors propagate to the caller.
    """
    import urllib.request

    request_body = {
        "messages": messages,
        "max_tokens": 512,
        "temperature": 0.0,
    }
    endpoint = f"{MLX_SERVER_URL}/v1/chat/completions"
    request = urllib.request.Request(
        endpoint,
        data=json.dumps(request_body).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=120) as response:
        reply = json.loads(response.read())
    first_choice = reply["choices"][0]
    return first_choice["message"]["content"]
258
-
259
-
260
- # ---------------------------------------------------------------------------
261
- # Sample contract content
262
- # ---------------------------------------------------------------------------
263
-
264
def _read_sample(filename: str) -> str:
    """Return the UTF-8 text of a sample contract in ``SAMPLE_DIR``.

    If the file does not exist, a bracketed placeholder message is returned
    instead of raising.
    """
    sample_path = SAMPLE_DIR / filename
    if not sample_path.exists():
        return f"[Sample contract '{filename}' not found. Place it in demo/sample_contracts/]"
    return sample_path.read_text(encoding="utf-8")
269
-
270
-
271
# Sample contract texts, loaded once at import time.
SOFTWARE_LICENSE, NDA, SERVICE_AGREEMENT = (
    _read_sample(sample_name)
    for sample_name in ("software_license.txt", "nda.txt", "service_agreement.txt")
)

# Default question for each sample contract, keyed by the sample's filename.
SAMPLE_QUESTIONS = dict(
    [
        ("software_license.txt", "What is the limitation of liability in this agreement?"),
        ("nda.txt", "Does this agreement include a non-compete clause?"),
        ("service_agreement.txt", "Does this contract include a termination for convenience clause?"),
    ]
)
280
-
281
-
282
def _read_sample_statement(filename: str) -> str:
    """Return the UTF-8 text of a sample statement in ``STATEMENT_DIR``.

    If the file does not exist, a bracketed placeholder message is returned
    instead of raising.
    """
    statement_path = STATEMENT_DIR / filename
    if not statement_path.exists():
        return f"[Sample statement '{filename}' not found. Place it in demo/sample_statements/]"
    return statement_path.read_text(encoding="utf-8")


# Sample bank statement text, loaded once at import time.
SAMPLE_STATEMENT = _read_sample_statement("sample_statement.txt")
290
-
291
-
292
- # ---------------------------------------------------------------------------
293
- # Label badge HTML
294
- # ---------------------------------------------------------------------------
295
-
296
- _BADGE_CFG = {
297
- "GROUNDED": ("#0f9d58", "rgba(34,197,94,0.10)", "rgba(34,197,94,0.28)", "✓"),
298
- "ABSENT": ("#d23131", "rgba(239,68,68,0.09)", "rgba(239,68,68,0.28)", "✗"),
299
- "CONTRADICTS_PRIOR": ("#b87800", "rgba(245,158,11,0.10)", "rgba(245,158,11,0.30)", "⚠"),
300
- "N/A": ("#8a91a3", "rgba(139,145,163,0.10)","rgba(139,145,163,0.25)","—"),
301
- "ERROR": ("#991b1b", "rgba(220,38,38,0.10)", "rgba(220,38,38,0.32)", "!"),
302
- }
303
-
304
-
305
- def format_label_html(label: str) -> str:
306
- fg, bg, border, icon = _BADGE_CFG.get(label, _BADGE_CFG["N/A"])
307
- display = "CONTRADICTS PRIOR" if label == "CONTRADICTS_PRIOR" else label
308
- return (
309
- f'<div style="display:inline-flex;align-items:center;gap:8px;'
310
- f'padding:11px 16px;border-radius:10px;border:1px solid {border};'
311
- f'background:{bg};color:{fg};font-family:\'Inter\',sans-serif;'
312
- f'font-size:12.5px;font-weight:600;letter-spacing:0.02em;'
313
- f'backdrop-filter:blur(10px);">'
314
- f'<span style="width:14px;height:14px;display:grid;place-items:center;'
315
- f'font-size:13px;">{icon}</span>'
316
- f'<span>{display}</span></div>'
317
- )
318
-
319
-
320
- # ---------------------------------------------------------------------------
321
- # Analysis handlers
322
- # ---------------------------------------------------------------------------
323
-
324
def analyze_contract(contract_text: str, question: str) -> tuple[str, str, str, str]:
    """Answer *question* about *contract_text* using the remote model.

    The request is attempted twice; the second attempt appends the strict
    JSON-only suffix to the user message.

    Args:
        contract_text: Contract body pasted by the user (``None`` is treated
            as empty).
        question: Question to ask about the contract (``None`` is treated as
            empty).

    Returns:
        ``(label_badge_html, answer, citation, reasoning)``. Validation
        problems and an unconfigured server produce an "N/A" badge. If both
        attempts fail, an "ERROR" badge is returned rather than an ABSENT
        verdict, so that a transport or parsing failure is never presented
        as a finding about the contract.
    """
    contract_text = contract_text or ""
    question = question or ""
    if not contract_text.strip():
        return format_label_html("N/A"), "", "", "Please paste a contract above."
    if not question.strip():
        return format_label_html("N/A"), "", "", "Please enter a question."
    if not MLX_SERVER_URL:
        return (
            format_label_html("N/A"),
            "Model not loaded",
            "",
            f"Model failed to load: {model_load_error}.",
        )

    contract_text = _truncate(contract_text)
    messages = _build_contract_messages(contract_text, question)

    last_error: Exception | None = None
    for attempt in range(2):
        msgs = _apply_messages(messages, strict=(attempt == 1))
        try:
            raw = _run_inference(msgs)
            result = _parse_model_output(raw, question)
        except Exception as e:
            # Best effort: any failure (network, malformed JSON, schema
            # mismatch) triggers the retry, then the error fallback below.
            last_error = e
            if attempt == 0:
                print(f" Parse attempt 1 failed ({e}). Retrying with stricter prompt...")
            else:
                print(f" Parse attempt 2 failed ({e}). Returning safe fallback.")
            continue

        answer = result.answer or "(none — clause is absent or not applicable)"
        citation = result.citation or "(none)"
        return format_label_html(result.label.value), answer, citation, result.reasoning

    return (
        format_label_html("ERROR"),
        "(none — the model did not return a usable answer)",
        "(none)",
        f"Model output could not be obtained or parsed after two attempts ({last_error}).",
    )
361
-
362
-
363
def _get_statement_text(
    paste_text: str,
    pdf_file,
    pdf_password: str | None,
    csv_file,
    txt_file,
    xlsx_file,
    ofx_file,
) -> tuple[str, str]:
    """Single-statement wrapper around ``_get_statement_texts``.

    Returns ``(text, "")`` with the first extracted statement, or
    ``("", message)`` when nothing could be extracted. The message is the
    first reported error, or a generic prompt if there is none.
    """
    # Backwards-compatible shim: treat "single statement" inputs as one item.
    texts, errors = _get_statement_texts(
        paste_text,
        pdf_file,
        pdf_password,
        csv_file,
        txt_file,
        xlsx_file,
        ofx_file,
    )
    if texts:
        return texts[0], ""
    if errors:
        return "", errors[0]
    return "", "Please paste a bank statement or upload a PDF / CSV / TXT / XLSX / OFX/QFX file."
390
-
391
-
392
- def _ensure_file_list(files) -> list:
393
- if files is None:
394
- return []
395
- if isinstance(files, (list, tuple)):
396
- return [f for f in files if f is not None]
397
- return [files]
398
-
399
-
400
- def _split_statements(paste_text: str) -> list[str]:
401
- """
402
- Split pasted content into multiple statements.
403
-
404
- Delimiter: a line containing only `---` (3+ dashes), optionally surrounded by whitespace.
405
- """
406
- text = (paste_text or "").strip()
407
- if not text:
408
- return []
409
- parts = re.split(r"(?m)^[ \t]*-{3,}[ \t]*$", text)
410
- cleaned = [p.strip() for p in parts if p.strip()]
411
- return cleaned if cleaned else [text]
412
-
413
-
414
- def _get_statement_texts(
415
- paste_text: str,
416
- pdf_files,
417
- pdf_password: str | None,
418
- csv_files,
419
- txt_files,
420
- xlsx_files,
421
- ofx_files,
422
- ) -> tuple[list[str], list[str]]:
423
- """
424
- Extract statement text blocks from:
425
- - pasted text (can contain multiple statements separated by `---`)
426
- - uploaded PDFs (supports multiple)
427
- - uploaded CSVs (supports multiple)
428
- - uploaded TXT files (supports multiple)
429
- - uploaded Excel (.xlsx) (supports multiple)
430
- - uploaded OFX/QFX files (supports multiple)
431
- """
432
- statement_texts: list[str] = []
433
- errors: list[str] = []
434
-
435
- pdf_list = _ensure_file_list(pdf_files)
436
- csv_list = _ensure_file_list(csv_files)
437
- txt_list = _ensure_file_list(txt_files)
438
- xlsx_list = _ensure_file_list(xlsx_files)
439
- ofx_list = _ensure_file_list(ofx_files)
440
-
441
- # PDFs
442
- if pdf_list:
443
- try:
444
- if importlib.util.find_spec("pdfplumber") is None:
445
- errors.append("pdfplumber not installed.")
446
- else:
447
- import pdfplumber
448
- password = (pdf_password or "").strip()
449
- for idx, pdf_file in enumerate(pdf_list):
450
- try:
451
- text_parts: list[str] = []
452
- try:
453
- with pdfplumber.open(
454
- str(pdf_file),
455
- password=password if password else "",
456
- ) as pdf:
457
- for page in pdf.pages:
458
- t = page.extract_text()
459
- if t:
460
- text_parts.append(t)
461
- except TypeError:
462
- # Older pdfplumber versions may not accept `password=...`
463
- with pdfplumber.open(str(pdf_file)) as pdf:
464
- for page in pdf.pages:
465
- t = page.extract_text()
466
- if t:
467
- text_parts.append(t)
468
- text = "\n".join(text_parts).strip()
469
- if not text:
470
- errors.append(f"PDF #{idx+1} uploaded but no text could be extracted.")
471
- else:
472
- statement_texts.append(text)
473
- except Exception as e:
474
- msg = str(e).lower()
475
- if "password" in msg or "encrypted" in msg or "decrypt" in msg:
476
- errors.append(
477
- f"PDF #{idx+1} is password-protected. Please enter the correct password."
478
- )
479
- else:
480
- errors.append(f"PDF #{idx+1} extraction error: {e}")
481
- except Exception as e:
482
- errors.append(f"PDF extraction error: {e}")
483
-
484
- # CSVs
485
- if csv_list:
486
- try:
487
- import pandas as pd
488
- except Exception:
489
- if importlib.util.find_spec("pandas") is None:
490
- errors.append("pandas not installed.")
491
- else:
492
- errors.append("CSV parsing error: pandas import failed.")
493
- else:
494
- for idx, csv_file in enumerate(csv_list):
495
- try:
496
- df = pd.read_csv(str(csv_file))
497
- df.columns = [c.strip().lower() for c in df.columns]
498
- lines: list[str] = []
499
- for _, row in df.iterrows():
500
- parts = [
501
- str(v).strip()
502
- for v in row.values
503
- if str(v).strip() not in ("", "nan")
504
- ]
505
- lines.append(", ".join(parts))
506
- statement_texts.append(
507
- ", ".join(df.columns.tolist()) + "\n" + "\n".join(lines)
508
- )
509
- except Exception as e:
510
- errors.append(f"CSV #{idx+1} parsing error: {e}")
511
-
512
- # TXT
513
- if txt_list:
514
- for idx, txt_file in enumerate(txt_list):
515
- try:
516
- # Read best-effort encoding; then reuse the same delimiter splitting
517
- # strategy as pasted input.
518
- p = Path(str(txt_file))
519
- content = p.read_text(encoding="utf-8", errors="replace")
520
- parts = _split_statements(content)
521
- if not parts:
522
- errors.append(f"TXT #{idx+1} uploaded but no text could be read.")
523
- else:
524
- statement_texts.extend(parts)
525
- except Exception as e:
526
- errors.append(f"TXT #{idx+1} parsing error: {e}")
527
-
528
- # XLSX (Excel)
529
- if xlsx_list:
530
- try:
531
- import pandas as pd
532
- except Exception:
533
- if importlib.util.find_spec("pandas") is None:
534
- errors.append("pandas not installed.")
535
- else:
536
- errors.append("Excel parsing error: pandas import failed.")
537
- else:
538
- for idx, xlsx_file in enumerate(xlsx_list):
539
- try:
540
- df = pd.read_excel(str(xlsx_file), sheet_name=0)
541
- if df is None or df.empty:
542
- errors.append(f"XLSX #{idx+1} uploaded but no rows were found.")
543
- continue
544
- df.columns = [str(c).strip().lower() for c in df.columns]
545
- lines: list[str] = []
546
- for _, row in df.iterrows():
547
- parts = [
548
- str(v).strip()
549
- for v in row.values
550
- if str(v).strip() not in ("", "nan", "NaN")
551
- ]
552
- lines.append(", ".join(parts))
553
- statement_texts.append(
554
- ", ".join(df.columns.tolist()) + "\n" + "\n".join(lines)
555
- )
556
- except Exception as e:
557
- errors.append(f"XLSX #{idx+1} parsing error: {e}")
558
-
559
- # OFX/QFX (lightweight tag extraction)
560
- if ofx_list:
561
- def _format_ofx_date(d: str) -> str:
562
- d = (d or "").strip()
563
- if len(d) == 8 and d.isdigit():
564
- return f"{d[:4]}-{d[4:6]}-{d[6:]}"
565
- return d
566
-
567
- for idx, ofx_file in enumerate(ofx_list):
568
- try:
569
- p = Path(str(ofx_file))
570
- raw = p.read_bytes()
571
- try:
572
- content = raw.decode("utf-8")
573
- except UnicodeDecodeError:
574
- content = raw.decode("utf-8", errors="replace")
575
-
576
- blocks = re.findall(
577
- r"<STMTTRN>(.*?)</STMTTRN>",
578
- content,
579
- flags=re.IGNORECASE | re.DOTALL,
580
- )
581
-
582
- def _get_tag(block: str, tag: str) -> str:
583
- m = re.search(rf"<{tag}>([^<]*)", block, flags=re.IGNORECASE)
584
- return (m.group(1) if m else "").strip()
585
-
586
- lines: list[str] = []
587
- for b in blocks:
588
- dt = _get_tag(b, "DTPOSTED") or _get_tag(b, "DTTRAN")
589
- name = _get_tag(b, "NAME") or _get_tag(b, "PAYEE")
590
- memo = _get_tag(b, "MEMO") or _get_tag(b, "TRNTYPE")
591
- amt = _get_tag(b, "TRNAMT") or _get_tag(b, "AMOUNT")
592
-
593
- if not any([dt, name, memo, amt]):
594
- continue
595
-
596
- dt = _format_ofx_date(dt)
597
- desc_parts = [p for p in [name, memo] if p]
598
- desc = " - ".join(desc_parts) if desc_parts else "Transaction"
599
- lines.append(f"{dt}, {desc}, {amt}".strip(", "))
600
-
601
- if lines:
602
- statement_texts.append("Date, Description, Amount\n" + "\n".join(lines))
603
- else:
604
- # Fall back to returning the raw content (truncated).
605
- statement_texts.append(content.strip()[:20000])
606
- except Exception as e:
607
- errors.append(f"OFX/QFX #{idx+1} parsing error: {e}")
608
-
609
- # Paste text (may contain multiple statements)
610
- pasted_parts = _split_statements(paste_text)
611
- if pasted_parts:
612
- statement_texts.extend(pasted_parts)
613
-
614
- if not statement_texts:
615
- errors.append(
616
- "Please paste a bank statement or upload a PDF / CSV / TXT / XLSX / OFX/QFX file(s)."
617
- )
618
-
619
- return statement_texts, errors
620
-
621
-
622
def analyse_bank_statement(
    paste_text: str,
    pdf_file,
    pdf_password: str | None,
    csv_file,
    txt_file,
    xlsx_file,
    ofx_file,
) -> tuple[str, str, str]:
    """Summarise every provided bank statement and render a markdown report.

    Statements are gathered with ``_get_statement_texts``; at most six are
    analysed. Each statement is sent to the model in SUMMARISE mode with one
    strict-mode retry, and a placeholder summary is used if both attempts
    fail.

    Returns:
        ``(markdown_report, combined_statement_text, summaries_json)``. With
        no statement text the result is ``("**Error:** ...", "", "")``; with
        no server configured it is ``(message, combined_text, "")``.
    """
    statement_texts, errors = _get_statement_texts(
        paste_text, pdf_file, pdf_password, csv_file, txt_file, xlsx_file, ofx_file
    )
    if not statement_texts:
        reason = errors[0] if errors else "No bank statement provided."
        return f"**Error:** {reason}", "", ""

    MAX_STATEMENTS = 6
    if len(statement_texts) > MAX_STATEMENTS:
        errors.append(f"Too many statements provided; only the first {MAX_STATEMENTS} were used.")
        statement_texts = statement_texts[:MAX_STATEMENTS]

    # Concatenate the statements under numbered banners.
    total = len(statement_texts)
    sections: list[str] = []
    for number, body in enumerate(statement_texts, start=1):
        body = body.strip()
        if body:
            sections.append(f"===== Statement {number}/{total} =====\n\n{body}")
    combined_text = "\n\n".join(sections).strip()

    if not MLX_SERVER_URL:
        message = f"**Inference client not initialised.** Error: {model_load_error}"
        return message, combined_text, ""

    summaries: list[BankStatementSummary] = []
    for number, statement_text in enumerate(statement_texts, start=1):
        messages = _build_bank_messages(_truncate(statement_text), "SUMMARISE")

        summary: BankStatementSummary | None = None
        for strict in (False, True):
            msgs = _apply_messages(messages, strict=strict)
            try:
                summary = _parse_summary(_run_inference(msgs))
            except Exception as e:
                if strict:
                    print(f" Summary parse attempt 2 failed (statement {number}, {e}). Returning error.")
                else:
                    print(f" Summary parse attempt 1 failed (statement {number}, {e}). Retrying...")
            else:
                break

        if summary is None:
            summary = BankStatementSummary(
                raw_reasoning=f"Could not parse model output for statement {number}."
            )
        summaries.append(summary)

    def _bullets(items) -> list[str]:
        return [f"- {item}" for item in items]

    def _unique(groups) -> list[str]:
        # Order-preserving de-duplication across all statements.
        return list(dict.fromkeys(item for group in groups for item in (group or [])))

    # Render markdown
    report: list[str] = ["## Statements Summary", ""]
    if errors:
        report += ["**Notes:**", *_bullets(errors), ""]

    for number, summary in enumerate(summaries, start=1):
        report += [
            f"### Statement {number}",
            f"**Total Credits:** {summary.total_credits or 'N/A'}",
            f"**Total Debits:** {summary.total_debits or 'N/A'}",
            f"**Largest Transaction:** {summary.largest_transaction or 'N/A'}",
        ]
        if summary.recurring_payments:
            report += ["\n**Recurring Payments:**", *_bullets(summary.recurring_payments)]
        if summary.flags:
            report += ["\n**Flags / Unusual Activity:**", *_bullets(summary.flags)]
        report += [f"\n*{summary.raw_reasoning}*", ""]

    # Overall union (useful across multiple statements)
    overall_recurring = _unique(s.recurring_payments for s in summaries)
    overall_flags = _unique(s.flags for s in summaries)

    report.append("## Overall (union across statements)")
    if overall_recurring:
        report += ["\n**Recurring Payments (union):**", *_bullets(overall_recurring)]
    else:
        report.append("\n**Recurring Payments (union):** N/A")

    if overall_flags:
        report += ["\n**Flags / Unusual Activity (union):**", *_bullets(overall_flags)]
    else:
        report.append("\n**Flags / Unusual Activity (union):** N/A")

    summary_json = json.dumps([s.model_dump() for s in summaries], ensure_ascii=False)
    return "\n".join(report).strip(), combined_text, summary_json
741
-
742
-
743
- def _safe_json_loads(s: str) -> object:
744
- try:
745
- obj = json.loads(s or "")
746
- if isinstance(obj, (dict, list)):
747
- return obj
748
- return {}
749
- except Exception:
750
- return {}
751
-
752
-
753
- def _escape_pdf_text(s: str) -> str:
754
- # PDF literal strings escape backslash and parentheses.
755
- return (s or "").replace("\\", "\\\\").replace("(", "\\(").replace(")", "\\)")
756
-
757
-
758
- def _simple_pdf_bytes(title: str, lines: list[str]) -> bytes:
759
- """
760
- Tiny, dependency-free, single-page PDF generator for short text reports.
761
- """
762
- font = "Helvetica"
763
- font_size = 11
764
- left = 54
765
- top = 790
766
- leading = 14
767
-
768
- safe_title = _escape_pdf_text(title)
769
- safe_lines = [_escape_pdf_text(ln) for ln in lines]
770
-
771
- content_lines: list[str] = []
772
- content_lines.append("BT")
773
- content_lines.append(f"/F1 {font_size} Tf")
774
- content_lines.append(f"{left} {top} Td")
775
- content_lines.append(f"({_escape_pdf_text(safe_title)}) Tj")
776
- content_lines.append(f"0 -{leading*2} Td")
777
- for ln in safe_lines:
778
- content_lines.append(f"({ln}) Tj")
779
- content_lines.append(f"0 -{leading} Td")
780
- content_lines.append("ET")
781
- stream = "\n".join(content_lines).encode("latin-1", errors="replace")
782
-
783
- objects: list[bytes] = []
784
- objects.append(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n")
785
- objects.append(b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n")
786
- objects.append(
787
- b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] "
788
- b"/Resources << /Font << /F1 4 0 R >> >> /Contents 5 0 R >>\nendobj\n"
789
- )
790
- objects.append(f"4 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /{font} >>\nendobj\n".encode())
791
- objects.append(
792
- b"5 0 obj\n<< /Length " + str(len(stream)).encode() + b" >>\nstream\n" + stream + b"\nendstream\nendobj\n"
793
- )
794
-
795
- out = io.BytesIO()
796
- out.write(b"%PDF-1.4\n%\xe2\xe3\xcf\xd3\n")
797
- xref: list[int] = [0]
798
- for obj in objects:
799
- xref.append(out.tell())
800
- out.write(obj)
801
- xref_start = out.tell()
802
- out.write(f"xref\n0 {len(xref)}\n".encode())
803
- out.write(b"0000000000 65535 f \n")
804
- for off in xref[1:]:
805
- out.write(f"{off:010d} 00000 n \n".encode())
806
- out.write(
807
- b"trailer\n<< /Size "
808
- + str(len(xref)).encode()
809
- + b" /Root 1 0 R >>\nstartxref\n"
810
- + str(xref_start).encode()
811
- + b"\n%%EOF\n"
812
- )
813
- return out.getvalue()
814
-
815
-
816
def export_bank_summary_csv(summary_json: str) -> tuple[str | None, str]:
    """Write the statement summaries to a CSV file for download.

    The file is created under a fresh temporary directory using the
    timestamped name reported in the status message, so the name the user is
    told about is the name of the file they receive.

    Args:
        summary_json: JSON text holding one summary object or a list of them.

    Returns:
        ``(path, status_markdown)``. ``path`` is None when there is nothing
        to export. The CSV has one row per statement plus a final "overall"
        row with the de-duplicated recurring payments and flags.
    """
    data = _safe_json_loads(summary_json)
    if not data:
        return None, "**Export error:** Run 'Analyse statement' first."

    statements = data if isinstance(data, list) else [data]

    def _as_strings(value) -> list[str]:
        # Tolerate a bare string or non-string items so join() cannot fail.
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        return [str(value)]

    overall_recurring: list[str] = []
    overall_flags: list[str] = []
    rows: list[list] = []
    for i, s in enumerate(statements, start=1):
        if not isinstance(s, dict):
            continue
        recurring = _as_strings(s.get("recurring_payments"))
        flags = _as_strings(s.get("flags"))
        for item in recurring:
            if item not in overall_recurring:
                overall_recurring.append(item)
        for item in flags:
            if item not in overall_flags:
                overall_flags.append(item)
        rows.append([
            i,
            s.get("total_credits") or "",
            s.get("total_debits") or "",
            s.get("largest_transaction") or "",
            " | ".join(recurring),
            " | ".join(flags),
            s.get("raw_reasoning") or "",
        ])

    filename = f"bank-statement-summaries_{_dt.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    export_path = Path(tempfile.mkdtemp(prefix="chex_")) / filename
    # newline="" lets the csv module control line endings.
    with export_path.open("w", newline="", encoding="utf-8") as fh:
        writer = csv.writer(fh)
        writer.writerow([
            "statement_index",
            "total_credits",
            "total_debits",
            "largest_transaction",
            "recurring_payments",
            "flags",
            "raw_reasoning",
        ])
        writer.writerows(rows)
        # Overall union row
        writer.writerow([
            "overall",
            "",
            "",
            "",
            " | ".join(overall_recurring),
            " | ".join(overall_flags),
            "",
        ])

    return str(export_path), f"**CSV ready:** `{filename}`"
877
-
878
-
879
def export_bank_summary_pdf(summary_json: str) -> tuple[str | None, str]:
    """Render the analysed bank-statement summaries into a downloadable PDF.

    The document lists the de-duplicated recurring payments and flags across
    all statements, followed by a short section of totals per statement.

    Args:
        summary_json: JSON text containing either one summary object or a
            list of summary objects (as decoded by ``_safe_json_loads``).

    Returns:
        ``(path, status_markdown)``. ``path`` is ``None`` when there is
        nothing to export; otherwise it is the path of the written PDF,
        whose file name matches the one shown in ``status_markdown``.
    """
    data = _safe_json_loads(summary_json)
    if not data:
        return None, "**Export error:** Run 'Analyse statement' first."

    def _as_text_items(value: object) -> list[str]:
        """Coerce a decoded JSON value into a list of display strings."""
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value]
        # A bare scalar (e.g. a single string) is one item, not a sequence
        # of characters.
        return [str(value)]

    def _bullets(items: list[str]) -> list[str]:
        """Format items as bullet lines, with a placeholder when empty."""
        return [f"- {item}" for item in items] if items else ["- (none)"]

    statements = data if isinstance(data, list) else [data]
    # Non-dict entries are skipped, but surviving sections keep their
    # original 1-based statement number.
    indexed = [
        (position, entry)
        for position, entry in enumerate(statements, start=1)
        if isinstance(entry, dict)
    ]

    # dict.fromkeys de-duplicates while preserving first-seen order.
    overall_recurring = list(dict.fromkeys(
        item
        for _, entry in indexed
        for item in _as_text_items(entry.get("recurring_payments"))
    ))
    overall_flags = list(dict.fromkeys(
        item
        for _, entry in indexed
        for item in _as_text_items(entry.get("flags"))
    ))

    title = "CHEX — Bank Statement Summary (Multiple)"
    lines: list[str] = [
        f"Generated: {_dt.datetime.now().isoformat(timespec='seconds')}",
        "",
        f"Statements analysed: {len(statements)}",
        "",
        "Overall Recurring Payments:",
        *_bullets(overall_recurring),
        "",
        "Overall Flags / Unusual Activity:",
        *_bullets(overall_flags),
        "",
    ]

    for position, entry in indexed:
        lines += [
            f"Statement {position}:",
            f"- Total Credits: {entry.get('total_credits') or 'N/A'}",
            f"- Total Debits: {entry.get('total_debits') or 'N/A'}",
            f"- Largest Transaction: {entry.get('largest_transaction') or 'N/A'}",
        ]
        # str() guards against a non-string value in the decoded JSON.
        reasoning = str(entry.get("raw_reasoning") or "").strip()
        if reasoning:
            lines.append("- Model reasoning: " + reasoning)
        lines.append("")

    pdf_bytes = _simple_pdf_bytes(title, lines)

    filename = f"bank-statement-summaries_{_dt.datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
    # Write into a private temp directory so the file on disk (and therefore
    # the download) carries the same name that is reported to the user.
    out_path = Path(tempfile.mkdtemp(prefix="chex_")) / filename
    out_path.write_bytes(pdf_bytes)

    return str(out_path), f"**PDF ready:** `{filename}`"
938
-
939
-
940
def bank_qa(statement_text: str, question: str) -> tuple[str, str, str, str]:
    """Answer a single question about a bank statement.

    Returns a 4-tuple ``(label_html, answer, citation, reasoning)``.
    Input problems (no statement, no question, no inference endpoint) are
    reported through that same tuple with an "N/A" label. Inference is tried
    twice, the second time with ``strict=True`` messages; if both attempts
    raise, an "ABSENT" fallback tuple is returned.
    """
    # --- Guard clauses: report unusable input without calling the model ---
    if statement_text.strip() == "":
        return (
            format_label_html("N/A"), "", "",
            "Please run 'Analyse Statement' first to load the statement.",
        )
    if question.strip() == "":
        return format_label_html("N/A"), "", "", "Please enter a question."
    if not MLX_SERVER_URL:
        return (
            format_label_html("N/A"), "Inference client not initialised", "",
            f"Error: {model_load_error}.",
        )

    base_messages = _build_bank_messages(_truncate(statement_text), question)

    # First pass is lenient, second pass asks for strict output.
    for attempt_no, strict_mode in enumerate((False, True), start=1):
        prompt = _apply_messages(base_messages, strict=strict_mode)
        try:
            parsed = _parse_model_output(_run_inference(prompt), question)
            label_html = format_label_html(parsed.label.value)
            answer = parsed.answer if parsed.answer else "(none — information not found in statement)"
            citation = parsed.citation if parsed.citation else "(none)"
            return label_html, answer, citation, parsed.reasoning
        except Exception as e:
            if attempt_no == 1:
                print(f" Q&A parse attempt 1 failed ({e}). Retrying...")
            else:
                print(f" Q&A parse attempt 2 failed ({e}). Returning fallback.")

    # Both attempts failed: report the information as not found.
    return (
        format_label_html("ABSENT"),
        "(none — information not found in statement)",
        "(none)",
        "Model output could not be parsed after two attempts.",
    )
978
-
979
-
980
- # ---------------------------------------------------------------------------
981
- # Benchmark table
982
- # ---------------------------------------------------------------------------
983
-
984
- import pandas as pd
985
-
986
# Column headings of the benchmark comparison table, in display order.
_BENCHMARK_COLUMNS = (
    "Question",
    "Ground Truth",
    "Base Model (untuned)",
    "CHEX Fine-tuned",
    "Hallucinated?",
)

# One tuple per benchmark question; values line up with _BENCHMARK_COLUMNS.
_BENCHMARK_DATA = (
    (
        "What is the limitation of liability?",
        "GROUNDED — $50,000 cap",
        "GROUNDED — $100,000 cap (wrong amount)",
        "GROUNDED — $50,000 cap ✓",
        "No (wrong value)",
    ),
    (
        "Does this contract include a non-compete clause?",
        "ABSENT",
        "🚨 GROUNDED — 'Licensee shall not engage in competing activities...' (fabricated)",
        "ABSENT — null ✓",
        "YES",
    ),
    (
        "What is the term of the NDA?",
        "GROUNDED — 3 years",
        "GROUNDED — 2 years (wrong duration)",
        "GROUNDED — three (3) years ✓",
        "No (wrong value)",
    ),
    (
        "Is there a termination for convenience clause?",
        "ABSENT",
        "🚨 GROUNDED — 'Either party may terminate at any time...' (fabricated)",
        "ABSENT — null ✓",
        "YES",
    ),
    (
        "What are the monthly payment terms?",
        "GROUNDED — $5,000/month",
        "GROUNDED — $5,000/month ✓",
        "GROUNDED — $5,000/month ✓",
        "No",
    ),
)

# Same records as before: a list of dicts keyed by column heading.
BENCHMARK_ROWS = [dict(zip(_BENCHMARK_COLUMNS, values)) for values in _BENCHMARK_DATA]

# Tabular form of the rows above, one DataFrame column per heading.
BENCHMARK_DF = pd.DataFrame(BENCHMARK_ROWS)
1025
-
1026
- # ---------------------------------------------------------------------------
1027
- # Warning banner
1028
- # ---------------------------------------------------------------------------
1029
-
1030
from html import escape as _html_escape

# Banner shown at the top of the UI when the model could not be loaded.
# Stays an empty string (no banner) when there is no load error.
WARNING_HTML = ""
if model_load_error:
    # The error text is inserted into raw HTML, so escape it first: any
    # "<", ">" or "&" in the message would otherwise be parsed as markup
    # and could break or hide the banner.
    _escaped_load_error = _html_escape(str(model_load_error))
    WARNING_HTML = (
        '<div class="chex-banner">'
        '<span class="chex-banner-icon">⚠</span>'
        f'<div class="chex-banner-body"><strong>Model not loaded</strong> · '
        f'{_escaped_load_error} — set <code>HF_MODEL_REPO</code> in Space secrets.</div>'
        '</div>'
    )
1039
-
1040
- # ---------------------------------------------------------------------------
1041
- # CSS
1042
- # ---------------------------------------------------------------------------
1043
-
1044
- CHEX_CSS = """
1045
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500;600&display=swap');
1046
-
1047
- *, *::before, *::after { box-sizing: border-box; }
1048
-
1049
- :root {
1050
- --bg-base: #0B0E14;
1051
- --bg-grad: linear-gradient(180deg, #0B0E14 0%, #06080C 100%);
1052
- --bg-elev: #131720;
1053
- --bg-elev-strong: #191E2B;
1054
- --bg-sunken: #0E121A;
1055
- --bg-input: rgba(0,0,0,0.2);
1056
- --border: rgba(255,255,255,0.06);
1057
- --border-strong: rgba(255,255,255,0.12);
1058
- --hairline: rgba(255,255,255,0.03);
1059
- --fg: #E2E8F0;
1060
- --fg-muted: #94A3B8;
1061
- --fg-subtle: #475569;
1062
- --green: #10B981;
1063
- --green-bg: rgba(16,185,129,0.10);
1064
- --green-border: rgba(16,185,129,0.25);
1065
- --red: #F43F5E;
1066
- --red-bg: rgba(244,63,94,0.10);
1067
- --red-border: rgba(244,63,94,0.25);
1068
- --amber: #F59E0B;
1069
- --amber-bg: rgba(245,158,11,0.10);
1070
- --amber-border: rgba(245,158,11,0.25);
1071
- --blur: 24px;
1072
- --blur-strong: 32px;
1073
- --shadow-md: 0 1px 0 rgba(255,255,255,0.03) inset,
1074
- 0 8px 24px rgba(0,0,0,0.4),
1075
- 0 1px 2px rgba(0,0,0,0.2);
1076
- --radius: 10px;
1077
- --radius-lg: 14px;
1078
- }
1079
-
1080
- body {
1081
- background: var(--bg-grad) !important;
1082
- background-attachment: fixed !important;
1083
- background-color: var(--bg-base) !important;
1084
- min-height: 100vh;
1085
- }
1086
-
1087
- .gradio-container {
1088
- font-family: 'Inter', system-ui, -apple-system, sans-serif !important;
1089
- font-size: 14px !important;
1090
- line-height: 1.55 !important;
1091
- color: var(--fg) !important;
1092
- background: transparent !important;
1093
- -webkit-font-smoothing: antialiased !important;
1094
- -moz-osx-font-smoothing: grayscale !important;
1095
- letter-spacing: -0.006em !important;
1096
- max-width: 1480px !important;
1097
- margin: 0 auto !important;
1098
- padding: 0 !important;
1099
- }
1100
-
1101
- footer, .footer, .built-with, #footer,
1102
- footer.svelte-1ax1toq, .svelte-1ax1toq.footer,
1103
- .gradio-container > .footer,
1104
- .share-button, .copy-all-button,
1105
- .gradio-container > .top-panel { display: none !important; }
1106
-
1107
- #root, .app, main {
1108
- background: transparent !important;
1109
- padding: 0 !important;
1110
- margin: 0 !important;
1111
- }
1112
-
1113
- .contain, .container {
1114
- padding: 0 !important;
1115
- gap: 0 !important;
1116
- max-width: 100% !important;
1117
- background: transparent !important;
1118
- }
1119
-
1120
- .block, .gr-block, .gr-box, .gr-group, .gradio-container .block {
1121
- background: transparent !important;
1122
- border: none !important;
1123
- box-shadow: none !important;
1124
- padding: 0 !important;
1125
- border-radius: 0 !important;
1126
- }
1127
-
1128
- .gap, .gr-row { gap: 20px !important; }
1129
-
1130
- .panel, .gr-panel, .gr-padded {
1131
- background: transparent !important;
1132
- border: none !important;
1133
- padding: 0 !important;
1134
- box-shadow: none !important;
1135
- }
1136
-
1137
- .tabs, .gr-tabs { background: transparent !important; border: none !important; }
1138
-
1139
- .tabitem, .gr-tabitem {
1140
- background: transparent !important;
1141
- border: none !important;
1142
- padding: 24px !important;
1143
- }
1144
-
1145
- [data-testid="textbox"], .gr-textbox {
1146
- background: transparent !important;
1147
- border: none !important;
1148
- box-shadow: none !important;
1149
- padding: 0 !important;
1150
- }
1151
-
1152
- label.block, .label-wrap {
1153
- background: transparent !important;
1154
- border: none !important;
1155
- padding: 0 !important;
1156
- gap: 6px !important;
1157
- display: flex !important;
1158
- flex-direction: column !important;
1159
- }
1160
-
1161
- .row, .gr-row { background: transparent !important; border: none !important; padding: 0 !important; }
1162
-
1163
- .form, .gr-form {
1164
- background: transparent !important;
1165
- border: none !important;
1166
- box-shadow: none !important;
1167
- padding: 0 !important;
1168
- gap: 14px !important;
1169
- }
1170
-
1171
- .chex-topbar {
1172
- display: flex;
1173
- align-items: center;
1174
- gap: 16px;
1175
- padding: 0 28px;
1176
- height: 60px;
1177
- position: sticky;
1178
- top: 0;
1179
- z-index: 100;
1180
- background: rgba(11, 14, 20, 0.75);
1181
- backdrop-filter: blur(var(--blur-strong)) saturate(160%);
1182
- -webkit-backdrop-filter: blur(var(--blur-strong)) saturate(160%);
1183
- border-bottom: 1px solid var(--hairline);
1184
- }
1185
-
1186
- .chex-logo {
1187
- width: 24px; height: 24px; border-radius: 6px;
1188
- background: #E2E8F0;
1189
- color: #0B0E14; display: grid; place-items: center;
1190
- font-family: 'JetBrains Mono', monospace; font-weight: 700; font-size: 11px;
1191
- letter-spacing: -0.05em;
1192
- box-shadow: 0 2px 10px rgba(0,0,0,0.5);
1193
- flex-shrink: 0;
1194
- }
1195
-
1196
- .chex-name { font-size: 15px; font-weight: 600; letter-spacing: -0.01em; color: var(--fg); font-family: 'Inter', sans-serif; }
1197
- .chex-tag { font-size: 12px; color: var(--fg-muted); font-weight: 400; padding-left: 12px; border-left: 1px solid var(--hairline); font-family: 'Inter', sans-serif; }
1198
-
1199
- .chex-pill {
1200
- display: inline-flex; align-items: center; gap: 8px;
1201
- padding: 5px 12px 5px 10px; border: 1px solid var(--border); border-radius: 999px;
1202
- font-size: 12px; color: var(--fg-muted); background: var(--bg-elev);
1203
- backdrop-filter: blur(12px); -webkit-backdrop-filter: blur(12px);
1204
- font-family: 'JetBrains Mono', monospace; white-space: nowrap;
1205
- }
1206
-
1207
- .chex-dot {
1208
- width: 6px; height: 6px; border-radius: 50%; background: var(--green);
1209
- box-shadow: 0 0 0 3px rgba(15,157,88,0.22); display: inline-block; flex-shrink: 0;
1210
- }
1211
-
1212
- .chex-banner {
1213
- display: flex; align-items: center; gap: 12px; padding: 11px 20px;
1214
- border-bottom: 1px solid var(--amber-border); background: var(--amber-bg);
1215
- backdrop-filter: blur(var(--blur)) saturate(160%); -webkit-backdrop-filter: blur(var(--blur)) saturate(160%);
1216
- color: var(--amber); font-size: 13px; font-family: 'Inter', sans-serif; font-weight: 500;
1217
- }
1218
- .chex-banner-icon { font-size: 14px; flex-shrink: 0; }
1219
- .chex-banner-body { color: var(--fg); font-weight: 400; line-height: 1.5; }
1220
- .chex-banner-body strong { color: var(--fg); font-weight: 600; }
1221
- .chex-banner code { font-family: 'JetBrains Mono', monospace; font-size: 12px; background: rgba(0,0,0,0.06); padding: 1px 5px; border-radius: 4px; }
1222
-
1223
- .tab-nav {
1224
- background: var(--bg-elev) !important;
1225
- backdrop-filter: blur(var(--blur)) saturate(160%) !important;
1226
- -webkit-backdrop-filter: blur(var(--blur)) saturate(160%) !important;
1227
- border-bottom: 1px solid var(--hairline) !important;
1228
- border-top: none !important; padding: 0 20px !important; gap: 0 !important;
1229
- position: sticky !important; top: 60px !important; z-index: 99 !important; overflow: visible !important;
1230
- }
1231
-
1232
- .tab-nav button {
1233
- background: transparent !important; border: none !important; border-radius: 0 !important;
1234
- padding: 14px 16px !important; color: var(--fg-muted) !important;
1235
- font-size: 13px !important; font-weight: 500 !important; font-family: 'Inter', sans-serif !important;
1236
- letter-spacing: -0.003em !important; position: relative !important; white-space: nowrap !important;
1237
- transition: color 0.15s ease !important; cursor: pointer !important; box-shadow: none !important; outline: none !important;
1238
- }
1239
-
1240
- .tab-nav button:hover { color: var(--fg) !important; background: transparent !important; }
1241
-
1242
- .tab-nav button.selected, .tab-nav button[aria-selected="true"] {
1243
- color: var(--fg) !important; background: transparent !important; font-weight: 500 !important; box-shadow: none !important;
1244
- }
1245
-
1246
- .tab-nav button.selected::after, .tab-nav button[aria-selected="true"]::after {
1247
- content: ""; position: absolute; left: 12px; right: 12px; bottom: -1px;
1248
- height: 1.5px; background: var(--fg); border-radius: 2px 2px 0 0;
1249
- }
1250
-
1251
- .tabitem { border: none !important; background: transparent !important; padding: 24px 24px !important; }
1252
-
1253
- .gradio-container .gr-group {
1254
- background: var(--bg-elev) !important;
1255
- backdrop-filter: blur(var(--blur)) saturate(180%) !important;
1256
- -webkit-backdrop-filter: blur(var(--blur)) saturate(180%) !important;
1257
- border: 1px solid var(--border) !important;
1258
- border-radius: var(--radius-lg) !important;
1259
- box-shadow: var(--shadow-md) !important;
1260
- overflow: hidden !important; padding: 0 !important;
1261
- }
1262
-
1263
- .gradio-container .gr-group > *:not(.chex-card-header):not(.chex-chip-row) {
1264
- padding-left: 20px !important; padding-right: 20px !important;
1265
- }
1266
- .gradio-container .gr-group > *:last-child { padding-bottom: 18px !important; }
1267
-
1268
- .chex-card-header {
1269
- padding: 16px 20px; display: flex; align-items: center;
1270
- justify-content: space-between; gap: 12px; border-bottom: 1px solid var(--hairline);
1271
- }
1272
-
1273
- .chex-card-title {
1274
- font-size: 13.5px; font-weight: 600; letter-spacing: -0.01em;
1275
- display: inline-flex; align-items: center; gap: 10px; color: var(--fg);
1276
- white-space: nowrap; font-family: 'Inter', sans-serif;
1277
- }
1278
-
1279
- .chex-card-kicker { font-family: 'JetBrains Mono', monospace; font-size: 11px; color: var(--fg-subtle); font-weight: 400; letter-spacing: 0.04em; }
1280
-
1281
- .chex-chip-row {
1282
- display: flex; align-items: center; gap: 8px; padding: 12px 20px;
1283
- border-top: 1px solid var(--hairline); background: var(--bg-sunken); flex-wrap: wrap;
1284
- }
1285
-
1286
- .chex-chip-label { font-family: 'JetBrains Mono', monospace; font-size: 10.5px; text-transform: uppercase; letter-spacing: 0.08em; color: var(--fg-subtle); white-space: nowrap; margin-right: 4px; }
1287
-
1288
- .chex-suggested {
1289
- display: flex; align-items: center; gap: 10px; padding: 10px 14px;
1290
- background: rgba(13,18,32,0.04); border: 1px solid var(--border); border-radius: var(--radius);
1291
- font-size: 12.5px; color: var(--fg-muted); font-family: 'Inter', sans-serif; line-height: 1.4; margin-top: 2px;
1292
- }
1293
- .chex-suggested-icon { font-size: 13px; flex-shrink: 0; opacity: 0.7; }
1294
-
1295
- label > span:first-child, .label-wrap span,
1296
- .gradio-container label span.text-gray-500, span.svelte-1b6s6s {
1297
- font-family: 'JetBrains Mono', monospace !important; font-size: 10.5px !important;
1298
- font-weight: 500 !important; text-transform: uppercase !important; letter-spacing: 0.08em !important;
1299
- color: var(--fg-subtle) !important; margin-bottom: 6px !important; display: block !important;
1300
- }
1301
-
1302
- textarea, input[type="text"], input[type="search"],
1303
- .gradio-container .gr-input, .gradio-container .gr-textarea,
1304
- .gradio-container [data-testid="textbox"] textarea,
1305
- .gradio-container [data-testid="textbox"] input {
1306
- background: var(--bg-input) !important; backdrop-filter: blur(10px) !important;
1307
- -webkit-backdrop-filter: blur(10px) !important; border: 1px solid var(--border) !important;
1308
- border-radius: var(--radius) !important; color: var(--fg) !important;
1309
- font-family: 'Inter', sans-serif !important; font-size: 13px !important;
1310
- line-height: 1.6 !important; padding: 11px 14px !important;
1311
- transition: border-color 0.18s ease, box-shadow 0.18s ease, background 0.18s ease !important;
1312
- resize: vertical !important;
1313
- }
1314
-
1315
- textarea:focus, input[type="text"]:focus,
1316
- .gradio-container [data-testid="textbox"] textarea:focus,
1317
- .gradio-container [data-testid="textbox"] input:focus {
1318
- border-color: var(--border-strong) !important; background: var(--bg-elev) !important;
1319
- box-shadow: 0 0 0 2px rgba(255,255,255,0.05) !important; outline: none !important;
1320
- }
1321
-
1322
- textarea::placeholder, input::placeholder { color: var(--fg-subtle) !important; }
1323
-
1324
- textarea[readonly],
1325
- .gradio-container [data-testid="textbox"][data-interactive="false"] textarea {
1326
- background: var(--bg-sunken) !important; border: 1px solid var(--hairline) !important;
1327
- color: var(--fg) !important; cursor: default !important;
1328
- }
1329
-
1330
- .gradio-container button {
1331
- font-family: 'Inter', sans-serif !important; font-size: 13px !important;
1332
- font-weight: 500 !important; border-radius: var(--radius) !important;
1333
- padding: 10px 16px !important;
1334
- transition: opacity 0.15s ease, background 0.15s ease, box-shadow 0.15s ease !important;
1335
- cursor: pointer !important; letter-spacing: -0.003em !important;
1336
- }
1337
-
1338
- .gradio-container button.primary, button.primary {
1339
- background: var(--fg) !important; color: var(--bg-base) !important; border: 1px solid var(--fg) !important;
1340
- box-shadow: 0 6px 18px rgba(0,0,0,0.4), 0 1px 0 rgba(255,255,255,0.1) inset !important;
1341
- }
1342
- .gradio-container button.primary:hover, button.primary:hover { opacity: 0.9 !important; box-shadow: 0 4px 12px rgba(0,0,0,0.3) !important; }
1343
-
1344
- .gradio-container button.secondary, button.secondary {
1345
- background: transparent !important; color: var(--fg-muted) !important;
1346
- border: 1px solid var(--border-strong) !important; box-shadow: none !important;
1347
- }
1348
- .gradio-container button.secondary:hover, button.secondary:hover { background: var(--bg-elev) !important; color: var(--fg) !important; border-color: var(--border-strong) !important; }
1349
-
1350
- button.sm, .gradio-container button[size="sm"], button.small { font-size: 12px !important; padding: 7px 11px !important; }
1351
-
1352
- .gradio-container .upload-container, .gradio-container [data-testid="file"] {
1353
- background: var(--bg-input) !important; border: 1px dashed var(--border-strong) !important; border-radius: var(--radius) !important;
1354
- }
1355
-
1356
- .gradio-container .wrap.svelte-a4gbbr, .gradio-container .table-wrap,
1357
- .gradio-container [data-testid="dataframe"] {
1358
- background: var(--bg-elev) !important;
1359
- backdrop-filter: blur(var(--blur)) saturate(180%) !important;
1360
- -webkit-backdrop-filter: blur(var(--blur)) saturate(180%) !important;
1361
- border: 1px solid var(--border) !important; border-radius: var(--radius-lg) !important;
1362
- box-shadow: var(--shadow-md) !important; overflow: hidden !important;
1363
- }
1364
-
1365
- .gradio-container table {
1366
- background: transparent !important; font-size: 13px !important;
1367
- font-family: 'Inter', sans-serif !important; border-collapse: separate !important;
1368
- border-spacing: 0 !important; width: 100% !important; border: none !important;
1369
- box-shadow: none !important; border-radius: 0 !important;
1370
- }
1371
-
1372
- .gradio-container th {
1373
- background: var(--bg-sunken) !important; border-bottom: 1px solid var(--hairline) !important;
1374
- border-top: none !important; padding: 14px 18px !important;
1375
- font-family: 'JetBrains Mono', monospace !important; font-size: 10.5px !important;
1376
- text-transform: uppercase !important; letter-spacing: 0.08em !important;
1377
- color: var(--fg-muted) !important; font-weight: 500 !important; text-align: left !important;
1378
- }
1379
-
1380
- .gradio-container td {
1381
- padding: 16px 18px !important; border-top: 1px solid var(--hairline) !important;
1382
- border-bottom: none !important; vertical-align: top !important; line-height: 1.6 !important;
1383
- color: var(--fg) !important; background: transparent !important;
1384
- }
1385
-
1386
- .gradio-container tr:first-child td { border-top: none !important; }
1387
-
1388
- .gradio-container .prose, .gradio-container .md, .gradio-container [data-testid="markdown"] {
1389
- color: var(--fg) !important; font-family: 'Inter', sans-serif !important;
1390
- font-size: 13px !important; line-height: 1.65 !important;
1391
- }
1392
-
1393
- .gradio-container .prose h2, .gradio-container .md h2 {
1394
- font-size: 18px !important; font-weight: 600 !important; letter-spacing: -0.02em !important;
1395
- color: var(--fg) !important; margin-bottom: 10px !important; margin-top: 0 !important;
1396
- }
1397
-
1398
- .gradio-container .prose p, .gradio-container .md p {
1399
- color: var(--fg-muted) !important; font-size: 13px !important; line-height: 1.65 !important; margin-bottom: 8px !important;
1400
- }
1401
-
1402
- .gradio-container .prose strong, .gradio-container .md strong { color: var(--fg) !important; font-weight: 600 !important; }
1403
-
1404
- .gradio-container .prose code, .gradio-container .md code {
1405
- font-family: 'JetBrains Mono', monospace !important; font-size: 12px !important;
1406
- background: rgba(13,18,32,0.06) !important; padding: 1px 5px !important;
1407
- border-radius: 4px !important; color: var(--fg) !important;
1408
- }
1409
-
1410
- .chex-bench-intro {
1411
- background: var(--bg-elev); backdrop-filter: blur(var(--blur)) saturate(180%);
1412
- -webkit-backdrop-filter: blur(var(--blur)) saturate(180%);
1413
- border: 1px solid var(--border); border-radius: var(--radius-lg);
1414
- box-shadow: var(--shadow-md); padding: 24px 28px; margin-bottom: 20px;
1415
- }
1416
-
1417
- .chex-bench-intro h2 { margin: 0 0 10px; font-size: 19px; font-weight: 600; letter-spacing: -0.02em; color: var(--fg); font-family: 'Inter', sans-serif; }
1418
- .chex-bench-intro p { margin: 0; color: var(--fg-muted); font-size: 13px; line-height: 1.65; font-family: 'Inter', sans-serif; }
1419
-
1420
- .chex-bench-stats { display: grid; grid-template-columns: repeat(3, 1fr); gap: 8px; margin-top: 18px; }
1421
- .chex-bench-stat { background: var(--bg-sunken); border: 1px solid var(--hairline); border-radius: var(--radius); padding: 12px 14px; }
1422
- .chex-bench-stat .v { font-family: 'Inter', sans-serif; font-size: 20px; font-weight: 600; letter-spacing: -0.025em; color: var(--fg); line-height: 1.2; margin-bottom: 4px; }
1423
- .chex-bench-stat .v.red { color: var(--red); }
1424
- .chex-bench-stat .v.green { color: var(--green); }
1425
- .chex-bench-stat .k { font-size: 10px; text-transform: uppercase; letter-spacing: 0.08em; color: var(--fg-subtle); font-family: 'JetBrains Mono', monospace; }
1426
-
1427
- .chex-footer {
1428
- border-top: 1px solid var(--hairline); padding: 14px 28px;
1429
- display: flex; align-items: center; gap: 18px; color: var(--fg-subtle);
1430
- font-size: 11.5px; font-family: 'JetBrains Mono', monospace;
1431
- background: var(--bg-elev); backdrop-filter: blur(var(--blur));
1432
- -webkit-backdrop-filter: blur(var(--blur)); margin-top: 32px;
1433
- }
1434
- .chex-footer .sep { opacity: 0.4; }
1435
-
1436
- .chex-label-wrap { padding: 4px 0 8px; }
1437
- .chex-divider { height: 1px; background: var(--hairline); margin: 18px 0; }
1438
- .chex-section-kicker { font-family: 'JetBrains Mono', monospace; font-size: 10.5px; text-transform: uppercase; letter-spacing: 0.08em; color: var(--fg-subtle); margin-bottom: 10px; display: block; }
1439
- .chex-card-body { padding: 18px 20px; display: flex; flex-direction: column; gap: 14px; }
1440
-
1441
- *::-webkit-scrollbar { width: 8px; height: 8px; }
1442
- *::-webkit-scrollbar-thumb { background: var(--border-strong); border-radius: 999px; border: 2px solid transparent; background-clip: padding-box; }
1443
- *::-webkit-scrollbar-track { background: transparent; }
1444
-
1445
- .gradio-container .gap-4 { gap: 14px !important; }
1446
- .gradio-container .gap-2 { gap: 8px !important; }
1447
-
1448
- .tabitem .tab-nav { position: static !important; top: auto !important; }
1449
-
1450
- @media (max-width: 900px) {
1451
- .chex-topbar { padding: 0 16px; }
1452
- .chex-tag { display: none; }
1453
- .tabitem { padding: 16px !important; }
1454
- .chex-bench-stats { grid-template-columns: 1fr; }
1455
- .chex-footer { padding: 12px 16px; gap: 12px; flex-wrap: wrap; }
1456
- }
1457
- """
1458
-
1459
- # ---------------------------------------------------------------------------
1460
- # Static HTML
1461
- # ---------------------------------------------------------------------------
1462
-
1463
- TOPBAR_HTML = """
1464
- <div class="chex-topbar">
1465
- <div class="chex-logo">CX</div>
1466
- <span class="chex-name">CHEX</span>
1467
- <span class="chex-tag">grounded answers from documents</span>
1468
- <div style="flex:1"></div>
1469
- <div class="chex-pill"><span class="chex-dot"></span>MI300X · ready</div>
1470
- </div>
1471
- """
1472
-
1473
- FOOTER_HTML = """
1474
- <div class="chex-footer">
1475
- <span>chex/v0.4.1</span>
1476
- <span class="sep">·</span>
1477
- <span>endpoint: mi300x-east-2</span>
1478
- <span class="sep">·</span>
1479
- <span>tokens/s 142.7</span>
1480
- </div>
1481
- """
1482
-
1483
- BENCH_INTRO_HTML = """
1484
- <div class="chex-bench-intro">
1485
- <h2>Why grounding matters</h2>
1486
- <p>We ran the same questions through a base instruction-tuned model and through CHEX.
1487
- The base model invented or extrapolated answers in 4 of 5 cases — confident, plausible, wrong.
1488
- CHEX returned a verifiable label, a verbatim citation, and refused to answer when the source was silent.</p>
1489
- <div class="chex-bench-stats">
1490
- <div class="chex-bench-stat"><div class="v red">4/5</div><div class="k">Base hallucinations</div></div>
1491
- <div class="chex-bench-stat"><div class="v green">5/5</div><div class="k">CHEX correct</div></div>
1492
- <div class="chex-bench-stat"><div class="v">100%</div><div class="k">Cited verbatim</div></div>
1493
- </div>
1494
- </div>
1495
- """
1496
-
1497
- CONTRACT_SOURCE_HEADER_HTML = """
1498
- <div class="chex-card-header">
1499
- <span class="chex-card-title">
1500
- <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="opacity:0.55"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/><polyline points="14 2 14 8 20 8"/></svg>
1501
- Source Document
1502
- </span>
1503
- <span class="chex-card-kicker">paste · load sample</span>
1504
- </div>
1505
- """
1506
-
1507
- CONTRACT_RESULTS_HEADER_HTML = """
1508
- <div class="chex-card-header">
1509
- <span class="chex-card-title">
1510
- <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="opacity:0.55"><circle cx="11" cy="11" r="8"/><line x1="21" y1="21" x2="16.65" y2="16.65"/></svg>
1511
- Analysis
1512
- </span>
1513
- <span class="chex-card-kicker">grounded · cited · structured</span>
1514
- </div>
1515
- """
1516
-
1517
- CHIP_ROW_HTML = """
1518
- <div class="chex-chip-row">
1519
- <span class="chex-chip-label">Load sample</span>
1520
- </div>
1521
- """
1522
-
1523
- STATEMENT_SOURCE_HEADER_HTML = """
1524
- <div class="chex-card-header">
1525
- <span class="chex-card-title">
1526
- <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="opacity:0.55"><rect x="2" y="5" width="20" height="14" rx="2"/><line x1="2" y1="10" x2="22" y2="10"/></svg>
1527
- Bank Statement
1528
- </span>
1529
- <span class="chex-card-kicker">paste · pdf · csv · txt · xlsx · ofx</span>
1530
- </div>
1531
- """
1532
-
1533
- STATEMENT_RESULTS_HEADER_HTML = """
1534
- <div class="chex-card-header">
1535
- <span class="chex-card-title">
1536
- <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="opacity:0.55"><polyline points="22 12 18 12 15 21 9 3 6 12 2 12"/></svg>
1537
- Summary &amp; Q&amp;A
1538
- </span>
1539
- <span class="chex-card-kicker">summarise · ask · verify</span>
1540
- </div>
1541
- """
1542
-
1543
- # ---------------------------------------------------------------------------
1544
- # Gradio UI
1545
- # ---------------------------------------------------------------------------
1546
-
1547
with gr.Blocks(title="CHEX — Document Intelligence") as demo:
    # Page chrome: the top bar is always rendered; the warning banner is only
    # rendered when WARNING_HTML is non-empty.
    gr.HTML(TOPBAR_HTML)

    if WARNING_HTML:
        gr.HTML(WARNING_HTML)

    with gr.Tabs():

        # ── Tab 01: Contract Analysis ──────────────────────────────────── #
        with gr.Tab("01 Contract analysis"):
            with gr.Row(equal_height=False):

                # Left column: the contract text and the sample loaders.
                with gr.Column(scale=9):
                    with gr.Group():
                        gr.HTML(CONTRACT_SOURCE_HEADER_HTML)
                        contract_input = gr.Textbox(
                            label="Contract text",
                            lines=20,
                            placeholder="Paste your contract text here, or load a sample below…",
                            show_label=False,
                        )
                        gr.HTML(CHIP_ROW_HTML)
                        with gr.Row():
                            btn_software = gr.Button("Software License", variant="secondary", size="sm")
                            btn_nda = gr.Button("NDA", variant="secondary", size="sm")
                            btn_service = gr.Button("Service Agreement", variant="secondary", size="sm")
                        # Starts hidden; the sample loaders below fill it in
                        # and make it visible.
                        suggested_q = gr.HTML(value="", visible=False)

                # Right column: the question box and the structured answer.
                with gr.Column(scale=11):
                    with gr.Group():
                        gr.HTML(CONTRACT_RESULTS_HEADER_HTML)
                        with gr.Row():
                            question_input = gr.Textbox(
                                label="Question",
                                placeholder="e.g., What is the limitation of liability?",
                                lines=1,
                                show_label=False,
                                scale=8,
                            )
                            analyze_btn = gr.Button("Analyze ↵", variant="primary", scale=2)
                        label_display = gr.HTML(value=format_label_html("N/A"))
                        answer_output = gr.Textbox(label="Answer", interactive=False, lines=3)
                        citation_output = gr.Textbox(label="Citation", interactive=False, lines=2)
                        reasoning_output = gr.Textbox(label="Reasoning", interactive=False, lines=3)

        # ── Tab 02: Bank Statements ────────────────────────────────────── #
        with gr.Tab("02 Bank statements"):
            with gr.Row(equal_height=False):

                # Left column: one sub-tab per supported statement source.
                with gr.Column(scale=9):
                    with gr.Group():
                        gr.HTML(STATEMENT_SOURCE_HEADER_HTML)
                        with gr.Tabs():
                            with gr.Tab("Paste text"):
                                bank_paste_input = gr.Textbox(
                                    label="Bank statement text (supports multiple)",
                                    lines=20,
                                    placeholder=(
                                        "Paste one or more bank statements here.\n\n"
                                        "If you paste multiple statements, separate them with a line containing only "
                                        "`---` (3+ dashes)."
                                        "\n\nOr load the sample below…"
                                    ),
                                    show_label=False,
                                )
                                btn_load_statement = gr.Button("Load sample statement", variant="secondary", size="sm")
                            with gr.Tab("Upload PDF"):
                                bank_pdf_input = gr.File(
                                    label="PDF bank statement (multiple allowed)",
                                    file_types=[".pdf"],
                                    file_count="multiple",
                                )
                                bank_pdf_password_input = gr.Textbox(
                                    label="PDF password (optional)",
                                    type="password",
                                    placeholder="Leave blank if PDF is not encrypted",
                                    show_label=False,
                                )
                            with gr.Tab("Upload CSV"):
                                bank_csv_input = gr.File(
                                    label="CSV bank statement (multiple allowed)",
                                    file_types=[".csv"],
                                    file_count="multiple",
                                )
                            with gr.Tab("Upload TXT"):
                                bank_txt_input = gr.File(
                                    label="TXT bank statement (multiple allowed)",
                                    file_types=[".txt", ".text"],
                                    file_count="multiple",
                                )
                            with gr.Tab("Upload Excel"):
                                bank_xlsx_input = gr.File(
                                    label="Excel bank statement (.xlsx, multiple allowed)",
                                    file_types=[".xlsx"],
                                    file_count="multiple",
                                )
                            with gr.Tab("Upload OFX / QFX"):
                                bank_ofx_input = gr.File(
                                    label="OFX / QFX bank statement (multiple allowed)",
                                    file_types=[".ofx", ".qfx"],
                                    file_count="multiple",
                                )

                # Right column: summary, export buttons and follow-up Q&A.
                with gr.Column(scale=11):
                    with gr.Group():
                        gr.HTML(STATEMENT_RESULTS_HEADER_HTML)
                        analyse_stmt_btn = gr.Button("Analyse statement", variant="primary")
                        summary_output = gr.Markdown(value="*Run 'Analyse statement' to generate a financial summary.*")
                        with gr.Row():
                            export_csv_btn = gr.Button("Export CSV", variant="secondary", size="sm")
                            export_pdf_btn = gr.Button("Export PDF", variant="secondary", size="sm")
                        export_status = gr.Markdown(value="")
                        export_file = gr.File(label="Download", interactive=False)
                        gr.HTML('<div class="chex-divider"></div>')
                        gr.HTML('<span class="chex-section-kicker">Ask a question</span>')
                        with gr.Row():
                            bank_question_input = gr.Textbox(
                                label="Question",
                                placeholder="e.g., What was the largest debit this month?",
                                lines=1,
                                show_label=False,
                                scale=8,
                            )
                            bank_ask_btn = gr.Button("Ask ↵", variant="secondary", scale=2)
                        bank_label_display = gr.HTML(value=format_label_html("N/A"))
                        bank_answer_output = gr.Textbox(label="Answer", interactive=False, lines=3)
                        bank_citation_output = gr.Textbox(label="Citation", interactive=False, lines=2)
                        bank_reasoning_output = gr.Textbox(label="Reasoning", interactive=False, lines=3)

            # State written by "Analyse statement": the second and third
            # values it returns. They are read later by the Q&A handlers
            # (statement state) and the export handlers (summary state).
            bank_statement_state = gr.State("")
            bank_summary_state = gr.State("")
            # Hidden JSON output for `gradio_client` API usage.
            bank_api_output = gr.JSON(visible=False)
            bank_api_question = gr.Textbox(visible=False)
            bank_api_btn = gr.Button(visible=False)

        # ── Tab 03: Benchmark ──────────────────────────────────────────── #
        with gr.Tab("03 Benchmark"):
            gr.HTML(BENCH_INTRO_HTML)
            gr.Dataframe(
                value=BENCHMARK_DF,
                headers=list(BENCHMARK_DF.columns),
                datatype=["str"] * len(BENCHMARK_DF.columns),
                wrap=True,
                interactive=False,
            )

    gr.HTML(FOOTER_HTML)

    # ── Event handlers ─────────────────────────────────────────────────── #

    def _suggested_hint(question_html: str):
        """Return an update that shows the "Suggested:" hint for a sample.

        `question_html` is inserted verbatim into the hint markup, so it may
        contain inline HTML. Only pass trusted, hard-coded text.
        """
        return gr.update(
            value=(
                '<div class="chex-suggested"><span class="chex-suggested-icon">💡</span>'
                f"<span><strong>Suggested:</strong> {question_html}</span></div>"
            ),
            visible=True,
        )

    def load_software():
        """Load the software-license sample, its sample question and hint."""
        return (
            SOFTWARE_LICENSE,
            SAMPLE_QUESTIONS["software_license.txt"],
            _suggested_hint("What is the limitation of liability in this agreement?"),
        )

    def load_nda():
        """Load the NDA sample, its sample question and hint."""
        return (
            NDA,
            SAMPLE_QUESTIONS["nda.txt"],
            _suggested_hint("Does this agreement include a non-compete clause?"),
        )

    def load_service():
        """Load the service-agreement sample, its sample question and hint."""
        return (
            SERVICE_AGREEMENT,
            SAMPLE_QUESTIONS["service_agreement.txt"],
            _suggested_hint(
                "Does this contract include a termination for convenience clause? "
                "<em>(expected: ABSENT)</em>"
            ),
        )

    # Each sample button fills the contract box, the question box and the hint.
    sample_outputs = [contract_input, question_input, suggested_q]
    btn_software.click(fn=load_software, inputs=[], outputs=sample_outputs)
    btn_nda.click(fn=load_nda, inputs=[], outputs=sample_outputs)
    btn_service.click(fn=load_service, inputs=[], outputs=sample_outputs)

    # Contract Q&A can be triggered by the button or by pressing Enter in the
    # question box; both run the same function on the same components.
    contract_qa_inputs = [contract_input, question_input]
    contract_qa_outputs = [label_display, answer_output, citation_output, reasoning_output]

    analyze_btn.click(
        fn=analyze_contract,
        inputs=contract_qa_inputs,
        outputs=contract_qa_outputs,
        api_name="contract_analyze",
    )
    # The submit handler previously reused api_name="contract_analyze".
    # A repeated name is de-duplicated by Gradio (it appends a numeric
    # suffix), which left an accidental, implicitly named second endpoint.
    # It now has its own explicit name; "contract_analyze" is unchanged.
    question_input.submit(
        fn=analyze_contract,
        inputs=contract_qa_inputs,
        outputs=contract_qa_outputs,
        api_name="contract_analyze_submit",
    )

    btn_load_statement.click(fn=lambda: SAMPLE_STATEMENT, inputs=[], outputs=[bank_paste_input])

    # Statement sources, in the positional order analyse_bank_statement and
    # bank_analyze_api expect them.
    bank_source_inputs = [
        bank_paste_input,
        bank_pdf_input,
        bank_pdf_password_input,
        bank_csv_input,
        bank_txt_input,
        bank_xlsx_input,
        bank_ofx_input,
    ]

    analyse_stmt_btn.click(
        fn=analyse_bank_statement,
        inputs=bank_source_inputs,
        outputs=[summary_output, bank_statement_state, bank_summary_state],
    )

    # Both exports read the stored summary and write a file plus a status line.
    export_outputs = [export_file, export_status]
    export_csv_btn.click(
        fn=export_bank_summary_csv,
        inputs=[bank_summary_state],
        outputs=export_outputs,
    )
    export_pdf_btn.click(
        fn=export_bank_summary_pdf,
        inputs=[bank_summary_state],
        outputs=export_outputs,
    )

    # Statement Q&A runs against the text stored by the last analysis, not
    # against the raw input widgets.
    bank_qa_inputs = [bank_statement_state, bank_question_input]
    bank_qa_outputs = [
        bank_label_display,
        bank_answer_output,
        bank_citation_output,
        bank_reasoning_output,
    ]
    bank_ask_btn.click(fn=bank_qa, inputs=bank_qa_inputs, outputs=bank_qa_outputs)
    bank_question_input.submit(fn=bank_qa, inputs=bank_qa_inputs, outputs=bank_qa_outputs)

    def bank_analyze_api(
        paste_text: str,
        pdf_files,
        pdf_password: str | None,
        csv_files,
        txt_files,
        xlsx_files,
        ofx_files,
        question: str | None,
    ) -> dict:
        """Analyse bank statements and optionally answer one question.

        This is the handler behind the hidden `bank_api_btn`, exposed as the
        `bank_analyze` endpoint. The first seven arguments are forwarded
        unchanged, in order, to `analyse_bank_statement`.

        Args:
            paste_text: Pasted statement text.
            pdf_files: Uploaded PDF file(s), as delivered by the file input.
            pdf_password: Optional password for encrypted PDFs.
            csv_files: Uploaded CSV file(s).
            txt_files: Uploaded TXT file(s).
            xlsx_files: Uploaded Excel file(s).
            ofx_files: Uploaded OFX / QFX file(s).
            question: Optional question. If it is None or only whitespace,
                no Q&A is run.

        Returns:
            A dict with keys `summary_markdown`, `combined_text` and
            `summary_json` (the three values returned by
            `analyse_bank_statement`), plus `qa`. `qa` is None when no
            question was asked; otherwise it is a dict with `label_html`,
            `answer`, `citation` and `reasoning` as returned by `bank_qa`.
        """
        summary_md, combined_text, summary_json = analyse_bank_statement(
            paste_text,
            pdf_files,
            pdf_password,
            csv_files,
            txt_files,
            xlsx_files,
            ofx_files,
        )

        # Normalise once: None and whitespace-only both mean "no question".
        cleaned_question = (question or "").strip()

        qa: dict | None = None
        if cleaned_question:
            label_html, answer, citation, reasoning = bank_qa(combined_text, cleaned_question)
            qa = {
                "label_html": label_html,
                "answer": answer,
                "citation": citation,
                "reasoning": reasoning,
            }

        return {
            "summary_markdown": summary_md,
            "combined_text": combined_text,
            "summary_json": summary_json,
            "qa": qa,
        }

    # Programmatic entry point: same sources as the UI, plus the hidden
    # question box, returning everything as one JSON payload.
    bank_api_btn.click(
        fn=bank_analyze_api,
        inputs=[*bank_source_inputs, bank_api_question],
        outputs=[bank_api_output],
        api_name="bank_analyze",
    )
1818
-
1819
-
1820
if __name__ == "__main__":
    # Start the app only when this file is run as a script, not on import.
    demo.launch(
        show_error=True,
        theme=gr.themes.Base(),
        css=CHEX_CSS,
        ssr_mode=False,
    )