Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
"""
|
| 2 |
-
CHEX
|
| 3 |
HuggingFace Spaces Gradio Demo
|
| 4 |
|
| 5 |
Tab 1: Analyze Contract — paste a contract, ask a question, get a structured answer
|
| 6 |
Tab 2: Benchmark Demo — side-by-side table showing base model hallucinations vs CHEX
|
|
|
|
| 7 |
"""
|
| 8 |
|
| 9 |
from __future__ import annotations
|
|
@@ -22,9 +23,10 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
| 22 |
# ---------------------------------------------------------------------------
|
| 23 |
|
| 24 |
MODEL_PATH = os.environ.get(
|
| 25 |
-
"HF_MODEL_REPO", "PLACEHOLDER/
|
| 26 |
)
|
| 27 |
-
SAMPLE_DIR = Path(__file__).parent / ""
|
|
|
|
| 28 |
|
| 29 |
analyzer = None
|
| 30 |
model_load_error: Optional[str] = None
|
|
@@ -39,6 +41,10 @@ except Exception as e:
|
|
| 39 |
print(f"WARNING: Model failed to load: {e}")
|
| 40 |
print("Demo is running in preview mode — analysis will return a placeholder response.")
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
# ---------------------------------------------------------------------------
|
| 43 |
# Sample contract content
|
| 44 |
# ---------------------------------------------------------------------------
|
|
@@ -65,31 +71,27 @@ SAMPLE_QUESTIONS = {
|
|
| 65 |
# Label badge HTML
|
| 66 |
# ---------------------------------------------------------------------------
|
| 67 |
|
| 68 |
-
|
| 69 |
-
"GROUNDED":
|
| 70 |
-
"ABSENT":
|
| 71 |
-
"CONTRADICTS_PRIOR": "#
|
| 72 |
-
"N/A":
|
| 73 |
-
"ERROR":
|
| 74 |
-
}
|
| 75 |
-
|
| 76 |
-
BADGE_ICONS = {
|
| 77 |
-
"GROUNDED": "✓",
|
| 78 |
-
"ABSENT": "✗",
|
| 79 |
-
"CONTRADICTS_PRIOR": "⚠",
|
| 80 |
-
"N/A": "—",
|
| 81 |
-
"ERROR": "!",
|
| 82 |
}
|
| 83 |
|
| 84 |
|
| 85 |
def format_label_html(label: str) -> str:
|
| 86 |
-
|
| 87 |
-
|
| 88 |
return (
|
| 89 |
-
f'<div style="
|
| 90 |
-
f'border-radius:
|
| 91 |
-
f'
|
| 92 |
-
f'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
)
|
| 94 |
|
| 95 |
|
|
@@ -129,6 +131,125 @@ def analyze_contract(
|
|
| 129 |
return format_label_html("ERROR"), "", "", f"Inference error: {e}"
|
| 130 |
|
| 131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
# ---------------------------------------------------------------------------
|
| 133 |
# Benchmark table data (hardcoded — pre-computed base model outputs)
|
| 134 |
# ---------------------------------------------------------------------------
|
|
@@ -182,26 +303,525 @@ BENCHMARK_DF = pd.DataFrame(BENCHMARK_ROWS)
|
|
| 182 |
WARNING_HTML = ""
|
| 183 |
if model_load_error:
|
| 184 |
WARNING_HTML = (
|
| 185 |
-
'<div
|
| 186 |
-
'
|
| 187 |
-
f'<strong>
|
| 188 |
-
'
|
| 189 |
-
'to the correct model repository ID.'
|
| 190 |
'</div>'
|
| 191 |
)
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
with gr.Blocks(
|
| 194 |
-
title="CHEX
|
| 195 |
-
theme=gr.themes.
|
|
|
|
| 196 |
) as demo:
|
| 197 |
-
gr.
|
| 198 |
-
"# CHEX — Contractual Hallucination Eliminator\n"
|
| 199 |
-
"**Fine-tuned Qwen3.5-9B on AMD MI300X (ROCm)** — "
|
| 200 |
-
"detects hallucinations in contract analysis with calibrated uncertainty signals.\n\n"
|
| 201 |
-
"Instead of confidently fabricating answers, CHEX outputs one of three structured labels: "
|
| 202 |
-
"**GROUNDED** (answer exists), **ABSENT** (clause not present), or "
|
| 203 |
-
"**CONTRADICTS_PRIOR** (terms deviate from standard)."
|
| 204 |
-
)
|
| 205 |
|
| 206 |
if WARNING_HTML:
|
| 207 |
gr.HTML(WARNING_HTML)
|
|
@@ -265,19 +885,83 @@ with gr.Blocks(
|
|
| 265 |
# Suggested questions shown when loading a sample
|
| 266 |
suggested_q = gr.Markdown("", visible=False)
|
| 267 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
# ------------------------------------------------------------------ #
|
| 269 |
# Tab 2: Benchmark Demo #
|
| 270 |
# ------------------------------------------------------------------ #
|
| 271 |
with gr.Tab("Benchmark Demo"):
|
| 272 |
-
gr.
|
| 273 |
-
"## Base Qwen3.5-9B (untuned) vs. CHEX Fine-tuned Model\n\n"
|
| 274 |
-
"The table below shows 5 representative contract questions. "
|
| 275 |
-
"Rows marked with 🚨 show where the **base model hallucinated** "
|
| 276 |
-
"— it predicted GROUNDED (with a fabricated citation) when the correct "
|
| 277 |
-
"answer is **ABSENT** (the clause does not exist in the contract).\n\n"
|
| 278 |
-
"CHEX correctly returns ABSENT for all such cases."
|
| 279 |
-
)
|
| 280 |
-
|
| 281 |
gr.Dataframe(
|
| 282 |
value=BENCHMARK_DF,
|
| 283 |
headers=list(BENCHMARK_DF.columns),
|
|
@@ -286,15 +970,6 @@ with gr.Blocks(
|
|
| 286 |
interactive=False,
|
| 287 |
)
|
| 288 |
|
| 289 |
-
gr.Markdown(
|
| 290 |
-
"### Key Insight\n"
|
| 291 |
-
"The base model (Qwen3.5-9B, zero-shot) hallucinates **2 out of 5** "
|
| 292 |
-
"examples — fabricating legal clauses that do not exist in the document.\n\n"
|
| 293 |
-
"CHEX (fine-tuned on AMD MI300X with LoRA) achieves **0 hallucinations** "
|
| 294 |
-
"on these examples by learning to distinguish between what the contract "
|
| 295 |
-
"actually says and what it doesn't say."
|
| 296 |
-
)
|
| 297 |
-
|
| 298 |
# ------------------------------------------------------------------ #
|
| 299 |
# Event handlers #
|
| 300 |
# ------------------------------------------------------------------ #
|
|
@@ -349,6 +1024,36 @@ with gr.Blocks(
|
|
| 349 |
outputs=[label_display, answer_output, citation_output, reasoning_output],
|
| 350 |
)
|
| 351 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
|
| 353 |
if __name__ == "__main__":
|
| 354 |
-
demo.launch(show_error=True)
|
|
|
|
| 1 |
"""
|
| 2 |
+
CHEX - Document Intelligence
|
| 3 |
HuggingFace Spaces Gradio Demo
|
| 4 |
|
| 5 |
Tab 1: Analyze Contract — paste a contract, ask a question, get a structured answer
|
| 6 |
Tab 2: Benchmark Demo — side-by-side table showing base model hallucinations vs CHEX
|
| 7 |
+
Tab 3: Analyse Bank Statement — paste / upload a bank statement, get a summary + Q&A
|
| 8 |
"""
|
| 9 |
|
| 10 |
from __future__ import annotations
|
|
|
|
| 23 |
# ---------------------------------------------------------------------------
|
| 24 |
|
| 25 |
MODEL_PATH = os.environ.get(
|
| 26 |
+
"HF_MODEL_REPO", "PLACEHOLDER/chex-document-intelligence"
|
| 27 |
)
|
| 28 |
+
SAMPLE_DIR = Path(__file__).parent / "sample_contracts"
|
| 29 |
+
STATEMENT_DIR = Path(__file__).parent / "sample_statements"
|
| 30 |
|
| 31 |
analyzer = None
|
| 32 |
model_load_error: Optional[str] = None
|
|
|
|
| 41 |
print(f"WARNING: Model failed to load: {e}")
|
| 42 |
print("Demo is running in preview mode — analysis will return a placeholder response.")
|
| 43 |
|
| 44 |
+
# BankStatementAnalyzer reuses the loaded ContractAnalyzer pipeline
|
| 45 |
+
from serving.bank_statement import BankStatementAnalyzer # type: ignore
|
| 46 |
+
bank_analyzer = BankStatementAnalyzer(contract_analyzer=analyzer)
|
| 47 |
+
|
| 48 |
# ---------------------------------------------------------------------------
|
| 49 |
# Sample contract content
|
| 50 |
# ---------------------------------------------------------------------------
|
|
|
|
| 71 |
# Label badge HTML
|
| 72 |
# ---------------------------------------------------------------------------
|
| 73 |
|
| 74 |
+
# Badge styling per label: (text colour, background, border colour, icon).
_BADGE_CFG = {
    "GROUNDED": ("#0f9d58", "rgba(34,197,94,0.10)", "rgba(34,197,94,0.28)", "✓"),
    "ABSENT": ("#d23131", "rgba(239,68,68,0.09)", "rgba(239,68,68,0.28)", "✗"),
    "CONTRADICTS_PRIOR": ("#b87800", "rgba(245,158,11,0.10)", "rgba(245,158,11,0.30)", "⚠"),
    "N/A": ("#8a91a3", "rgba(139,145,163,0.10)", "rgba(139,145,163,0.25)", "—"),
    "ERROR": ("#991b1b", "rgba(220,38,38,0.10)", "rgba(220,38,38,0.32)", "!"),
}


def format_label_html(label: str) -> str:
    """Render *label* as an inline HTML badge.

    Args:
        label: One of the keys of ``_BADGE_CFG``. Any other value is drawn
            with the ``"N/A"`` colours and icon but keeps its own text.

    Returns:
        An HTML ``<div>`` string containing the icon and the label text.
        ``CONTRADICTS_PRIOR`` is shown as ``CONTRADICTS PRIOR``.
    """
    # Local import so this block does not rely on a file-level import.
    import html

    fg, bg, border, icon = _BADGE_CFG.get(label, _BADGE_CFG["N/A"])
    if label == "CONTRADICTS_PRIOR":
        display = "CONTRADICTS PRIOR"
    else:
        # Unknown labels are echoed back, so escape them before they are
        # embedded in markup. Known labels contain no special characters.
        display = html.escape(str(label))
    return (
        '<div style="display:inline-flex;align-items:center;gap:8px;'
        f'padding:11px 16px;border-radius:10px;border:1px solid {border};'
        f"background:{bg};color:{fg};font-family:'Inter',sans-serif;"
        'font-size:12.5px;font-weight:600;letter-spacing:0.02em;'
        'backdrop-filter:blur(10px);">'
        '<span style="width:14px;height:14px;display:grid;place-items:center;'
        f'font-size:13px;">{icon}</span>'
        f'<span>{display}</span></div>'
    )
|
| 96 |
|
| 97 |
|
|
|
|
| 131 |
return format_label_html("ERROR"), "", "", f"Inference error: {e}"
|
| 132 |
|
| 133 |
|
| 134 |
+
# ---------------------------------------------------------------------------
|
| 135 |
+
# Sample bank statement
|
| 136 |
+
# ---------------------------------------------------------------------------
|
| 137 |
+
|
| 138 |
+
def _read_sample_statement(filename: str) -> str:
    """Load a bundled sample statement from ``STATEMENT_DIR``.

    Args:
        filename: File name looked up inside ``STATEMENT_DIR``.

    Returns:
        The file's UTF-8 text, or a bracketed placeholder message naming the
        missing file when it does not exist.
    """
    sample_path = STATEMENT_DIR / filename
    if not sample_path.exists():
        return f"[Sample statement '{filename}' not found. Place it in demo/sample_statements/]"
    return sample_path.read_text(encoding="utf-8")
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
SAMPLE_STATEMENT = _read_sample_statement("sample_statement.txt")
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
# ---------------------------------------------------------------------------
|
| 149 |
+
# Bank statement handlers
|
| 150 |
+
# ---------------------------------------------------------------------------
|
| 151 |
+
|
| 152 |
+
def _get_statement_text(
|
| 153 |
+
paste_text: str,
|
| 154 |
+
pdf_file,
|
| 155 |
+
csv_file,
|
| 156 |
+
) -> tuple[str, str]:
|
| 157 |
+
"""
|
| 158 |
+
Resolve whichever input was provided and return (statement_text, error_msg).
|
| 159 |
+
Priority: PDF > CSV > paste text.
|
| 160 |
+
"""
|
| 161 |
+
if pdf_file is not None:
|
| 162 |
+
try:
|
| 163 |
+
text = bank_analyzer.extract_text_from_pdf(pdf_file)
|
| 164 |
+
if not text.strip():
|
| 165 |
+
return "", "PDF was uploaded but no text could be extracted."
|
| 166 |
+
return text, ""
|
| 167 |
+
except Exception as e:
|
| 168 |
+
return "", f"PDF extraction error: {e}"
|
| 169 |
+
|
| 170 |
+
if csv_file is not None:
|
| 171 |
+
try:
|
| 172 |
+
text = bank_analyzer.parse_csv(csv_file)
|
| 173 |
+
return text, ""
|
| 174 |
+
except Exception as e:
|
| 175 |
+
return "", f"CSV parsing error: {e}"
|
| 176 |
+
|
| 177 |
+
if paste_text and paste_text.strip():
|
| 178 |
+
return paste_text.strip(), ""
|
| 179 |
+
|
| 180 |
+
return "", "Please paste a bank statement or upload a PDF / CSV file."
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def analyse_bank_statement(paste_text: str, pdf_file, csv_file) -> tuple[str, str]:
    """Summarise a bank statement supplied as pasted text, PDF or CSV.

    Args:
        paste_text: Statement text pasted by the user.
        pdf_file: Uploaded PDF, or ``None``.
        csv_file: Uploaded CSV, or ``None``.

    Returns:
        ``(summary_markdown, extracted_text_for_qa)``. When the input cannot
        be resolved the second element is empty; otherwise it is the resolved
        statement text, even if the model is missing or summarisation fails.
    """
    statement_text, error = _get_statement_text(paste_text, pdf_file, csv_file)
    if error:
        return f"**Error:** {error}", ""

    if analyzer is None:
        not_loaded = (
            "**Model not loaded.** "
            f"Set `HF_MODEL_REPO` in Space secrets. Error: {model_load_error}"
        )
        return not_loaded, statement_text

    try:
        summary = bank_analyzer.summarize(statement_text)
        report = [
            "## Statement Summary",
            "",
            f"**Total Credits:** {summary.total_credits or 'N/A'}",
            f"**Total Debits:** {summary.total_debits or 'N/A'}",
            f"**Largest Transaction:** {summary.largest_transaction or 'N/A'}",
        ]
        if summary.recurring_payments:
            report.append("\n**Recurring Payments:**")
            report.extend(f"- {payment}" for payment in summary.recurring_payments)
        if summary.flags:
            report.append("\n**Flags / Unusual Activity:**")
            report.extend(f"- {flag}" for flag in summary.flags)
        # The model's free-text reasoning goes last, in italics.
        report.append(f"\n*{summary.raw_reasoning}*")
        return "\n".join(report), statement_text
    except Exception as exc:
        return f"**Summarisation error:** {exc}", statement_text
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def bank_qa(
    statement_text: str,
    question: str,
) -> tuple[str, str, str, str]:
    """Answer a question about a previously loaded bank statement.

    Args:
        statement_text: Statement text produced by the analyse step. ``None``
            or blank means no statement has been loaded yet.
        question: The user's question. ``None`` or blank is rejected.

    Returns:
        ``(label_html, answer_text, citation_text, reasoning_text)``. On
        missing input, a missing model, or an inference failure, the label is
        an ``N/A`` or ``ERROR`` badge and the reasoning slot holds the message.
    """
    # UI components and state can deliver None as well as "", so test
    # falsiness before calling .strip().
    if not statement_text or not statement_text.strip():
        return (
            format_label_html("N/A"), "", "",
            "Please run 'Analyse Statement' first to load the statement.",
        )
    if not question or not question.strip():
        return format_label_html("N/A"), "", "", "Please enter a question."

    if analyzer is None:
        return (
            format_label_html("N/A"), "Model not loaded", "",
            f"Model failed to load: {model_load_error}.",
        )

    try:
        result = bank_analyzer.answer_question(statement_text, question)
        label_html = format_label_html(result.label.value)
        answer = result.answer if result.answer else "(none — information not found in statement)"
        citation = result.citation if result.citation else "(none)"
        return label_html, answer, citation, result.reasoning
    except Exception as e:
        return format_label_html("ERROR"), "", "", f"Inference error: {e}"
|
| 251 |
+
|
| 252 |
+
|
| 253 |
# ---------------------------------------------------------------------------
|
| 254 |
# Benchmark table data (hardcoded — pre-computed base model outputs)
|
| 255 |
# ---------------------------------------------------------------------------
|
|
|
|
| 303 |
# Banner shown in the UI when the model failed to load; empty otherwise.
WARNING_HTML = ""
if model_load_error:
    # Imported under a private alias so no file-level import is needed and
    # no common name such as `html` is bound at module scope.
    from html import escape as _escape_html

    # The error text comes from an exception message and may contain
    # characters such as "<" or "&", so escape it before embedding.
    WARNING_HTML = (
        '<div class="chex-banner">'
        '<span class="chex-banner-icon">⚠</span>'
        '<div class="chex-banner-body"><strong>Model not loaded</strong> · '
        f'{_escape_html(str(model_load_error))} — set <code>HF_MODEL_REPO</code> in Space secrets.</div>'
        '</div>'
    )
|
| 312 |
|
| 313 |
+
# ---------------------------------------------------------------------------
|
| 314 |
+
# CSS — CHEX design system (glassmorphic, Inter + JetBrains Mono)
|
| 315 |
+
# ---------------------------------------------------------------------------
|
| 316 |
+
|
| 317 |
+
CHEX_CSS = """
|
| 318 |
+
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500;600&display=swap');
|
| 319 |
+
|
| 320 |
+
/* ── Reset & base ── */
|
| 321 |
+
*, *::before, *::after { box-sizing: border-box; }
|
| 322 |
+
|
| 323 |
+
:root {
|
| 324 |
+
--bg-base: #f3f4f7;
|
| 325 |
+
--bg-grad: radial-gradient(ellipse 1200px 700px at 18% -10%, rgba(120,150,200,0.18), transparent 60%),
|
| 326 |
+
radial-gradient(ellipse 900px 600px at 95% 110%, rgba(180,160,220,0.14), transparent 55%),
|
| 327 |
+
linear-gradient(180deg, #f5f6f9 0%, #eef0f4 100%);
|
| 328 |
+
--bg-elev: rgba(255,255,255,0.62);
|
| 329 |
+
--bg-elev-strong: rgba(255,255,255,0.78);
|
| 330 |
+
--bg-sunken: rgba(245,246,249,0.55);
|
| 331 |
+
--bg-input: rgba(255,255,255,0.55);
|
| 332 |
+
--border: rgba(15,18,30,0.08);
|
| 333 |
+
--border-strong: rgba(15,18,30,0.14);
|
| 334 |
+
--hairline: rgba(15,18,30,0.06);
|
| 335 |
+
--fg: #0d1220;
|
| 336 |
+
--fg-muted: #5b6275;
|
| 337 |
+
--fg-subtle: #8a91a3;
|
| 338 |
+
--green: #0f9d58;
|
| 339 |
+
--amber: #b87800;
|
| 340 |
+
--amber-bg: rgba(245,158,11,0.10);
|
| 341 |
+
--amber-border: rgba(245,158,11,0.30);
|
| 342 |
+
--blur: 22px;
|
| 343 |
+
--blur-strong: 32px;
|
| 344 |
+
--shadow-md: 0 1px 0 rgba(255,255,255,0.6) inset, 0 8px 24px rgba(15,18,30,0.06), 0 1px 2px rgba(15,18,30,0.04);
|
| 345 |
+
--radius: 10px;
|
| 346 |
+
--radius-lg: 16px;
|
| 347 |
+
}
|
| 348 |
+
|
| 349 |
+
.dark, [data-theme="dark"] {
|
| 350 |
+
--bg-base: #07090e;
|
| 351 |
+
--bg-grad: radial-gradient(ellipse 1100px 700px at 15% -5%, rgba(70,110,200,0.20), transparent 60%),
|
| 352 |
+
radial-gradient(ellipse 900px 600px at 95% 110%, rgba(150,90,220,0.14), transparent 55%),
|
| 353 |
+
linear-gradient(180deg, #0a0d14 0%, #06080d 100%);
|
| 354 |
+
--bg-elev: rgba(22,26,36,0.55);
|
| 355 |
+
--bg-elev-strong: rgba(28,32,44,0.72);
|
| 356 |
+
--bg-sunken: rgba(12,14,20,0.55);
|
| 357 |
+
--bg-input: rgba(14,17,25,0.55);
|
| 358 |
+
--border: rgba(255,255,255,0.07);
|
| 359 |
+
--border-strong: rgba(255,255,255,0.13);
|
| 360 |
+
--hairline: rgba(255,255,255,0.05);
|
| 361 |
+
--fg: #eceef4;
|
| 362 |
+
--fg-muted: #9ba3b6;
|
| 363 |
+
--fg-subtle: #6a7188;
|
| 364 |
+
--green: #4ade80;
|
| 365 |
+
--amber: #fbbf24;
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
/* ── App shell ── */
|
| 369 |
+
.gradio-container {
|
| 370 |
+
font-family: 'Inter', system-ui, -apple-system, sans-serif !important;
|
| 371 |
+
font-size: 14px !important;
|
| 372 |
+
line-height: 1.55 !important;
|
| 373 |
+
color: var(--fg) !important;
|
| 374 |
+
background: var(--bg-grad) !important;
|
| 375 |
+
background-attachment: fixed !important;
|
| 376 |
+
background-color: var(--bg-base) !important;
|
| 377 |
+
-webkit-font-smoothing: antialiased !important;
|
| 378 |
+
letter-spacing: -0.006em !important;
|
| 379 |
+
max-width: 1480px !important;
|
| 380 |
+
}
|
| 381 |
+
|
| 382 |
+
/* ── Topbar / header ── */
|
| 383 |
+
.chex-topbar {
|
| 384 |
+
display: flex;
|
| 385 |
+
align-items: center;
|
| 386 |
+
gap: 16px;
|
| 387 |
+
padding: 0 28px;
|
| 388 |
+
height: 60px;
|
| 389 |
+
background: var(--bg-elev);
|
| 390 |
+
backdrop-filter: blur(var(--blur-strong)) saturate(160%);
|
| 391 |
+
-webkit-backdrop-filter: blur(var(--blur-strong)) saturate(160%);
|
| 392 |
+
border-bottom: 1px solid var(--hairline);
|
| 393 |
+
margin-bottom: 0;
|
| 394 |
+
}
|
| 395 |
+
.chex-brand {
|
| 396 |
+
display: flex;
|
| 397 |
+
align-items: center;
|
| 398 |
+
gap: 12px;
|
| 399 |
+
}
|
| 400 |
+
.chex-logo {
|
| 401 |
+
width: 26px; height: 26px;
|
| 402 |
+
border-radius: 8px;
|
| 403 |
+
background: linear-gradient(135deg, var(--fg), rgba(13,18,32,0.7));
|
| 404 |
+
color: var(--bg-base);
|
| 405 |
+
display: grid;
|
| 406 |
+
place-items: center;
|
| 407 |
+
font-family: 'JetBrains Mono', monospace;
|
| 408 |
+
font-weight: 700;
|
| 409 |
+
font-size: 11px;
|
| 410 |
+
letter-spacing: -0.05em;
|
| 411 |
+
box-shadow: 0 4px 14px rgba(15,18,30,0.18), 0 1px 0 rgba(255,255,255,0.25) inset;
|
| 412 |
+
}
|
| 413 |
+
.chex-name {
|
| 414 |
+
font-size: 15px;
|
| 415 |
+
font-weight: 600;
|
| 416 |
+
letter-spacing: -0.01em;
|
| 417 |
+
color: var(--fg);
|
| 418 |
+
}
|
| 419 |
+
.chex-tag {
|
| 420 |
+
font-size: 12px;
|
| 421 |
+
color: var(--fg-muted);
|
| 422 |
+
font-weight: 400;
|
| 423 |
+
padding-left: 12px;
|
| 424 |
+
border-left: 1px solid var(--hairline);
|
| 425 |
+
}
|
| 426 |
+
.chex-status-pill {
|
| 427 |
+
display: inline-flex;
|
| 428 |
+
align-items: center;
|
| 429 |
+
gap: 8px;
|
| 430 |
+
padding: 5px 12px 5px 10px;
|
| 431 |
+
border: 1px solid var(--border);
|
| 432 |
+
border-radius: 999px;
|
| 433 |
+
font-size: 12px;
|
| 434 |
+
color: var(--fg-muted);
|
| 435 |
+
background: var(--bg-elev);
|
| 436 |
+
backdrop-filter: blur(12px);
|
| 437 |
+
-webkit-backdrop-filter: blur(12px);
|
| 438 |
+
font-family: 'JetBrains Mono', monospace;
|
| 439 |
+
white-space: nowrap;
|
| 440 |
+
margin-left: auto;
|
| 441 |
+
}
|
| 442 |
+
.chex-status-dot {
|
| 443 |
+
width: 6px; height: 6px;
|
| 444 |
+
border-radius: 50%;
|
| 445 |
+
background: var(--green);
|
| 446 |
+
box-shadow: 0 0 0 3px rgba(15,157,88,0.22);
|
| 447 |
+
display: inline-block;
|
| 448 |
+
}
|
| 449 |
+
|
| 450 |
+
/* ── Banner ── */
|
| 451 |
+
.chex-banner {
|
| 452 |
+
display: flex;
|
| 453 |
+
align-items: center;
|
| 454 |
+
gap: 12px;
|
| 455 |
+
padding: 11px 20px;
|
| 456 |
+
border-bottom: 1px solid var(--amber-border);
|
| 457 |
+
background: var(--amber-bg);
|
| 458 |
+
backdrop-filter: blur(var(--blur)) saturate(160%);
|
| 459 |
+
-webkit-backdrop-filter: blur(var(--blur)) saturate(160%);
|
| 460 |
+
color: var(--amber);
|
| 461 |
+
font-size: 13px;
|
| 462 |
+
font-family: 'Inter', sans-serif;
|
| 463 |
+
margin-bottom: 0;
|
| 464 |
+
}
|
| 465 |
+
.chex-banner-icon { font-size: 14px; }
|
| 466 |
+
.chex-banner-body { color: var(--fg); font-weight: 400; }
|
| 467 |
+
.chex-banner-body strong { color: var(--fg); font-weight: 600; }
|
| 468 |
+
.chex-banner code {
|
| 469 |
+
font-family: 'JetBrains Mono', monospace;
|
| 470 |
+
font-size: 12px;
|
| 471 |
+
background: rgba(0,0,0,0.06);
|
| 472 |
+
padding: 1px 5px;
|
| 473 |
+
border-radius: 4px;
|
| 474 |
+
}
|
| 475 |
+
|
| 476 |
+
/* ── Tabs ── */
|
| 477 |
+
.tab-nav {
|
| 478 |
+
background: var(--bg-elev) !important;
|
| 479 |
+
backdrop-filter: blur(var(--blur)) saturate(160%) !important;
|
| 480 |
+
-webkit-backdrop-filter: blur(var(--blur)) saturate(160%) !important;
|
| 481 |
+
border-bottom: 1px solid var(--hairline) !important;
|
| 482 |
+
padding: 0 20px !important;
|
| 483 |
+
gap: 2px !important;
|
| 484 |
+
}
|
| 485 |
+
.tab-nav button {
|
| 486 |
+
background: transparent !important;
|
| 487 |
+
border: none !important;
|
| 488 |
+
border-radius: 0 !important;
|
| 489 |
+
padding: 14px 16px !important;
|
| 490 |
+
color: var(--fg-muted) !important;
|
| 491 |
+
font-size: 13px !important;
|
| 492 |
+
font-weight: 500 !important;
|
| 493 |
+
font-family: 'Inter', sans-serif !important;
|
| 494 |
+
position: relative !important;
|
| 495 |
+
white-space: nowrap !important;
|
| 496 |
+
transition: color 0.15s !important;
|
| 497 |
+
}
|
| 498 |
+
.tab-nav button:hover { color: var(--fg) !important; }
|
| 499 |
+
.tab-nav button.selected {
|
| 500 |
+
color: var(--fg) !important;
|
| 501 |
+
background: transparent !important;
|
| 502 |
+
}
|
| 503 |
+
.tab-nav button.selected::after {
|
| 504 |
+
content: "";
|
| 505 |
+
position: absolute;
|
| 506 |
+
left: 12px; right: 12px; bottom: -1px;
|
| 507 |
+
height: 1.5px;
|
| 508 |
+
background: var(--fg);
|
| 509 |
+
border-radius: 2px 2px 0 0;
|
| 510 |
+
}
|
| 511 |
+
|
| 512 |
+
/* ── Glass cards ── */
|
| 513 |
+
.chex-card {
|
| 514 |
+
background: var(--bg-elev);
|
| 515 |
+
backdrop-filter: blur(var(--blur)) saturate(180%);
|
| 516 |
+
-webkit-backdrop-filter: blur(var(--blur)) saturate(180%);
|
| 517 |
+
border: 1px solid var(--border);
|
| 518 |
+
border-radius: var(--radius-lg);
|
| 519 |
+
box-shadow: var(--shadow-md);
|
| 520 |
+
overflow: hidden;
|
| 521 |
+
margin-bottom: 0;
|
| 522 |
+
}
|
| 523 |
+
.chex-card-header {
|
| 524 |
+
padding: 16px 20px;
|
| 525 |
+
display: flex;
|
| 526 |
+
align-items: center;
|
| 527 |
+
justify-content: space-between;
|
| 528 |
+
gap: 12px;
|
| 529 |
+
border-bottom: 1px solid var(--hairline);
|
| 530 |
+
}
|
| 531 |
+
.chex-card-title {
|
| 532 |
+
font-size: 13.5px;
|
| 533 |
+
font-weight: 600;
|
| 534 |
+
letter-spacing: -0.01em;
|
| 535 |
+
display: inline-flex;
|
| 536 |
+
align-items: center;
|
| 537 |
+
gap: 10px;
|
| 538 |
+
color: var(--fg);
|
| 539 |
+
white-space: nowrap;
|
| 540 |
+
}
|
| 541 |
+
.chex-card-kicker {
|
| 542 |
+
font-family: 'JetBrains Mono', monospace;
|
| 543 |
+
font-size: 11px;
|
| 544 |
+
color: var(--fg-subtle);
|
| 545 |
+
font-weight: 400;
|
| 546 |
+
}
|
| 547 |
+
|
| 548 |
+
/* ── Override Gradio inputs to match design ── */
|
| 549 |
+
.gradio-container input[type="text"],
|
| 550 |
+
.gradio-container textarea,
|
| 551 |
+
.gradio-container select,
|
| 552 |
+
.gradio-container .gr-input,
|
| 553 |
+
label.block textarea,
|
| 554 |
+
label.block input {
|
| 555 |
+
background: var(--bg-input) !important;
|
| 556 |
+
backdrop-filter: blur(10px) !important;
|
| 557 |
+
-webkit-backdrop-filter: blur(10px) !important;
|
| 558 |
+
border: 1px solid var(--border) !important;
|
| 559 |
+
border-radius: var(--radius) !important;
|
| 560 |
+
color: var(--fg) !important;
|
| 561 |
+
font-family: 'Inter', sans-serif !important;
|
| 562 |
+
font-size: 13px !important;
|
| 563 |
+
transition: border-color 0.18s, box-shadow 0.18s !important;
|
| 564 |
+
}
|
| 565 |
+
label.block textarea:focus,
|
| 566 |
+
label.block input:focus {
|
| 567 |
+
border-color: var(--border-strong) !important;
|
| 568 |
+
background: var(--bg-elev-strong) !important;
|
| 569 |
+
box-shadow: 0 0 0 4px rgba(13,18,32,0.08) !important;
|
| 570 |
+
outline: none !important;
|
| 571 |
+
}
|
| 572 |
+
|
| 573 |
+
/* Labels */
|
| 574 |
+
label.block > span,
|
| 575 |
+
.gr-form > label > span {
|
| 576 |
+
font-family: 'JetBrains Mono', monospace !important;
|
| 577 |
+
font-size: 10.5px !important;
|
| 578 |
+
font-weight: 500 !important;
|
| 579 |
+
text-transform: uppercase !important;
|
| 580 |
+
letter-spacing: 0.08em !important;
|
| 581 |
+
color: var(--fg-subtle) !important;
|
| 582 |
+
}
|
| 583 |
+
|
| 584 |
+
/* ── Buttons ── */
|
| 585 |
+
.gradio-container button.primary,
|
| 586 |
+
.gradio-container .gr-button-primary {
|
| 587 |
+
background: var(--fg) !important;
|
| 588 |
+
color: var(--bg-base) !important;
|
| 589 |
+
border: 1px solid var(--fg) !important;
|
| 590 |
+
border-radius: var(--radius) !important;
|
| 591 |
+
font-family: 'Inter', sans-serif !important;
|
| 592 |
+
font-weight: 500 !important;
|
| 593 |
+
font-size: 13px !important;
|
| 594 |
+
padding: 10px 16px !important;
|
| 595 |
+
box-shadow: 0 6px 18px rgba(13,18,32,0.28), 0 1px 0 rgba(255,255,255,0.15) inset !important;
|
| 596 |
+
transition: opacity 0.18s !important;
|
| 597 |
+
}
|
| 598 |
+
.gradio-container button.primary:hover,
|
| 599 |
+
.gradio-container .gr-button-primary:hover { opacity: 0.92 !important; }
|
| 600 |
+
|
| 601 |
+
.gradio-container button.secondary,
|
| 602 |
+
.gradio-container .gr-button-secondary {
|
| 603 |
+
background: var(--bg-elev) !important;
|
| 604 |
+
backdrop-filter: blur(10px) !important;
|
| 605 |
+
color: var(--fg) !important;
|
| 606 |
+
border: 1px solid var(--border) !important;
|
| 607 |
+
border-radius: var(--radius) !important;
|
| 608 |
+
font-family: 'Inter', sans-serif !important;
|
| 609 |
+
font-weight: 500 !important;
|
| 610 |
+
font-size: 13px !important;
|
| 611 |
+
padding: 10px 16px !important;
|
| 612 |
+
transition: background 0.18s, border-color 0.18s !important;
|
| 613 |
+
}
|
| 614 |
+
.gradio-container button.secondary:hover,
|
| 615 |
+
.gradio-container .gr-button-secondary:hover {
|
| 616 |
+
background: var(--bg-elev-strong) !important;
|
| 617 |
+
border-color: var(--border-strong) !important;
|
| 618 |
+
}
|
| 619 |
+
|
| 620 |
+
/* Small ghost buttons (load sample etc.) */
|
| 621 |
+
button.lg.secondary.svelte-cmf5ev,
|
| 622 |
+
button[class*="sm"] {
|
| 623 |
+
font-size: 12px !important;
|
| 624 |
+
padding: 7px 11px !important;
|
| 625 |
+
}
|
| 626 |
+
|
| 627 |
+
/* ── Dataframe / benchmark table ── */
|
| 628 |
+
.gradio-container table,
|
| 629 |
+
.gradio-container .gr-dataframe table {
|
| 630 |
+
background: var(--bg-elev) !important;
|
| 631 |
+
backdrop-filter: blur(var(--blur)) saturate(180%) !important;
|
| 632 |
+
border: 1px solid var(--border) !important;
|
| 633 |
+
border-radius: var(--radius-lg) !important;
|
| 634 |
+
box-shadow: var(--shadow-md) !important;
|
| 635 |
+
font-size: 13px !important;
|
| 636 |
+
font-family: 'Inter', sans-serif !important;
|
| 637 |
+
border-collapse: separate !important;
|
| 638 |
+
border-spacing: 0 !important;
|
| 639 |
+
overflow: hidden !important;
|
| 640 |
+
width: 100% !important;
|
| 641 |
+
}
|
| 642 |
+
.gradio-container th {
|
| 643 |
+
background: var(--bg-sunken) !important;
|
| 644 |
+
border-bottom: 1px solid var(--hairline) !important;
|
| 645 |
+
padding: 14px 18px !important;
|
| 646 |
+
font-family: 'JetBrains Mono', monospace !important;
|
| 647 |
+
font-size: 10.5px !important;
|
| 648 |
+
text-transform: uppercase !important;
|
| 649 |
+
letter-spacing: 0.08em !important;
|
| 650 |
+
color: var(--fg-muted) !important;
|
| 651 |
+
font-weight: 500 !important;
|
| 652 |
+
text-align: left !important;
|
| 653 |
+
}
|
| 654 |
+
.gradio-container td {
|
| 655 |
+
padding: 16px 18px !important;
|
| 656 |
+
border-top: 1px solid var(--hairline) !important;
|
| 657 |
+
vertical-align: top !important;
|
| 658 |
+
line-height: 1.6 !important;
|
| 659 |
+
color: var(--fg) !important;
|
| 660 |
+
}
|
| 661 |
+
.gradio-container tr:first-child td { border-top: none !important; }
|
| 662 |
+
|
| 663 |
+
/* ── Markdown inside Gradio ── */
|
| 664 |
+
.gradio-container .prose,
|
| 665 |
+
.gradio-container .md {
|
| 666 |
+
color: var(--fg) !important;
|
| 667 |
+
font-family: 'Inter', sans-serif !important;
|
| 668 |
+
}
|
| 669 |
+
.gradio-container .prose h2 {
|
| 670 |
+
font-size: 19px !important;
|
| 671 |
+
font-weight: 600 !important;
|
| 672 |
+
letter-spacing: -0.02em !important;
|
| 673 |
+
color: var(--fg) !important;
|
| 674 |
+
margin-bottom: 10px !important;
|
| 675 |
+
}
|
| 676 |
+
.gradio-container .prose h3 {
|
| 677 |
+
font-size: 14px !important;
|
| 678 |
+
font-weight: 600 !important;
|
| 679 |
+
letter-spacing: -0.01em !important;
|
| 680 |
+
color: var(--fg) !important;
|
| 681 |
+
margin-bottom: 8px !important;
|
| 682 |
+
}
|
| 683 |
+
.gradio-container .prose p {
|
| 684 |
+
color: var(--fg-muted) !important;
|
| 685 |
+
font-size: 13px !important;
|
| 686 |
+
line-height: 1.65 !important;
|
| 687 |
+
}
|
| 688 |
+
|
| 689 |
+
/* ── Bench intro card ── */
|
| 690 |
+
.chex-bench-intro {
|
| 691 |
+
background: var(--bg-elev);
|
| 692 |
+
backdrop-filter: blur(var(--blur)) saturate(180%);
|
| 693 |
+
-webkit-backdrop-filter: blur(var(--blur)) saturate(180%);
|
| 694 |
+
border: 1px solid var(--border);
|
| 695 |
+
border-radius: var(--radius-lg);
|
| 696 |
+
box-shadow: var(--shadow-md);
|
| 697 |
+
padding: 24px 28px;
|
| 698 |
+
margin-bottom: 22px;
|
| 699 |
+
}
|
| 700 |
+
.chex-bench-intro h2 {
|
| 701 |
+
margin: 0 0 10px;
|
| 702 |
+
font-size: 19px;
|
| 703 |
+
font-weight: 600;
|
| 704 |
+
letter-spacing: -0.02em;
|
| 705 |
+
color: var(--fg);
|
| 706 |
+
}
|
| 707 |
+
.chex-bench-intro p {
|
| 708 |
+
margin: 0;
|
| 709 |
+
color: var(--fg-muted);
|
| 710 |
+
font-size: 13px;
|
| 711 |
+
line-height: 1.65;
|
| 712 |
+
font-family: 'Inter', sans-serif;
|
| 713 |
+
}
|
| 714 |
+
.chex-bench-stats {
|
| 715 |
+
display: grid;
|
| 716 |
+
grid-template-columns: repeat(3, 1fr);
|
| 717 |
+
gap: 8px;
|
| 718 |
+
margin-top: 18px;
|
| 719 |
+
}
|
| 720 |
+
.chex-bench-stat {
|
| 721 |
+
background: var(--bg-sunken);
|
| 722 |
+
border: 1px solid var(--hairline);
|
| 723 |
+
border-radius: var(--radius);
|
| 724 |
+
padding: 12px 14px;
|
| 725 |
+
}
|
| 726 |
+
.chex-bench-stat .v {
|
| 727 |
+
font-family: 'Inter', sans-serif;
|
| 728 |
+
font-size: 20px;
|
| 729 |
+
font-weight: 600;
|
| 730 |
+
letter-spacing: -0.025em;
|
| 731 |
+
color: var(--fg);
|
| 732 |
+
}
|
| 733 |
+
.chex-bench-stat .v.red { color: #d23131; }
|
| 734 |
+
.chex-bench-stat .v.green { color: #0f9d58; }
|
| 735 |
+
.chex-bench-stat .k {
|
| 736 |
+
font-size: 10px;
|
| 737 |
+
text-transform: uppercase;
|
| 738 |
+
letter-spacing: 0.08em;
|
| 739 |
+
color: var(--fg-subtle);
|
| 740 |
+
font-family: 'JetBrains Mono', monospace;
|
| 741 |
+
}
|
| 742 |
+
|
| 743 |
+
/* ── Footer ── */
|
| 744 |
+
.chex-footer {
|
| 745 |
+
border-top: 1px solid var(--hairline);
|
| 746 |
+
padding: 14px 28px;
|
| 747 |
+
display: flex;
|
| 748 |
+
align-items: center;
|
| 749 |
+
gap: 18px;
|
| 750 |
+
color: var(--fg-subtle);
|
| 751 |
+
font-size: 11.5px;
|
| 752 |
+
font-family: 'JetBrains Mono', monospace;
|
| 753 |
+
background: var(--bg-elev);
|
| 754 |
+
backdrop-filter: blur(var(--blur));
|
| 755 |
+
-webkit-backdrop-filter: blur(var(--blur));
|
| 756 |
+
margin-top: 28px;
|
| 757 |
+
}
|
| 758 |
+
.chex-footer .sep { opacity: 0.5; }
|
| 759 |
+
|
| 760 |
+
/* ── Output textboxes ── */
|
| 761 |
+
.gradio-container .gr-textbox[data-testid],
|
| 762 |
+
.gradio-container textarea[readonly] {
|
| 763 |
+
background: var(--bg-sunken) !important;
|
| 764 |
+
border: 1px solid var(--hairline) !important;
|
| 765 |
+
font-size: 13px !important;
|
| 766 |
+
line-height: 1.65 !important;
|
| 767 |
+
color: var(--fg) !important;
|
| 768 |
+
}
|
| 769 |
+
|
| 770 |
+
/* Scrollbars */
|
| 771 |
+
*::-webkit-scrollbar { width: 10px; height: 10px; }
|
| 772 |
+
*::-webkit-scrollbar-thumb {
|
| 773 |
+
background: var(--border-strong);
|
| 774 |
+
border-radius: 999px;
|
| 775 |
+
border: 2px solid transparent;
|
| 776 |
+
background-clip: padding-box;
|
| 777 |
+
}
|
| 778 |
+
*::-webkit-scrollbar-track { background: transparent; }
|
| 779 |
+
"""
|
| 780 |
+
|
| 781 |
+
# Static HTML for the page top bar: the "CX" logo, the CHEX name, a tagline,
# and a status pill. Styled by the .chex-* CSS classes it references.
# NOTE(review): the "MI300X · ready" status text is a literal in this string,
# so it reads "ready" even when the model failed to load — confirm intended.
TOPBAR_HTML = """
|
| 782 |
+
<div class="chex-topbar">
|
| 783 |
+
<div class="chex-brand">
|
| 784 |
+
<div class="chex-logo">CX</div>
|
| 785 |
+
<div class="chex-name">CHEX</div>
|
| 786 |
+
<div class="chex-tag">grounded answers from documents</div>
|
| 787 |
+
</div>
|
| 788 |
+
<div class="chex-status-pill">
|
| 789 |
+
<span class="chex-status-dot"></span>
|
| 790 |
+
<span>MI300X · ready</span>
|
| 791 |
+
</div>
|
| 792 |
+
</div>
|
| 793 |
+
"""
|
| 794 |
+
|
| 795 |
+
# Static HTML for the page footer (uses the .chex-footer / .sep CSS classes).
# NOTE(review): the version, endpoint name and "tokens/s 142.7" figure are
# literal text in this string, not measured values — confirm they should be
# shown as-is.
FOOTER_HTML = """
|
| 796 |
+
<div class="chex-footer">
|
| 797 |
+
<span>chex/v0.4.1</span>
|
| 798 |
+
<span class="sep">·</span>
|
| 799 |
+
<span>endpoint: mi300x-east-2</span>
|
| 800 |
+
<span class="sep">·</span>
|
| 801 |
+
<span>tokens/s 142.7</span>
|
| 802 |
+
</div>
|
| 803 |
+
"""
|
| 804 |
+
|
| 805 |
+
# Static intro card shown at the top of the "Benchmark Demo" tab: a heading,
# an explanatory paragraph, and three summary stat tiles.
# NOTE(review): the 4/5, 5/5 and 100% figures are hard-coded in this string
# rather than derived from BENCHMARK_DF — keep them in sync with the table.
BENCH_INTRO_HTML = """
|
| 806 |
+
<div class="chex-bench-intro">
|
| 807 |
+
<h2>Why grounding matters</h2>
|
| 808 |
+
<p>We ran the same questions through a base instruction-tuned model and through CHEX.
|
| 809 |
+
The base model invented or extrapolated answers in 4 of 5 cases — confident, plausible, wrong.
|
| 810 |
+
CHEX returned a verifiable label, a verbatim citation, and refused to answer when the source was silent.</p>
|
| 811 |
+
<div class="chex-bench-stats">
|
| 812 |
+
<div class="chex-bench-stat"><div class="v red">4/5</div><div class="k">Base hallucinations</div></div>
|
| 813 |
+
<div class="chex-bench-stat"><div class="v green">5/5</div><div class="k">CHEX correct</div></div>
|
| 814 |
+
<div class="chex-bench-stat"><div class="v">100%</div><div class="k">Cited verbatim</div></div>
|
| 815 |
+
</div>
|
| 816 |
+
</div>
|
| 817 |
+
"""
|
| 818 |
+
|
| 819 |
with gr.Blocks(
|
| 820 |
+
title="CHEX - Document Intelligence",
|
| 821 |
+
theme=gr.themes.Base(),
|
| 822 |
+
css=CHEX_CSS,
|
| 823 |
) as demo:
|
| 824 |
+
gr.HTML(TOPBAR_HTML)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 825 |
|
| 826 |
if WARNING_HTML:
|
| 827 |
gr.HTML(WARNING_HTML)
|
|
|
|
| 885 |
# Suggested questions shown when loading a sample
|
| 886 |
suggested_q = gr.Markdown("", visible=False)
|
| 887 |
|
| 888 |
+
# ------------------------------------------------------------------ #
|
| 889 |
+
# Tab 2: Analyse Bank Statement #
|
| 890 |
+
# ------------------------------------------------------------------ #
|
| 891 |
+
with gr.Tab("Analyse Bank Statement"):
|
| 892 |
+
with gr.Row():
|
| 893 |
+
# Left column: statement input (3 sub-tabs)
|
| 894 |
+
with gr.Column(scale=2):
|
| 895 |
+
gr.Markdown("### Bank Statement Input")
|
| 896 |
+
with gr.Tabs():
|
| 897 |
+
with gr.Tab("Paste Text"):
|
| 898 |
+
bank_paste_input = gr.Textbox(
|
| 899 |
+
label="Paste bank statement text",
|
| 900 |
+
lines=20,
|
| 901 |
+
placeholder="Paste your bank statement here, or load the sample below...",
|
| 902 |
+
show_label=False,
|
| 903 |
+
)
|
| 904 |
+
btn_load_statement = gr.Button("Load Sample Statement", size="sm")
|
| 905 |
+
with gr.Tab("Upload PDF"):
|
| 906 |
+
bank_pdf_input = gr.File(
|
| 907 |
+
label="Upload PDF bank statement",
|
| 908 |
+
file_types=[".pdf"],
|
| 909 |
+
)
|
| 910 |
+
with gr.Tab("Upload CSV"):
|
| 911 |
+
bank_csv_input = gr.File(
|
| 912 |
+
label="Upload CSV bank statement",
|
| 913 |
+
file_types=[".csv"],
|
| 914 |
+
)
|
| 915 |
+
|
| 916 |
+
# Right column: summary + Q&A
|
| 917 |
+
with gr.Column(scale=1):
|
| 918 |
+
analyse_stmt_btn = gr.Button(
|
| 919 |
+
"Analyse Statement",
|
| 920 |
+
variant="primary",
|
| 921 |
+
)
|
| 922 |
+
summary_output = gr.Markdown(
|
| 923 |
+
value="*Run 'Analyse Statement' to generate a financial summary.*"
|
| 924 |
+
)
|
| 925 |
+
|
| 926 |
+
gr.Markdown("---")
|
| 927 |
+
gr.Markdown("### Ask a Question")
|
| 928 |
+
bank_question_input = gr.Textbox(
|
| 929 |
+
label="Question about the statement",
|
| 930 |
+
placeholder="e.g., What was the largest debit this month?",
|
| 931 |
+
lines=2,
|
| 932 |
+
show_label=False,
|
| 933 |
+
)
|
| 934 |
+
bank_ask_btn = gr.Button("Ask", variant="secondary")
|
| 935 |
+
|
| 936 |
+
gr.Markdown("### Q&A Result")
|
| 937 |
+
bank_label_display = gr.HTML(
|
| 938 |
+
value=format_label_html("N/A"),
|
| 939 |
+
label="Classification",
|
| 940 |
+
)
|
| 941 |
+
bank_answer_output = gr.Textbox(
|
| 942 |
+
label="Answer",
|
| 943 |
+
interactive=False,
|
| 944 |
+
lines=3,
|
| 945 |
+
)
|
| 946 |
+
bank_citation_output = gr.Textbox(
|
| 947 |
+
label="Citation (verbatim from statement)",
|
| 948 |
+
interactive=False,
|
| 949 |
+
lines=3,
|
| 950 |
+
)
|
| 951 |
+
bank_reasoning_output = gr.Textbox(
|
| 952 |
+
label="Reasoning",
|
| 953 |
+
interactive=False,
|
| 954 |
+
lines=2,
|
| 955 |
+
)
|
| 956 |
+
|
| 957 |
+
# Hidden state: extracted statement text shared between summary and Q&A
|
| 958 |
+
bank_statement_state = gr.State("")
|
| 959 |
+
|
| 960 |
# ------------------------------------------------------------------ #
|
| 961 |
# Tab 3: Benchmark Demo #
|
| 962 |
# ------------------------------------------------------------------ #
|
| 963 |
with gr.Tab("Benchmark Demo"):
|
| 964 |
+
gr.HTML(BENCH_INTRO_HTML)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 965 |
gr.Dataframe(
|
| 966 |
value=BENCHMARK_DF,
|
| 967 |
headers=list(BENCHMARK_DF.columns),
|
|
|
|
| 970 |
interactive=False,
|
| 971 |
)
|
| 972 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 973 |
# ------------------------------------------------------------------ #
|
| 974 |
# Event handlers #
|
| 975 |
# ------------------------------------------------------------------ #
|
|
|
|
| 1024 |
outputs=[label_display, answer_output, citation_output, reasoning_output],
|
| 1025 |
)
|
| 1026 |
|
| 1027 |
+
# ------------------------------------------------------------------ #
|
| 1028 |
+
# Bank Statement event handlers #
|
| 1029 |
+
# ------------------------------------------------------------------ #
|
| 1030 |
+
|
| 1031 |
+
btn_load_statement.click(
|
| 1032 |
+
fn=lambda: SAMPLE_STATEMENT,
|
| 1033 |
+
inputs=[],
|
| 1034 |
+
outputs=[bank_paste_input],
|
| 1035 |
+
)
|
| 1036 |
+
|
| 1037 |
+
analyse_stmt_btn.click(
|
| 1038 |
+
fn=analyse_bank_statement,
|
| 1039 |
+
inputs=[bank_paste_input, bank_pdf_input, bank_csv_input],
|
| 1040 |
+
outputs=[summary_output, bank_statement_state],
|
| 1041 |
+
)
|
| 1042 |
+
|
| 1043 |
+
bank_ask_btn.click(
|
| 1044 |
+
fn=bank_qa,
|
| 1045 |
+
inputs=[bank_statement_state, bank_question_input],
|
| 1046 |
+
outputs=[bank_label_display, bank_answer_output, bank_citation_output, bank_reasoning_output],
|
| 1047 |
+
)
|
| 1048 |
+
|
| 1049 |
+
bank_question_input.submit(
|
| 1050 |
+
fn=bank_qa,
|
| 1051 |
+
inputs=[bank_statement_state, bank_question_input],
|
| 1052 |
+
outputs=[bank_label_display, bank_answer_output, bank_citation_output, bank_reasoning_output],
|
| 1053 |
+
)
|
| 1054 |
+
|
| 1055 |
+
gr.HTML(FOOTER_HTML)
|
| 1056 |
+
|
| 1057 |
|
| 1058 |
# Script entry point: launch the Gradio app only when this file is executed
# directly. show_error=True presumably surfaces handler errors in the UI —
# confirm against the Gradio launch() documentation.
if __name__ == "__main__":
|
| 1059 |
+
demo.launch(show_error=True)
|