| """ |
| CHEX - Document Intelligence |
| HuggingFace Spaces Gradio Demo β fully self-contained (no relative imports) |
| |
| Tab 1: Analyze Contract β paste a contract, ask a question, get a structured answer |
| Tab 2: Benchmark Demo β side-by-side table showing base model hallucinations vs CHEX |
| Tab 3: Analyse Bank Statement β paste / upload a bank statement, get a summary + Q&A |
| """ |
|
|
| from __future__ import annotations |
|
|
| import csv |
| import datetime as _dt |
| import importlib.util |
| import io |
| import json |
| import os |
| import re |
| import tempfile |
| from enum import Enum |
| from pathlib import Path |
| from typing import Optional |
|
|
| import gradio as gr |
| from pydantic import BaseModel |
|
|
| |
| |
| |
|
|
class Label(str, Enum):
    """Classification labels a model answer can carry.

    Members are plain strings (``str`` mixin), so they compare equal to the
    literal label text and serialise directly to JSON.
    """

    # The requested information is present in the source text.
    GROUNDED = "GROUNDED"
    # The source text does not contain the requested information.
    ABSENT = "ABSENT"
    # A clause exists but deviates from standard terms.
    CONTRADICTS_PRIOR = "CONTRADICTS_PRIOR"
|
|
|
|
class ModelOutput(BaseModel):
    """Structured Q&A result validated from the model's JSON output."""

    # The question that was asked.
    question: str
    # One of the Label members.
    label: Label
    # Answer text; optional, defaults to None.
    answer: Optional[str] = None
    # Supporting span from the source text; optional, defaults to None.
    citation: Optional[str] = None
    # Explanation of the classification (required).
    reasoning: str
|
|
|
|
class BankStatementSummary(BaseModel):
    """Summary of a single bank statement as returned by the model.

    Every field except ``raw_reasoning`` is optional and defaults to None.
    """

    # Totals and the largest transaction are kept as display strings.
    total_credits: Optional[str] = None
    total_debits: Optional[str] = None
    largest_transaction: Optional[str] = None
    # Detected recurring charges, one display string per item.
    recurring_payments: Optional[list[str]] = None
    # Unusual or suspicious items, one display string per item.
    flags: Optional[list[str]] = None
    # Free-text explanation of the analysis (required).
    raw_reasoning: str
|
|
|
|
| |
| |
| |
|
|
# System prompt for contract Q&A. It defines the JSON schema the model must
# emit (question, label, answer, citation, reasoning) and gives one few-shot
# example per label. A trailing backslash joins the next line into the same
# prompt line.
# NOTE(review): the "β" characters in the example headings and answers look
# like a mis-decoded dash — confirm the file encoding.
SYSTEM_PROMPT = """\
You are a contract analysis assistant specializing in detecting hallucinations \
and calibrated uncertainty. Given a contract text and a question about a specific \
clause, output a single JSON object with exactly these fields:

question : the question asked (copy verbatim)
label : one of GROUNDED, ABSENT, or CONTRADICTS_PRIOR
- GROUNDED : the information exists verbatim in the contract
- ABSENT : the contract does not contain this clause at all
- CONTRADICTS_PRIOR: the contract contains a clause but it deviates \
from standard legal terms (e.g., inverted obligations, non-standard timeframes)
answer : the answer text if GROUNDED or CONTRADICTS_PRIOR, null if ABSENT
citation : the exact verbatim span from the contract that supports the answer, \
null if ABSENT
reasoning : one sentence explaining your classification

Output ONLY the JSON object. No preamble, no markdown fences, no text outside the JSON.

### Example 1 β GROUNDED

[CONTRACT]
This Software License Agreement ("Agreement") is entered into as of January 1, 2024, \
between TechVision Inc. ("Licensor") and GlobalCorp Ltd. ("Licensee"). The Agreement \
shall remain in effect for a period of two (2) years from the Effective Date, unless \
earlier terminated pursuant to Section 8. Licensor grants Licensee a non-exclusive, \
non-transferable license to use the Software solely for Licensee's internal business \
purposes.
[/CONTRACT]

Question: What is the duration of this agreement?

{"question": "What is the duration of this agreement?", "label": "GROUNDED", \
"answer": "Two years from the Effective Date", \
"citation": "remain in effect for a period of two (2) years from the Effective Date", \
"reasoning": "The contract explicitly specifies a two-year term starting from the Effective Date."}

### Example 2 β ABSENT

[CONTRACT]
The Licensee shall pay a monthly fee of five hundred dollars ($500.00). Payment is due \
on the first business day of each calendar month. Late payments shall accrue interest \
at a rate of one and one-half percent (1.5%) per month. Licensee shall maintain \
accurate records of all uses of the Software.
[/CONTRACT]

Question: Does this agreement include a limitation of liability clause?

{"question": "Does this agreement include a limitation of liability clause?", \
"label": "ABSENT", "answer": null, "citation": null, \
"reasoning": "No limitation of liability clause appears anywhere in the provided contract text."}

### Example 3 β CONTRADICTS_PRIOR

[CONTRACT]
This Non-Disclosure Agreement is made between AlphaTech Solutions ("Discloser") and \
Beta Dynamics Corp. ("Recipient"). The Recipient shall not disclose Confidential \
Information to any third party. NON-COMPETE: The Recipient shall engage in any \
business activity that competes with the Discloser's primary operations during the \
term and for a period of 24 months thereafter. The Recipient shall not take any \
steps to protect Discloser's trade secrets.
[/CONTRACT]

Question: Does this agreement restrict the Recipient from competing with the Discloser?

{"question": "Does this agreement restrict the Recipient from competing with the Discloser?", \
"label": "CONTRADICTS_PRIOR", \
"answer": "The non-compete clause has inverted obligations β it permits competition rather than prohibiting it", \
"citation": "The Recipient shall engage in any business activity that competes with the Discloser's primary operations", \
"reasoning": "The clause uses 'shall engage' instead of 'shall not engage', inverting the standard non-compete obligation."}
"""
|
|
# System prompt for bank-statement analysis. It describes two output schemas:
# SUMMARY mode (selected when the question is exactly "SUMMARISE") and Q&A
# mode (any other question), both as a single JSON object.
# NOTE(review): "Β£" in the examples looks like a mis-decoded pound sign —
# confirm the file encoding.
BANK_SYSTEM_PROMPT = """\
You are a financial analysis assistant specialising in bank statement review. \
Given a bank statement (plain text, CSV/Excel-derived, OFX/QFX-derived, or PDF-extracted) and either a \
summary request or a specific question, produce a single JSON object.

For SUMMARY mode (question is "SUMMARISE"):
Output a JSON object with exactly these fields:
total_credits : total money received (e.g. "Β£3,420.50") or null
total_debits : total money spent (e.g. "Β£2,105.30") or null
largest_transaction: description + amount of the single largest transaction or null
recurring_payments : list of detected recurring charges (e.g. ["Netflix Β£9.99", "Gym Β£35.00"]) or []
flags : list of unusual or suspicious items (e.g. ["Large cash withdrawal Β£800"]) or []
raw_reasoning : one sentence summarising your analysis

For Q&A mode (any other question), output a JSON object with exactly these fields:
question : the question asked (copy verbatim)
label : one of GROUNDED, ABSENT, or CONTRADICTS_PRIOR
answer : the answer text if GROUNDED or CONTRADICTS_PRIOR, null if ABSENT
citation : the exact verbatim span from the statement, null if ABSENT
reasoning : one sentence explaining your classification

Output ONLY the JSON object. No preamble, no markdown fences, no text outside the JSON.
"""
|
|
# Extra instruction appended to the last message on a retry, after the first
# model reply could not be parsed.
STRICT_SUFFIX = (
    "\n\nIMPORTANT: You must output ONLY a valid JSON object. "
    "Do not include any text before or after the JSON."
)
|
|
|
|
def _build_contract_messages(contract_text: str, question: str) -> list[dict]:
    """Build the chat messages for one contract question.

    Returns a two-item list: a system message carrying ``SYSTEM_PROMPT`` and
    a user message that wraps the contract in ``[CONTRACT]`` tags followed by
    the question.
    """
    user_content = "[CONTRACT]\n{}\n[/CONTRACT]\n\nQuestion: {}".format(
        contract_text, question
    )
    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]
|
|
|
|
def _build_bank_messages(statement_text: str, question: str) -> list[dict]:
    """Build the chat messages for one bank-statement request.

    Returns a two-item list: a system message carrying ``BANK_SYSTEM_PROMPT``
    and a user message that wraps the statement in ``[STATEMENT]`` tags
    followed by the question.
    """
    user_content = "[STATEMENT]\n{}\n[/STATEMENT]\n\nQuestion: {}".format(
        statement_text, question
    )
    system_message = {"role": "system", "content": BANK_SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]
|
|
|
|
| |
| |
| |
|
|
| def _extract_json_str(raw_text: str) -> str: |
| match = re.search(r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)?\}", raw_text, re.DOTALL) |
| if not match: |
| match = re.search(r"\{.*\}", raw_text, re.DOTALL) |
| if not match: |
| raise ValueError(f"No JSON object found in model output: {raw_text[:300]!r}") |
| return match.group() |
|
|
|
|
def _parse_model_output(raw_text: str, question: str) -> ModelOutput:
    """Extract the JSON object from ``raw_text`` and validate it as a ``ModelOutput``.

    ``question`` is accepted for the callers' convenience but is not used here;
    the question stored on the result is whatever the model echoed back.
    Errors from extraction or validation propagate to the caller.
    """
    return ModelOutput.model_validate_json(_extract_json_str(raw_text))
|
|
|
|
def _parse_summary(raw_text: str) -> BankStatementSummary:
    """Extract the JSON object from ``raw_text`` and build a ``BankStatementSummary``.

    Missing or null list fields become empty lists. A missing or null
    ``raw_reasoning`` becomes an empty string, so an otherwise usable summary
    is not rejected because the model emitted ``null`` for that field.
    Errors from extraction or JSON decoding propagate to the caller.
    """
    payload = json.loads(_extract_json_str(raw_text))
    return BankStatementSummary(
        total_credits=payload.get("total_credits"),
        total_debits=payload.get("total_debits"),
        largest_transaction=payload.get("largest_transaction"),
        recurring_payments=payload.get("recurring_payments") or [],
        flags=payload.get("flags") or [],
        raw_reasoning=payload.get("raw_reasoning") or "",
    )
|
|
|
|
| |
| |
| |
|
|
# Base URL of the inference server, read from the environment at import time
# with any trailing "/" removed; empty string when the variable is unset.
MLX_SERVER_URL = os.environ.get("MLX_SERVER_URL", "").rstrip("/")
# Sample inputs are looked up in directories next to this file.
SAMPLE_DIR = Path(__file__).parent / "sample_contracts"
STATEMENT_DIR = Path(__file__).parent / "sample_statements"


# Human-readable configuration problem, or None when the server URL is set.
model_load_error: Optional[str] = None


# Import-time side effect: report the configuration state on stdout.
if not MLX_SERVER_URL:
    model_load_error = "MLX_SERVER_URL not set. Set it in Space secrets to your Mac's ngrok URL."
    print(f"WARNING: {model_load_error}")
else:
    print(f"MLX server configured at: {MLX_SERVER_URL}")
|
|
|
|
| |
| |
| |
|
|
| MAX_CHARS = 32000 |
|
|
|
|
| def _truncate(text: str) -> str: |
| if len(text) > MAX_CHARS: |
| print(f"WARNING: Text truncated from {len(text)} to {MAX_CHARS} chars.") |
| return text[:MAX_CHARS] |
| return text |
|
|
|
|
| def _apply_messages(messages: list[dict], strict: bool = False) -> list[dict]: |
| if strict: |
| messages = list(messages) |
| messages[-1] = dict(messages[-1]) |
| messages[-1]["content"] += STRICT_SUFFIX |
| return messages |
|
|
|
|
def _run_inference(messages: list[dict]) -> str:
    """POST ``messages`` to the chat-completions endpoint and return the reply text.

    The request goes to ``{MLX_SERVER_URL}/v1/chat/completions`` with
    ``max_tokens=512`` and ``temperature=0.0`` and a 120-second timeout.
    Returns the ``content`` of the first choice's message. Network, HTTP and
    decoding errors are not handled here and propagate to the caller.
    """
    import urllib.request

    endpoint = f"{MLX_SERVER_URL}/v1/chat/completions"
    request_fields = {
        "messages": messages,
        "max_tokens": 512,
        "temperature": 0.0,
    }
    request = urllib.request.Request(
        endpoint,
        data=json.dumps(request_fields).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=120) as response:
        reply = json.loads(response.read())
    first_choice = reply["choices"][0]
    return first_choice["message"]["content"]
|
|
|
|
| |
| |
| |
|
|
def _read_sample(filename: str) -> str:
    """Return the UTF-8 text of a sample contract from ``SAMPLE_DIR``.

    When the file does not exist, a bracketed placeholder message naming the
    missing file is returned instead of raising.
    """
    sample_path = SAMPLE_DIR / filename
    if not sample_path.exists():
        return f"[Sample contract '{filename}' not found. Place it in demo/sample_contracts/]"
    return sample_path.read_text(encoding="utf-8")
|
|
|
|
# Sample contract texts, loaded once at import time (a placeholder message is
# stored when a file is missing).
SOFTWARE_LICENSE = _read_sample("software_license.txt")
NDA = _read_sample("nda.txt")
SERVICE_AGREEMENT = _read_sample("service_agreement.txt")


# Example question for each sample contract, keyed by sample filename.
SAMPLE_QUESTIONS = {
    "software_license.txt": "What is the limitation of liability in this agreement?",
    "nda.txt": "Does this agreement include a non-compete clause?",
    "service_agreement.txt": "Does this contract include a termination for convenience clause?",
}
|
|
|
|
def _read_sample_statement(filename: str) -> str:
    """Return the UTF-8 text of a sample bank statement from ``STATEMENT_DIR``.

    When the file does not exist, a bracketed placeholder message naming the
    missing file is returned instead of raising.
    """
    statement_path = STATEMENT_DIR / filename
    if not statement_path.exists():
        return f"[Sample statement '{filename}' not found. Place it in demo/sample_statements/]"
    return statement_path.read_text(encoding="utf-8")
|
|
|
|
# Sample bank statement text, loaded once at import time (a placeholder
# message is stored when the file is missing).
SAMPLE_STATEMENT = _read_sample_statement("sample_statement.txt")
|
|
|
|
| |
| |
| |
|
|
# Badge styling per label: (text colour, background, border colour, icon).
# NOTE(review): the "β" icons look like mis-decoded symbols — confirm the
# file encoding.
_BADGE_CFG = {
    "GROUNDED": ("#0f9d58", "rgba(34,197,94,0.10)", "rgba(34,197,94,0.28)", "β"),
    "ABSENT": ("#d23131", "rgba(239,68,68,0.09)", "rgba(239,68,68,0.28)", "β"),
    "CONTRADICTS_PRIOR": ("#b87800", "rgba(245,158,11,0.10)", "rgba(245,158,11,0.30)", "β "),
    "N/A": ("#8a91a3", "rgba(139,145,163,0.10)", "rgba(139,145,163,0.25)", "β"),
    "ERROR": ("#991b1b", "rgba(220,38,38,0.10)", "rgba(220,38,38,0.32)", "!"),
}


def format_label_html(label: str) -> str:
    """Render ``label`` as an inline HTML badge.

    Colours and icon come from ``_BADGE_CFG``; labels without an entry use the
    "N/A" styling but still display their own text. ``CONTRADICTS_PRIOR`` is
    shown with a space instead of the underscore.
    """
    if label in _BADGE_CFG:
        colours = _BADGE_CFG[label]
    else:
        colours = _BADGE_CFG["N/A"]
    fg, bg, border, icon = colours

    if label == "CONTRADICTS_PRIOR":
        display = "CONTRADICTS PRIOR"
    else:
        display = label

    badge_style = "".join(
        [
            "display:inline-flex;align-items:center;gap:8px;",
            f"padding:11px 16px;border-radius:10px;border:1px solid {border};",
            f"background:{bg};color:{fg};font-family:'Inter',sans-serif;",
            "font-size:12.5px;font-weight:600;letter-spacing:0.02em;",
            "backdrop-filter:blur(10px);",
        ]
    )
    icon_style = "width:14px;height:14px;display:grid;place-items:center;font-size:13px;"
    return (
        f'<div style="{badge_style}">'
        f'<span style="{icon_style}">{icon}</span>'
        f"<span>{display}</span></div>"
    )
|
|
|
|
| |
| |
| |
|
|
def analyze_contract(contract_text: str, question: str) -> tuple[str, str, str, str]:
    """Answer one question about a contract and format the result for the UI.

    Returns a 4-tuple ``(label_html, answer, citation, reasoning)``.

    * Missing input or a missing server URL yields an "N/A" badge with a hint
      in the last element.
    * Otherwise the model is queried up to twice (the second time with the
      strict JSON reminder) and the first reply that parses is returned.
    * If both attempts fail (request error or unparseable output) an "ERROR"
      badge is returned. A failure must not be shown as "ABSENT": that badge
      is a real classification meaning the clause is not in the contract.
    """
    # Treat None from the UI like an empty field instead of crashing on .strip().
    contract_text = contract_text or ""
    question = question or ""

    if not contract_text.strip():
        return format_label_html("N/A"), "", "", "Please paste a contract above."
    if not question.strip():
        return format_label_html("N/A"), "", "", "Please enter a question."
    if not MLX_SERVER_URL:
        return (
            format_label_html("N/A"),
            "Model not loaded",
            "",
            f"Model failed to load: {model_load_error}.",
        )

    contract_text = _truncate(contract_text)
    messages = _build_contract_messages(contract_text, question)

    for attempt in range(2):
        msgs = _apply_messages(messages, strict=(attempt == 1))
        try:
            raw = _run_inference(msgs)
            result = _parse_model_output(raw, question)
            label_html = format_label_html(result.label.value)
            # NOTE(review): "β" below looks like a mis-decoded dash — confirm encoding.
            answer = result.answer or "(none β clause is absent or not applicable)"
            citation = result.citation or "(none)"
            return label_html, answer, citation, result.reasoning
        except Exception as e:
            # Best-effort boundary: any failure triggers the retry / fallback.
            if attempt == 0:
                print(f" Parse attempt 1 failed ({e}). Retrying with stricter prompt...")
            else:
                print(f" Parse attempt 2 failed ({e}). Returning safe fallback.")

    # Both attempts failed: report an error, not a classification.
    return (
        format_label_html("ERROR"),
        "",
        "",
        "Model output could not be parsed as valid JSON after two attempts.",
    )
|
|
|
|
def _get_statement_text(
    paste_text: str,
    pdf_file,
    pdf_password: str | None,
    csv_file,
    txt_file,
    xlsx_file,
    ofx_file,
) -> tuple[str, str]:
    """Single-statement wrapper around ``_get_statement_texts``.

    Returns ``(text, "")`` with the first extracted statement, or
    ``("", message)`` when nothing could be extracted, where ``message`` is
    the first reported error or a generic prompt to provide a statement.
    """
    texts, errors = _get_statement_texts(
        paste_text,
        pdf_file,
        pdf_password,
        csv_file,
        txt_file,
        xlsx_file,
        ofx_file,
    )
    if texts:
        return texts[0], ""
    if errors:
        return "", errors[0]
    return (
        "",
        "Please paste a bank statement or upload a PDF / CSV / TXT / XLSX / OFX/QFX file.",
    )
|
|
|
|
| def _ensure_file_list(files) -> list: |
| if files is None: |
| return [] |
| if isinstance(files, (list, tuple)): |
| return [f for f in files if f is not None] |
| return [files] |
|
|
|
|
| def _split_statements(paste_text: str) -> list[str]: |
| """ |
| Split pasted content into multiple statements. |
| |
| Delimiter: a line containing only `---` (3+ dashes), optionally surrounded by whitespace. |
| """ |
| text = (paste_text or "").strip() |
| if not text: |
| return [] |
| parts = re.split(r"(?m)^[ \t]*-{3,}[ \t]*$", text) |
| cleaned = [p.strip() for p in parts if p.strip()] |
| return cleaned if cleaned else [text] |
|
|
|
|
def _read_pdf_pages(pdfplumber, path: str, **open_kwargs) -> list[str]:
    """Return the non-empty extracted text of every page of the PDF at ``path``."""
    with pdfplumber.open(path, **open_kwargs) as pdf:
        page_texts = (page.extract_text() for page in pdf.pages)
        return [text for text in page_texts if text]


def _extract_pdf_statements(
    pdf_list: list, pdf_password: str | None
) -> tuple[list[str], list[str]]:
    """Extract one statement text per uploaded PDF; returns ``(texts, errors)``."""
    texts: list[str] = []
    errors: list[str] = []
    if not pdf_list:
        return texts, errors

    try:
        if importlib.util.find_spec("pdfplumber") is None:
            return texts, ["pdfplumber not installed."]
        import pdfplumber
    except Exception as e:
        return texts, [f"PDF extraction error: {e}"]

    password = (pdf_password or "").strip()
    for number, pdf_file in enumerate(pdf_list, start=1):
        try:
            try:
                pages = _read_pdf_pages(pdfplumber, str(pdf_file), password=password)
            except TypeError:
                # Presumably a pdfplumber version whose open() has no `password`
                # argument. Start again from scratch so pages collected before
                # the failure are not duplicated.
                pages = _read_pdf_pages(pdfplumber, str(pdf_file))
            text = "\n".join(pages).strip()
            if text:
                texts.append(text)
            else:
                errors.append(f"PDF #{number} uploaded but no text could be extracted.")
        except Exception as e:
            # Include the exception class name: some password errors carry an
            # empty message and would otherwise not be recognised.
            details = f"{type(e).__name__} {e}".lower()
            if any(word in details for word in ("password", "encrypt", "decrypt")):
                errors.append(
                    f"PDF #{number} is password-protected. Please enter the correct password."
                )
            else:
                errors.append(f"PDF #{number} extraction error: {e}")
    return texts, errors


def _import_pandas(label: str):
    """Import pandas lazily; returns ``(module, None)`` or ``(None, error message)``."""
    try:
        import pandas as pd
    except Exception:
        if importlib.util.find_spec("pandas") is None:
            return None, "pandas not installed."
        return None, f"{label} parsing error: pandas import failed."
    return pd, None


def _table_to_text(df) -> str:
    """Flatten a DataFrame into a header line plus one comma-joined line per row."""
    # str() first: column labels are not always strings (e.g. numeric headers).
    df.columns = [str(c).strip().lower() for c in df.columns]
    lines: list[str] = []
    for _, row in df.iterrows():
        cells = (str(v).strip() for v in row.values)
        lines.append(", ".join(c for c in cells if c not in ("", "nan", "NaN")))
    return ", ".join(df.columns.tolist()) + "\n" + "\n".join(lines)


def _extract_table_statements(
    files: list, *, kind: str, pandas_label: str, read
) -> tuple[list[str], list[str]]:
    """Extract one statement text per tabular file (CSV or XLSX).

    ``read(pd, path)`` loads one file into a DataFrame; ``kind`` prefixes the
    per-file messages and ``pandas_label`` the import-failure message.
    """
    texts: list[str] = []
    errors: list[str] = []
    if not files:
        return texts, errors

    pd, import_error = _import_pandas(pandas_label)
    if pd is None:
        return texts, [import_error]

    for number, table_file in enumerate(files, start=1):
        try:
            df = read(pd, str(table_file))
            if df is None or df.empty:
                # Do not pass a header-only table on as a "statement".
                errors.append(f"{kind} #{number} uploaded but no rows were found.")
                continue
            texts.append(_table_to_text(df))
        except Exception as e:
            errors.append(f"{kind} #{number} parsing error: {e}")
    return texts, errors


def _extract_txt_statements(txt_list: list) -> tuple[list[str], list[str]]:
    """Read uploaded text files; each may hold several `---`-separated statements."""
    texts: list[str] = []
    errors: list[str] = []
    for number, txt_file in enumerate(txt_list, start=1):
        try:
            content = Path(str(txt_file)).read_text(encoding="utf-8", errors="replace")
            parts = _split_statements(content)
            if parts:
                texts.extend(parts)
            else:
                errors.append(f"TXT #{number} uploaded but no text could be read.")
        except Exception as e:
            errors.append(f"TXT #{number} parsing error: {e}")
    return texts, errors


def _format_ofx_date(d: str) -> str:
    """Format an OFX date as ``YYYY-MM-DD``.

    OFX datetimes start with ``YYYYMMDD`` and may continue with a time and a
    timezone suffix; only the date part is kept. Other values are returned
    stripped but otherwise unchanged.
    """
    d = (d or "").strip()
    if len(d) >= 8 and d[:8].isdigit():
        return f"{d[:4]}-{d[4:6]}-{d[6:8]}"
    return d


def _ofx_tag(block: str, tag: str) -> str:
    """Return the stripped text after ``<tag>`` in ``block`` ("" if absent)."""
    m = re.search(rf"<{tag}>([^<]*)", block, flags=re.IGNORECASE)
    return (m.group(1) if m else "").strip()


def _extract_ofx_statements(ofx_list: list) -> tuple[list[str], list[str]]:
    """Turn each OFX/QFX file into a "Date, Description, Amount" statement text.

    Files without any recognisable transaction fall back to their raw text
    (first 20000 characters); files with no text at all are reported as errors.
    """
    texts: list[str] = []
    errors: list[str] = []
    for number, ofx_file in enumerate(ofx_list, start=1):
        try:
            raw = Path(str(ofx_file)).read_bytes()
            content = raw.decode("utf-8", errors="replace")

            rows: list[str] = []
            for block in re.findall(
                r"<STMTTRN>(.*?)</STMTTRN>",
                content,
                flags=re.IGNORECASE | re.DOTALL,
            ):
                posted = _ofx_tag(block, "DTPOSTED") or _ofx_tag(block, "DTTRAN")
                name = _ofx_tag(block, "NAME") or _ofx_tag(block, "PAYEE")
                memo = _ofx_tag(block, "MEMO") or _ofx_tag(block, "TRNTYPE")
                amount = _ofx_tag(block, "TRNAMT") or _ofx_tag(block, "AMOUNT")
                if not any([posted, name, memo, amount]):
                    continue
                desc = " - ".join(part for part in (name, memo) if part) or "Transaction"
                rows.append(f"{_format_ofx_date(posted)}, {desc}, {amount}".strip(", "))

            if rows:
                texts.append("Date, Description, Amount\n" + "\n".join(rows))
                continue

            fallback = content.strip()[:20000]
            if fallback:
                texts.append(fallback)
            else:
                errors.append(f"OFX/QFX #{number} uploaded but no text could be read.")
        except Exception as e:
            errors.append(f"OFX/QFX #{number} parsing error: {e}")
    return texts, errors


def _get_statement_texts(
    paste_text: str,
    pdf_files,
    pdf_password: str | None,
    csv_files,
    txt_files,
    xlsx_files,
    ofx_files,
) -> tuple[list[str], list[str]]:
    """Collect statement text blocks from every supported input.

    Sources, in output order:
      - uploaded PDFs (supports multiple)
      - uploaded CSVs (supports multiple)
      - uploaded TXT files (supports multiple)
      - uploaded Excel (.xlsx) files (supports multiple; first sheet only)
      - uploaded OFX/QFX files (supports multiple)
      - pasted text (can contain multiple statements separated by `---`)

    Each upload argument may be ``None``, a single file or a list of files.

    Returns:
        ``(statement_texts, errors)``. Per-file problems are reported in
        ``errors`` without aborting the other inputs; when no text could be
        collected at all, a prompt to provide a statement is appended.
    """
    statement_texts: list[str] = []
    errors: list[str] = []

    extractions = (
        _extract_pdf_statements(_ensure_file_list(pdf_files), pdf_password),
        _extract_table_statements(
            _ensure_file_list(csv_files),
            kind="CSV",
            pandas_label="CSV",
            read=lambda pd, path: pd.read_csv(path),
        ),
        _extract_txt_statements(_ensure_file_list(txt_files)),
        _extract_table_statements(
            _ensure_file_list(xlsx_files),
            kind="XLSX",
            pandas_label="Excel",
            read=lambda pd, path: pd.read_excel(path, sheet_name=0),
        ),
        _extract_ofx_statements(_ensure_file_list(ofx_files)),
    )
    for texts, source_errors in extractions:
        statement_texts.extend(texts)
        errors.extend(source_errors)

    statement_texts.extend(_split_statements(paste_text))

    if not statement_texts:
        errors.append(
            "Please paste a bank statement or upload a PDF / CSV / TXT / XLSX / OFX/QFX file(s)."
        )

    return statement_texts, errors
|
|
|
|
def analyse_bank_statement(
    paste_text: str,
    pdf_file,
    pdf_password: str | None,
    csv_file,
    txt_file,
    xlsx_file,
    ofx_file,
) -> tuple[str, str, str]:
    """Summarise every provided bank statement.

    Returns a 3-tuple ``(markdown_report, combined_statement_text, summary_json)``:

    * ``markdown_report`` lists extraction notes, one section per statement
      and the union of recurring payments and flags across statements;
    * ``combined_statement_text`` is all statements joined under numbered
      headers;
    * ``summary_json`` is the JSON list of per-statement summaries.

    At most six statements are analysed. When nothing could be extracted or
    the server URL is missing, the first element carries the message and the
    JSON element is empty.
    """
    statement_texts, errors = _get_statement_texts(
        paste_text,
        pdf_file,
        pdf_password,
        csv_file,
        txt_file,
        xlsx_file,
        ofx_file,
    )
    if not statement_texts:
        first_error = errors[0] if errors else "No bank statement provided."
        return f"**Error:** {first_error}", "", ""

    MAX_STATEMENTS = 6
    if len(statement_texts) > MAX_STATEMENTS:
        errors.append(f"Too many statements provided; only the first {MAX_STATEMENTS} were used.")
        statement_texts = statement_texts[:MAX_STATEMENTS]

    # Join the statements under numbered headers, skipping blank ones.
    total = len(statement_texts)
    sections: list[str] = []
    for number, text in enumerate(statement_texts, start=1):
        body = text.strip()
        if body:
            sections.append(f"===== Statement {number}/{total} =====\n\n{body}")
    combined_text = "\n\n".join(sections).strip()

    if not MLX_SERVER_URL:
        return (
            f"**Inference client not initialised.** Error: {model_load_error}",
            combined_text,
            "",
        )

    # One summary request per statement, retried once with the strict reminder.
    summaries: list[BankStatementSummary] = []
    for number, statement_text in enumerate(statement_texts, start=1):
        messages = _build_bank_messages(_truncate(statement_text), "SUMMARISE")

        summary: BankStatementSummary | None = None
        for strict in (False, True):
            msgs = _apply_messages(messages, strict=strict)
            try:
                summary = _parse_summary(_run_inference(msgs))
            except Exception as e:
                if strict:
                    print(f" Summary parse attempt 2 failed (statement {number}, {e}). Returning error.")
                else:
                    print(f" Summary parse attempt 1 failed (statement {number}, {e}). Retrying...")
            else:
                break

        if summary is None:
            summary = BankStatementSummary(
                raw_reasoning=f"Could not parse model output for statement {number}."
            )
        summaries.append(summary)

    # Markdown report: notes first, then one section per statement.
    report: list[str] = ["## Statements Summary", ""]
    if errors:
        report.append("**Notes:**")
        report.extend(f"- {note}" for note in errors)
        report.append("")

    for number, summary in enumerate(summaries, start=1):
        report += [
            f"### Statement {number}",
            f"**Total Credits:** {summary.total_credits or 'N/A'}",
            f"**Total Debits:** {summary.total_debits or 'N/A'}",
            f"**Largest Transaction:** {summary.largest_transaction or 'N/A'}",
        ]
        if summary.recurring_payments:
            report.append("\n**Recurring Payments:**")
            report.extend(f"- {payment}" for payment in summary.recurring_payments)
        if summary.flags:
            report.append("\n**Flags / Unusual Activity:**")
            report.extend(f"- {flag}" for flag in summary.flags)
        report.append(f"\n*{summary.raw_reasoning}*")
        report.append("")

    # Order-preserving union of recurring payments and flags.
    overall_recurring = list(
        dict.fromkeys(p for s in summaries for p in (s.recurring_payments or []))
    )
    overall_flags = list(
        dict.fromkeys(f for s in summaries for f in (s.flags or []))
    )

    report.append("## Overall (union across statements)")
    if overall_recurring:
        report.append("\n**Recurring Payments (union):**")
        report.extend(f"- {payment}" for payment in overall_recurring)
    else:
        report.append("\n**Recurring Payments (union):** N/A")

    if overall_flags:
        report.append("\n**Flags / Unusual Activity (union):**")
        report.extend(f"- {flag}" for flag in overall_flags)
    else:
        report.append("\n**Flags / Unusual Activity (union):** N/A")

    summary_json = json.dumps([s.model_dump() for s in summaries], ensure_ascii=False)
    return "\n".join(report).strip(), combined_text, summary_json
|
|
|
|
| def _safe_json_loads(s: str) -> object: |
| try: |
| obj = json.loads(s or "") |
| if isinstance(obj, (dict, list)): |
| return obj |
| return {} |
| except Exception: |
| return {} |
|
|
|
|
| def _escape_pdf_text(s: str) -> str: |
| |
| return (s or "").replace("\\", "\\\\").replace("(", "\\(").replace(")", "\\)") |
|
|
|
|
| def _simple_pdf_bytes(title: str, lines: list[str]) -> bytes: |
| """ |
| Tiny, dependency-free, single-page PDF generator for short text reports. |
| """ |
| font = "Helvetica" |
| font_size = 11 |
| left = 54 |
| top = 790 |
| leading = 14 |
|
|
| safe_title = _escape_pdf_text(title) |
| safe_lines = [_escape_pdf_text(ln) for ln in lines] |
|
|
| content_lines: list[str] = [] |
| content_lines.append("BT") |
| content_lines.append(f"/F1 {font_size} Tf") |
| content_lines.append(f"{left} {top} Td") |
| content_lines.append(f"({_escape_pdf_text(safe_title)}) Tj") |
| content_lines.append(f"0 -{leading*2} Td") |
| for ln in safe_lines: |
| content_lines.append(f"({ln}) Tj") |
| content_lines.append(f"0 -{leading} Td") |
| content_lines.append("ET") |
| stream = "\n".join(content_lines).encode("latin-1", errors="replace") |
|
|
| objects: list[bytes] = [] |
| objects.append(b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n") |
| objects.append(b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n") |
| objects.append( |
| b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] " |
| b"/Resources << /Font << /F1 4 0 R >> >> /Contents 5 0 R >>\nendobj\n" |
| ) |
| objects.append(f"4 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /{font} >>\nendobj\n".encode()) |
| objects.append( |
| b"5 0 obj\n<< /Length " + str(len(stream)).encode() + b" >>\nstream\n" + stream + b"\nendstream\nendobj\n" |
| ) |
|
|
| out = io.BytesIO() |
| out.write(b"%PDF-1.4\n%\xe2\xe3\xcf\xd3\n") |
| xref: list[int] = [0] |
| for obj in objects: |
| xref.append(out.tell()) |
| out.write(obj) |
| xref_start = out.tell() |
| out.write(f"xref\n0 {len(xref)}\n".encode()) |
| out.write(b"0000000000 65535 f \n") |
| for off in xref[1:]: |
| out.write(f"{off:010d} 00000 n \n".encode()) |
| out.write( |
| b"trailer\n<< /Size " |
| + str(len(xref)).encode() |
| + b" /Root 1 0 R >>\nstartxref\n" |
| + str(xref_start).encode() |
| + b"\n%%EOF\n" |
| ) |
| return out.getvalue() |
|
|
|
|
def export_bank_summary_csv(summary_json: str) -> tuple[str | None, str]:
    """Write the per-statement summaries to a CSV file for download.

    ``summary_json`` is the JSON text of a list of summary objects (a single
    object is also accepted). The CSV has one row per statement plus a final
    "overall" row with the union of recurring payments and flags.

    Returns:
        ``(path, message)``. ``path`` is ``None`` when there is nothing to
        export. The file is created under the advertised filename in a fresh
        temporary directory, so the downloaded file's name matches the message.
    """
    data = _safe_json_loads(summary_json)
    if not data:
        return None, "**Export error:** Run 'Analyse statement' first."

    statements = data if isinstance(data, list) else [data]

    filename = f"bank-statement-summaries_{_dt.datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    export_path = Path(tempfile.mkdtemp(prefix="chex_")) / filename

    def _joined(items) -> str:
        # str() guards against non-string items in hand-edited or stale JSON.
        return " | ".join(str(item) for item in (items or []))

    # Order-preserving union across statements (list membership, so
    # unhashable items are tolerated).
    overall_recurring: list = []
    overall_flags: list = []
    for s in statements:
        if not isinstance(s, dict):
            continue
        for r in (s.get("recurring_payments") or []):
            if r not in overall_recurring:
                overall_recurring.append(r)
        for f in (s.get("flags") or []):
            if f not in overall_flags:
                overall_flags.append(f)

    with export_path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerow([
            "statement_index",
            "total_credits",
            "total_debits",
            "largest_transaction",
            "recurring_payments",
            "flags",
            "raw_reasoning",
        ])
        for i, s in enumerate(statements, start=1):
            if not isinstance(s, dict):
                continue
            writer.writerow([
                i,
                s.get("total_credits") or "",
                s.get("total_debits") or "",
                s.get("largest_transaction") or "",
                _joined(s.get("recurring_payments")),
                _joined(s.get("flags")),
                s.get("raw_reasoning") or "",
            ])
        writer.writerow([
            "overall",
            "",
            "",
            "",
            _joined(overall_recurring),
            _joined(overall_flags),
            "",
        ])

    return str(export_path), f"**CSV ready:** `{filename}`"
|
|
|
|
def export_bank_summary_pdf(summary_json: str) -> tuple[str | None, str]:
    """Write the per-statement summaries to a one-page PDF for download.

    ``summary_json`` is the JSON text of a list of summary objects (a single
    object is also accepted). The report lists the overall recurring payments
    and flags first, then the totals and reasoning of each statement.

    Returns:
        ``(path, message)``. ``path`` is ``None`` when there is nothing to
        export. The file is created under the advertised filename in a fresh
        temporary directory, so the downloaded file's name matches the message.
    """
    data = _safe_json_loads(summary_json)
    if not data:
        return None, "**Export error:** Run 'Analyse statement' first."

    statements = data if isinstance(data, list) else [data]

    # Plain ASCII hyphen: the PDF writer encodes text as Latin-1 and replaces
    # anything else (such as a typographic dash) with "?".
    title = "CHEX - Bank Statement Summary (Multiple)"
    generated_at = _dt.datetime.now()
    lines: list[str] = [
        f"Generated: {generated_at.isoformat(timespec='seconds')}",
        "",
        f"Statements analysed: {len(statements)}",
        "",
    ]

    # Order-preserving union across statements.
    overall_recurring: list = []
    overall_flags: list = []
    for s in statements:
        if not isinstance(s, dict):
            continue
        for r in (s.get("recurring_payments") or []):
            if r not in overall_recurring:
                overall_recurring.append(r)
        for f in (s.get("flags") or []):
            if f not in overall_flags:
                overall_flags.append(f)

    lines += [
        "Overall Recurring Payments:",
        *([f"- {x}" for x in overall_recurring] if overall_recurring else ["- (none)"]),
        "",
        "Overall Flags / Unusual Activity:",
        *([f"- {x}" for x in overall_flags] if overall_flags else ["- (none)"]),
        "",
    ]

    for i, s in enumerate(statements, start=1):
        if not isinstance(s, dict):
            continue
        lines += [
            f"Statement {i}:",
            f"- Total Credits: {s.get('total_credits') or 'N/A'}",
            f"- Total Debits: {s.get('total_debits') or 'N/A'}",
            f"- Largest Transaction: {s.get('largest_transaction') or 'N/A'}",
        ]
        rr = str(s.get("raw_reasoning") or "").strip()
        if rr:
            lines.append("- Model reasoning: " + rr)
        lines.append("")

    filename = f"bank-statement-summaries_{generated_at.strftime('%Y%m%d_%H%M%S')}.pdf"
    export_path = Path(tempfile.mkdtemp(prefix="chex_")) / filename
    export_path.write_bytes(_simple_pdf_bytes(title, lines))

    return str(export_path), f"**PDF ready:** `{filename}`"
|
|
|
|
def bank_qa(statement_text: str, question: str) -> tuple[str, str, str, str]:
    """Answer one question about a loaded bank statement.

    Returns a 4-tuple ``(label_html, answer, citation, reasoning)``.

    * Missing input or a missing server URL yields an "N/A" badge with a hint.
    * Otherwise the model is queried up to twice (the second time with the
      strict JSON reminder) and the first reply that parses is returned.
    * If both attempts fail (request error or unparseable output) an "ERROR"
      badge is returned. A failure must not be shown as "ABSENT": that badge
      is a real classification meaning the information is not in the statement.
    """
    # Treat None from the UI like an empty field instead of crashing on .strip().
    statement_text = statement_text or ""
    question = question or ""

    if not statement_text.strip():
        return (
            format_label_html("N/A"), "", "",
            "Please run 'Analyse Statement' first to load the statement.",
        )
    if not question.strip():
        return format_label_html("N/A"), "", "", "Please enter a question."
    if not MLX_SERVER_URL:
        return (
            format_label_html("N/A"), "Inference client not initialised", "",
            f"Error: {model_load_error}.",
        )

    statement_text = _truncate(statement_text)
    messages = _build_bank_messages(statement_text, question)

    for attempt in range(2):
        msgs = _apply_messages(messages, strict=(attempt == 1))
        try:
            raw = _run_inference(msgs)
            result = _parse_model_output(raw, question)
            label_html = format_label_html(result.label.value)
            # NOTE(review): "β" below looks like a mis-decoded dash — confirm encoding.
            answer = result.answer or "(none β information not found in statement)"
            citation = result.citation or "(none)"
            return label_html, answer, citation, result.reasoning
        except Exception as e:
            # Best-effort boundary: any failure triggers the retry / fallback.
            if attempt == 0:
                print(f" Q&A parse attempt 1 failed ({e}). Retrying...")
            else:
                print(f" Q&A parse attempt 2 failed ({e}). Returning fallback.")

    # Both attempts failed: report an error, not a classification.
    return (
        format_label_html("ERROR"),
        "",
        "",
        "Model output could not be parsed after two attempts.",
    )
|
|
|
|
| |
| |
| |
|
|
| import pandas as pd |
|
|
# Hard-coded example rows for the benchmark comparison table: each row pairs
# a question with its ground truth, the base model's answer and the
# fine-tuned model's answer. Nothing here is computed at runtime.
# NOTE(review): "β" and "π¨" look like mis-decoded dashes / symbols —
# confirm the file encoding.
BENCHMARK_ROWS = [
    {
        "Question": "What is the limitation of liability?",
        "Ground Truth": "GROUNDED β $50,000 cap",
        "Base Model (untuned)": "GROUNDED β $100,000 cap (wrong amount)",
        "CHEX Fine-tuned": "GROUNDED β $50,000 cap β",
        "Hallucinated?": "No (wrong value)",
    },
    {
        "Question": "Does this contract include a non-compete clause?",
        "Ground Truth": "ABSENT",
        "Base Model (untuned)": "π¨ GROUNDED β 'Licensee shall not engage in competing activities...' (fabricated)",
        "CHEX Fine-tuned": "ABSENT β null β",
        "Hallucinated?": "YES",
    },
    {
        "Question": "What is the term of the NDA?",
        "Ground Truth": "GROUNDED β 3 years",
        "Base Model (untuned)": "GROUNDED β 2 years (wrong duration)",
        "CHEX Fine-tuned": "GROUNDED β three (3) years β",
        "Hallucinated?": "No (wrong value)",
    },
    {
        "Question": "Is there a termination for convenience clause?",
        "Ground Truth": "ABSENT",
        "Base Model (untuned)": "π¨ GROUNDED β 'Either party may terminate at any time...' (fabricated)",
        "CHEX Fine-tuned": "ABSENT β null β",
        "Hallucinated?": "YES",
    },
    {
        "Question": "What are the monthly payment terms?",
        "Ground Truth": "GROUNDED β $5,000/month",
        "Base Model (untuned)": "GROUNDED β $5,000/month β",
        "CHEX Fine-tuned": "GROUNDED β $5,000/month β",
        "Hallucinated?": "No",
    },
]


# The same rows as a DataFrame, one column per dictionary key.
BENCHMARK_DF = pd.DataFrame(BENCHMARK_ROWS)
|
|
| |
| |
| |
|
|
# Imported here (like the pandas import above) so this section stays
# self-contained; aliased to avoid clashing with any local named `html`.
from html import escape as _html_escape

# Class-based "model not loaded" banner; stays empty when the model loaded.
# The error text is escaped because exception messages often contain
# angle brackets (e.g. "<urlopen error ...>") that would otherwise be
# parsed as markup and vanish from the rendered banner.
WARNING_HTML = ""
if model_load_error:
    WARNING_HTML = (
        '<div class="chex-banner">'
        '<span class="chex-banner-icon">⚠</span>'
        f'<div class="chex-banner-body"><strong>Model not loaded</strong> · '
        f'{_html_escape(str(model_load_error))} — set <code>HF_MODEL_REPO</code> in Space secrets.</div>'
        '</div>'
    )
|
|
| |
| |
| |
|
|
# Custom stylesheet handed to `demo.launch(css=...)` at the bottom of the file.
# It defines three colour variables and the `.badge` base class with its
# green / red / amber / gray variants.
# NOTE(review): the HTML snippets in this file also reference `chex-banner*`
# and `chex-suggested*` classes, which have no rules here — confirm whether
# they are meant to be styled.
CHEX_CSS = """
*, *::before, *::after { box-sizing: border-box; }

:root {
--green: #16a34a;
--red: #dc2626;
--amber: #d97706;
}

/* label badges */
.badge {
display: inline-block; padding: 3px 10px; border-radius: 4px;
font-size: 12px; font-weight: 600; letter-spacing: 0.02em;
}
.badge-green { background: #dcfce7; color: var(--green); }
.badge-red { background: #fee2e2; color: var(--red); }
.badge-amber { background: #fef3c7; color: var(--amber); }
.badge-gray { background: #f1f5f9; color: #64748b; }
"""
|
|
| |
| |
| |
|
|
# Inline-styled "model not loaded" banner template. `{msg}` is filled in with
# `str.format`, so the caller decides what text (and escaping) goes there.
# The warning sign is a real U+26A0 character (it was mis-decoded mojibake).
WARNING_BANNER_HTML = (
    '<div style="background:#fef3c7;border:1px solid #fde68a;border-radius:6px;'
    'padding:10px 14px;margin-bottom:12px;color:#92400e;font-size:13px;">'
    '<strong>⚠ Model not loaded</strong> — {msg}</div>'
)
|
|
| |
| |
| |
|
|
# Imported next to its only use in this section; aliased to avoid clashing
# with any local variable named `html` elsewhere in the file.
from html import escape as _html_escape

# Gradio application: three tabs (contract analysis, bank statements,
# benchmark) followed by the event wiring, which must run inside the Blocks
# context so the listeners attach to `demo`.
with gr.Blocks(title="CHEX — Document Intelligence") as demo:

    if model_load_error:
        # Escape the message: exception text frequently contains "<...>",
        # which the browser would otherwise treat as markup.
        gr.HTML(WARNING_BANNER_HTML.format(msg=_html_escape(str(model_load_error))))

    with gr.Tabs():

        # ---- Tab: contract analysis ------------------------------------
        with gr.Tab("Contract analysis"):
            with gr.Row(equal_height=False):

                # Left column: contract text and sample loaders.
                with gr.Column(scale=9):
                    gr.Markdown("### Contract source\nPaste text or load a sample.")
                    contract_input = gr.Textbox(
                        label="Contract text",
                        lines=20,
                        placeholder="Paste contract text here…",
                        show_label=False,
                    )
                    gr.Markdown("**Load sample:**")
                    with gr.Row():
                        btn_software = gr.Button("Software License", variant="secondary", size="sm")
                        btn_nda = gr.Button("NDA", variant="secondary", size="sm")
                        btn_service = gr.Button("Service Agreement", variant="secondary", size="sm")
                    # Hidden until a sample is loaded; then shows the hint.
                    suggested_q = gr.Markdown(value="", visible=False)

                # Right column: question and classification result.
                with gr.Column(scale=11):
                    gr.Markdown("### Classification\nAsk a yes/no or factual question about the contract.")
                    with gr.Row():
                        question_input = gr.Textbox(
                            label="Question",
                            placeholder="e.g., What is the annual license fee?",
                            lines=1,
                            scale=8,
                        )
                        analyze_btn = gr.Button("Analyze", variant="primary", scale=2)
                    label_display = gr.HTML(value=format_label_html("N/A"))
                    answer_output = gr.Textbox(label="Answer", interactive=False, lines=3)
                    citation_output = gr.Textbox(label="Citation", interactive=False, lines=2)
                    reasoning_output = gr.Textbox(label="Reasoning", interactive=False, lines=3)

        # ---- Tab: bank statements --------------------------------------
        with gr.Tab("Bank statements"):
            with gr.Row(equal_height=False):

                # Left column: one sub-tab per supported input format.
                with gr.Column(scale=9):
                    gr.Markdown("### Statement source\nPaste, upload, or load the sample.")
                    with gr.Tabs():
                        with gr.Tab("Paste text"):
                            bank_paste_input = gr.Textbox(
                                label="Statement text",
                                lines=20,
                                placeholder="Paste statement here, e.g. lines like: 2025-03-15 Direct deposit +5,420.00…\n\nSeparate multiple statements with a line containing only ---",
                                show_label=False,
                            )
                            btn_load_statement = gr.Button("Load sample", variant="secondary", size="sm")
                        with gr.Tab("Upload PDF"):
                            bank_pdf_input = gr.File(
                                label="PDF (multiple allowed)",
                                file_types=[".pdf"],
                                file_count="multiple",
                            )
                            bank_pdf_password_input = gr.Textbox(
                                label="PDF password (leave blank if unencrypted)",
                                type="password",
                            )
                        with gr.Tab("Upload CSV"):
                            bank_csv_input = gr.File(
                                label="CSV (multiple allowed)",
                                file_types=[".csv"],
                                file_count="multiple",
                            )
                        with gr.Tab("Upload TXT"):
                            bank_txt_input = gr.File(
                                label="TXT (multiple allowed)",
                                file_types=[".txt", ".text"],
                                file_count="multiple",
                            )
                        with gr.Tab("Upload Excel"):
                            bank_xlsx_input = gr.File(
                                label="XLSX (multiple allowed)",
                                file_types=[".xlsx"],
                                file_count="multiple",
                            )
                        with gr.Tab("Upload OFX/QFX"):
                            bank_ofx_input = gr.File(
                                label="OFX/QFX (multiple allowed)",
                                file_types=[".ofx", ".qfx"],
                                file_count="multiple",
                            )

                # Right column: summary, exports and follow-up Q&A.
                with gr.Column(scale=11):
                    gr.Markdown("### Statement analysis")
                    analyse_stmt_btn = gr.Button("Analyse statement", variant="primary")
                    summary_md_output = gr.Markdown(value="*Run Analyse statement to see results.*")
                    with gr.Row():
                        export_csv_btn = gr.Button("Export CSV", variant="secondary", size="sm")
                        export_pdf_btn = gr.Button("Export PDF", variant="secondary", size="sm")
                    export_status = gr.Markdown(value="")
                    export_file = gr.File(label="Download", interactive=False)
                    gr.Markdown("---\n**Ask a question about the statement:**")
                    with gr.Row():
                        bank_question_input = gr.Textbox(
                            label="Question",
                            placeholder="e.g., What was the largest debit this month?",
                            lines=1,
                            scale=8,
                        )
                        bank_ask_btn = gr.Button("Ask", variant="secondary", scale=2)
                    bank_label_display = gr.HTML(value=format_label_html("N/A"))
                    bank_answer_output = gr.Textbox(label="Answer", interactive=False, lines=3)
                    bank_citation_output = gr.Textbox(label="Citation", interactive=False, lines=2)
                    bank_reasoning_output = gr.Textbox(label="Reasoning", interactive=False, lines=3)

            # State shared between "Analyse statement", the exports and Q&A.
            bank_statement_state = gr.State("")
            bank_summary_state = gr.State("")
            # Invisible components that exist only to expose the combined
            # `bank_analyze` API endpoint wired further below.
            bank_api_output = gr.JSON(visible=False)
            bank_api_question = gr.Textbox(visible=False)
            bank_api_btn = gr.Button(visible=False)

        # ---- Tab: benchmark --------------------------------------------
        with gr.Tab("Benchmark"):
            # NOTE(review): the copy below counts 4 / 5 base-model
            # hallucinations, while the table's "Hallucinated?" column marks
            # only two rows YES (two more are "No (wrong value)"). Confirm
            # which wording is intended.
            gr.Markdown("""
### Why grounding matters

We ran the same five questions through a base instruction-tuned model and through CHEX.
The base model invented or extrapolated answers in **4 of 5 cases** — confident, plausible, wrong.
CHEX returned a verifiable label, a verbatim citation, and refused to answer when the source was silent.

| Metric | Result |
|---|---|
| Base hallucinations | **4 / 5** |
| CHEX correct | **5 / 5** |
| Cited verbatim | **100%** |
""")
            gr.Dataframe(
                value=BENCHMARK_DF,
                headers=list(BENCHMARK_DF.columns),
                datatype=["str"] * len(BENCHMARK_DF.columns),
                wrap=True,
                interactive=False,
            )

    # ---- Event wiring: contract tab ------------------------------------

    def _suggestion_html(body_html: str) -> str:
        """Wrap *body_html* in the "Suggested:" hint markup."""
        return (
            '<div class="chex-suggested"><span class="chex-suggested-icon">💡</span>'
            f'<span><strong>Suggested:</strong> {body_html}</span></div>'
        )

    def load_software():
        """Return the software-license sample, its question and the hint."""
        hint = _suggestion_html("What is the limitation of liability in this agreement?")
        return SOFTWARE_LICENSE, SAMPLE_QUESTIONS["software_license.txt"], gr.update(value=hint, visible=True)

    def load_nda():
        """Return the NDA sample, its question and the hint."""
        hint = _suggestion_html("Does this agreement include a non-compete clause?")
        return NDA, SAMPLE_QUESTIONS["nda.txt"], gr.update(value=hint, visible=True)

    def load_service():
        """Return the service-agreement sample, its question and the hint."""
        hint = _suggestion_html(
            "Does this contract include a termination for convenience clause? "
            "<em>(expected: ABSENT)</em>"
        )
        return SERVICE_AGREEMENT, SAMPLE_QUESTIONS["service_agreement.txt"], gr.update(value=hint, visible=True)

    sample_outputs = [contract_input, question_input, suggested_q]
    btn_software.click(fn=load_software, inputs=[], outputs=sample_outputs)
    btn_nda.click(fn=load_nda, inputs=[], outputs=sample_outputs)
    btn_service.click(fn=load_service, inputs=[], outputs=sample_outputs)

    def analyze_contract_ui(contract_text: str, question: str):
        """Forward the two textbox values to `analyze_contract`."""
        return analyze_contract(contract_text, question)

    # One listener for both triggers: registering click and submit separately
    # with the same api_name makes Gradio expose a second, suffixed endpoint.
    gr.on(
        triggers=[analyze_btn.click, question_input.submit],
        fn=analyze_contract_ui,
        inputs=[contract_input, question_input],
        outputs=[label_display, answer_output, citation_output, reasoning_output],
        api_name="contract_analyze",
    )

    # ---- Event wiring: bank tab ----------------------------------------

    btn_load_statement.click(fn=lambda: SAMPLE_STATEMENT, inputs=[], outputs=[bank_paste_input])

    # Every statement source feeds the same analysis call, in this order.
    bank_source_inputs = [
        bank_paste_input,
        bank_pdf_input,
        bank_pdf_password_input,
        bank_csv_input,
        bank_txt_input,
        bank_xlsx_input,
        bank_ofx_input,
    ]

    analyse_stmt_btn.click(
        fn=analyse_bank_statement,
        inputs=bank_source_inputs,
        outputs=[summary_md_output, bank_statement_state, bank_summary_state],
    )

    export_csv_btn.click(
        fn=export_bank_summary_csv,
        inputs=[bank_summary_state],
        outputs=[export_file, export_status],
    )
    export_pdf_btn.click(
        fn=export_bank_summary_pdf,
        inputs=[bank_summary_state],
        outputs=[export_file, export_status],
    )

    # Button click and Enter in the question box run the same Q&A call.
    gr.on(
        triggers=[bank_ask_btn.click, bank_question_input.submit],
        fn=bank_qa,
        inputs=[bank_statement_state, bank_question_input],
        outputs=[bank_label_display, bank_answer_output, bank_citation_output, bank_reasoning_output],
    )

    def bank_analyze_api(
        paste_text: str,
        pdf_files,
        pdf_password: str | None,
        csv_files,
        txt_files,
        xlsx_files,
        ofx_files,
        question: str | None,
    ) -> dict:
        """Analyse a statement and optionally answer one question in one call.

        Returns a dict with ``summary_markdown``, ``combined_text``,
        ``summary_json`` and ``qa``; ``qa`` is ``None`` when no (non-blank)
        question was supplied.
        """
        summary_md, combined_text, summary_json = analyse_bank_statement(
            paste_text,
            pdf_files,
            pdf_password,
            csv_files,
            txt_files,
            xlsx_files,
            ofx_files,
        )

        qa: dict | None = None
        cleaned_question = (question or "").strip()
        if cleaned_question:
            label_html, answer, citation, reasoning = bank_qa(combined_text, cleaned_question)
            qa = {
                "label_html": label_html,
                "answer": answer,
                "citation": citation,
                "reasoning": reasoning,
            }

        return {
            "summary_markdown": summary_md,
            "combined_text": combined_text,
            "summary_json": summary_json,
            "qa": qa,
        }

    # The button is invisible; this listener exists for the named endpoint.
    bank_api_btn.click(
        fn=bank_analyze_api,
        inputs=[*bank_source_inputs, bank_api_question],
        outputs=[bank_api_output],
        api_name="bank_analyze",
    )
|
|
|
|
if __name__ == "__main__":
    # Script entry point: start the Gradio server for `demo`.
    # NOTE(review): theme/css are passed to launch() rather than gr.Blocks();
    # presumably this targets a Gradio release whose launch() accepts them —
    # confirm against the pinned version.
    launch_kwargs = {
        "show_error": True,
        "theme": gr.themes.Base(),
        "css": CHEX_CSS,
        "ssr_mode": False,
    }
    demo.launch(**launch_kwargs)
|
|