Nándorfi Vince committed on
Commit
7ff7119
·
0 Parent(s):

Initial paperhawk push to HF Space (LFS for binaries)

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +36 -0
  2. .env.example +58 -0
  3. .gitattributes +7 -0
  4. .gitignore +57 -0
  5. ARCHITECTURE.md +214 -0
  6. CLAUDE.md +191 -0
  7. Dockerfile +48 -0
  8. LICENSE +21 -0
  9. Makefile +66 -0
  10. NOTICE.md +34 -0
  11. README.md +168 -0
  12. app/__init__.py +0 -0
  13. app/async_runtime.py +126 -0
  14. app/main.py +931 -0
  15. app/streaming.py +97 -0
  16. app/tabs/__init__.py +0 -0
  17. config.py +129 -0
  18. data/sanctions_snapshot.json +114 -0
  19. docker-compose.yml +52 -0
  20. docs/HF_SPACE_DEFAULT_GETTING_STARTED.md +193 -0
  21. docs/SUBMISSION.md +170 -0
  22. docs/hf-space-deployment.md +124 -0
  23. docs/qwen-vllm-deployment.md +68 -0
  24. docs/slides/01_cover.png +3 -0
  25. docs/slides/PaperHawk_Slides.pdf +3 -0
  26. docs/slides/PaperHawk_Slides.pptx +3 -0
  27. docs/slides/README.md +104 -0
  28. docs/slides/png/slide_01.png +3 -0
  29. docs/slides/png/slide_02.png +3 -0
  30. docs/slides/png/slide_03.png +3 -0
  31. docs/slides/png/slide_04.png +3 -0
  32. docs/slides/png/slide_05.png +3 -0
  33. docs/slides/png/slide_06.png +3 -0
  34. docs/slides/png/slide_07.png +3 -0
  35. docs/slides/png/slide_08.png +3 -0
  36. docs/slides/png/slide_09.png +3 -0
  37. docs/slides/png/slide_10.png +3 -0
  38. docs/slides/slides.html +897 -0
  39. docs/social-posts/post-1-build-window-opens.md +165 -0
  40. domain_checks/__init__.py +140 -0
  41. domain_checks/base.py +60 -0
  42. domain_checks/check_01_invoice_mandatory.py +123 -0
  43. domain_checks/check_02_tax_cdv.py +108 -0
  44. domain_checks/check_03_contract_completeness.py +85 -0
  45. domain_checks/check_04_proportionality.py +68 -0
  46. domain_checks/check_05_rounded_amounts.py +96 -0
  47. domain_checks/check_06_evidence_score.py +53 -0
  48. domain_checks/check_07_materiality.py +60 -0
  49. domain_checks/check_08_gdpr_28.py +202 -0
  50. domain_checks/check_09_dd_red_flags.py +118 -0
.dockerignore ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git
2
+ .git
3
+ .gitignore
4
+
5
+ # Env
6
+ .env
7
+ .env.*
8
+ !.env.example
9
+
10
+ # Python
11
+ __pycache__
12
+ *.pyc
13
+ *.pyo
14
+ .pytest_cache
15
+ .ruff_cache
16
+ .venv
17
+ venv
18
+
19
+ # IDE
20
+ .vscode
21
+ .idea
22
+ .DS_Store
23
+
24
+ # Perzisztens runtime adat (mount-oljuk, ne image-be sütjük)
25
+ chroma_db/
26
+ data/checkpoints.sqlite*
27
+
28
+ # Tervek és dokumentáció (image-be felesleges)
29
+ tervek/
30
+ dokumentacio/
31
+
32
+ # Test results
33
+ test_results/
34
+
35
+ # Node
36
+ node_modules/
.env.example ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # LLM Provider
3
+ # =============================================================================
4
+ # Profile: vllm (default, AMD MI300X) | ollama (local fallback) | dummy (CI/eval)
5
+ LLM_PROFILE=vllm
6
+
7
+ # vLLM (AMD Developer Cloud MI300X) — DEFAULT
8
+ # Point this at the public URL of your AMD MI300X vLLM endpoint.
9
+ # Local dev: http://localhost:8000/v1
10
+ VLLM_BASE_URL=http://localhost:8000/v1
11
+ VLLM_MODEL=Qwen/Qwen2.5-14B-Instruct
12
+ VLLM_API_KEY=
13
+ # VLLM_API_KEY left blank = client sends "EMPTY" (vLLM no-auth mode)
14
+ # In production set a real key and start vLLM with --api-key <key>
15
+ VLLM_TEMPERATURE=0.0
16
+ VLLM_MAX_TOKENS=4096
17
+
18
+ # Ollama (optional local fallback, only when LLM_PROFILE=ollama)
19
+ OLLAMA_BASE_URL=http://localhost:11434
20
+ OLLAMA_MODEL=qwen2.5:7b-instruct
21
+
22
+ # =============================================================================
23
+ # Embedding (sentence-transformers / Hugging Face, runs locally on CPU)
24
+ # =============================================================================
25
+ # Default: BAAI/bge-m3 (2.27 GB, 1024 dim, multilingual incl. EN/HU/DE/FR/...)
26
+ # Lighter alternative if memory-constrained: BAAI/bge-small-en-v1.5 (133 MB, 384 dim, en-only)
27
+ EMBEDDING_MODEL=BAAI/bge-m3
28
+
29
+ # =============================================================================
30
+ # Storage
31
+ # =============================================================================
32
+ CHROMA_PATH=./chroma_db
33
+ CHROMA_COLLECTION=documents
34
+ CHECKPOINT_DB_PATH=./data/checkpoints.sqlite
35
+
36
+ # =============================================================================
37
+ # Pipeline tuning
38
+ # =============================================================================
39
+ CHUNK_MAX_CHARS=15000
40
+ CHUNK_OVERLAP_CHARS=500
41
+ SINGLE_CALL_THRESHOLD=30000
42
+
43
+ # Agentic loop guards
44
+ CHAT_MAX_ITERATIONS=10
45
+ VALIDATOR_MAX_RETRIES=2
46
+ DD_SUPERVISOR_MAX_ITERATIONS=4
47
+
48
+ # =============================================================================
49
+ # LangSmith observability (optional)
50
+ # =============================================================================
51
+ # LANGCHAIN_TRACING_V2=true
52
+ # LANGCHAIN_API_KEY=lsv2_pt_XXXXXXXXXXXXXXXXXXXXXXX
53
+ # LANGCHAIN_PROJECT=document-intelligence-amd
54
+
55
+ # =============================================================================
56
+ # Streamlit
57
+ # =============================================================================
58
+ STREAMLIT_PORT=8501
.gitattributes ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ *.png filter=lfs diff=lfs merge=lfs -text
2
+ *.pdf filter=lfs diff=lfs merge=lfs -text
3
+ *.pptx filter=lfs diff=lfs merge=lfs -text
4
+ *.docx filter=lfs diff=lfs merge=lfs -text
5
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
6
+ *.jpg filter=lfs diff=lfs merge=lfs -text
7
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+
8
+ # Virtuális környezet (nem hordozható)
9
+ .venv/
10
+ venv/
11
+ env/
12
+ ENV/
13
+
14
+ # Disztribúció
15
+ build/
16
+ dist/
17
+ *.egg-info/
18
+ *.egg
19
+ .eggs/
20
+
21
+ # Tesztelés
22
+ .pytest_cache/
23
+ .coverage
24
+ .coverage.*
25
+ htmlcov/
26
+ .tox/
27
+ .nox/
28
+
29
+ # Környezeti változók
30
+ .env
31
+ .env.local
32
+ .env.*.local
33
+ !.env.example
34
+
35
+ # Perzisztens runtime adat (auto-generálódik)
36
+ chroma_db/
37
+ data/checkpoints.sqlite
38
+ data/checkpoints.sqlite-*
39
+ *.log
40
+
41
+ # HuggingFace / sentence-transformers cache
42
+ .cache/
43
+
44
+ # IDE / OS
45
+ .vscode/
46
+ .idea/
47
+ *.swp
48
+ *.swo
49
+ *~
50
+ .DS_Store
51
+ Thumbs.db
52
+
53
+ # Node (defenzív)
54
+ node_modules/
55
+
56
+ # Test results
57
+ test_results/
ARCHITECTURE.md ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Architecture
2
+
3
+ LangGraph-native Document Intelligence platform. This document goes beyond
4
+ the README — it covers design decisions, the subgraph hierarchy, state
5
+ design, and the anti-hallucination stack.
6
+
7
+ ## 1. High-level architecture
8
+
9
+ ### 4 compiled LangGraph artifacts
10
+
11
+ The system is organized around four graphs sharing a common `AsyncSqliteSaver`
12
+ checkpointer:
13
+
14
+ | # | Graph | Entry point | When |
15
+ |---|-------|-------------|------|
16
+ | 1 | `pipeline_graph` | `app.run_pipeline(files)` | on upload |
17
+ | 2 | `chat_graph` | `app.ask(question)` | chat tab |
18
+ | 3 | `dd_graph` | `app.dd_report(thread_id)` | DD tab button |
19
+ | 4 | `package_insights_graph` | `app.package_insights(thread_id, pkg_type)` | demo button |
20
+
21
+ Chat tools read from the persisted pipeline state — they do not re-read
22
+ files. They access the in-memory `ChatToolContext`, which holds the
23
+ HybridStore and a documents snapshot.
24
+
25
+ ### Pipeline graph topology
26
+
27
+ ```
28
+ START
29
+ → start_timer
30
+ → dispatch_ingest (Send API: per-doc fan-out)
31
+ → ingest_per_doc (PDF/DOCX/PNG/TXT loader subgraph)
32
+ → ingest_join (fan-in)
33
+ → dispatch_classify (Send API)
34
+ → classify_per_doc (regex/keyword classifier in dummy mode;
35
+ vision-aware in vLLM mode)
36
+ → classify_join
37
+ → dispatch_extract (Send API)
38
+ → extract_per_doc (regex extractor in dummy mode +
39
+ flatten_universal; structured LLM in vLLM mode)
40
+ → extract_join
41
+ → quote_validator (anti-hallucination layer #7)
42
+ → dispatch_rag_index (Send API)
43
+ → rag_index_per_doc (chunker + batched embed + Chroma+BM25 upsert)
44
+ → rag_join
45
+ → compare_node (three-way matching, sync)
46
+ → risk_subgraph (basic + 14 domain × Send + plausibility +
47
+ LLM ensemble + duplicate)
48
+ → finish_timer
49
+ → report_node (10-section JSON structure)
50
+ → END
51
+ ```
52
+
53
+ The per-doc Send fan-out yields a 5–8× speedup in a CPU-bound environment.
54
+
55
+ ### Risk subgraph topology
56
+
57
+ ```
58
+ risk_subgraph (input: PipelineState):
59
+ → basic_risk_dispatch (Send: per-doc basic risk)
60
+ → basic_risk / noop_basic
61
+ → domain_dispatch_node (Send: per-doc × per-applicable-check, ~30 parallel)
62
+ → apply_domain_check
63
+ → [if llm provided] llm_risk_dispatch (Send: per-doc LLM risk + 3-filter chain)
64
+ → llm_risk_per_doc / noop_llm
65
+ → plausibility_dispatch (Send: per-doc plausibility)
66
+ → plausibility / noop_plaus
67
+ → evidence_score_node (per-doc info)
68
+ → duplicate_detector_node (package-level, sync, ISA 240)
69
+ END
70
+ ```
71
+
72
+ The full anti-hallucination 5+1 layer chain runs inside `llm_risk_per_doc`:
73
+ `llm_risk → filter_llm_risks → drop_business_normal → drop_repeats`.
74
+
75
+ ### DD multi-agent supervisor graph
76
+
77
+ ```
78
+ dd_graph:
79
+ START
80
+ → contract_filter_node (keep only contract-type docs)
81
+ → per_contract_summary_node (Python-deterministic per-contract DDContractSummary)
82
+ → supervisor_node (LLM router or heuristic; Command(goto=...))
83
+ ├─ → audit_specialist (pricing anomalies, overcharging)
84
+ ├─ → legal_specialist (red flags, change-of-control, non-compete)
85
+ ├─ → compliance_specialist (GDPR, AML, data protection)
86
+ └─ → financial_specialist (monthly obligations, expirations)
87
+ ↺ (loops back to supervisor up to dd_supervisor_max_iterations)
88
+ → dd_synthesizer (one LLM call: executive_summary +
89
+ top_red_flags + per-contract risk_level rating)
90
+ END
91
+ ```
92
+
93
+ ### Package insights graph
94
+
95
+ A simple 1-LLM-call graph: ingests the full document package and produces
96
+ cross-doc findings using a perspective-driven prompt
97
+ (`audit | dd | compliance | general`).
98
+
99
+ ## 2. State design
100
+
101
+ ### `PipelineState` (TypedDict)
102
+
103
+ Read-mostly fields with **reducer-driven Send fan-in**:
104
+
105
+ - `files: list[tuple[str, bytes]]` — raw upload
106
+ - `documents: Annotated[list[ProcessedDocument], merge_doc_results]` —
107
+ per-doc field-level merge keyed by `file_name`
108
+ - `risks: Annotated[list[Risk], merge_risks]` — dedup by description
109
+ - `comparison: ComparisonReport | None`
110
+ - `report: dict`
111
+ - `package_insights: PackageInsights | None`
112
+ - `dd_report: DDPortfolioReport | None`
113
+ - `started_at`, `finished_at`, `processing_seconds`
114
+ - `progress_events: Annotated[list[str], add]` — Streamlit progress feed
115
+
116
+ ### `Risk` (Pydantic)
117
+
118
+ The single risk type used everywhere:
119
+
120
+ - `description: str`
121
+ - `severity: str` (`"high" | "medium" | "low" | "info"`)
122
+ - `rationale: str`
123
+ - `kind: str` (`"validation" | "domain_rule" | "plausibility" | "llm_analysis" | "cross_check"`)
124
+ - `regulation: str | None` (e.g. `"HU VAT Act §169"`, `"ISA 240"`, `"GDPR Article 28"`)
125
+ - `affected_document: str | None`
126
+ - `source_check_id: str | None`
127
+
128
+ ## 3. Anti-hallucination stack (5+1 layers)
129
+
130
+ 1. **`temperature=0`** — every LLM call is deterministic-ish.
131
+ 2. **`_quotes` schema field** — verbatim source citations.
132
+ 3. **`_confidence` schema field** — per-field reliability (high|medium|low).
133
+ 4. **`validate_plausibility()`** — Python deterministic plausibility checks
134
+ (negative VAT, non-standard rates, future dates, etc.).
135
+ 5. **3-filter LLM risk pipeline** —
136
+ `filter_llm_risks` (formal: ≥5 words, ≥2 domain terms, ≥1 concrete fact)
137
+ → `drop_business_normal_risks` (semantic: cross-check vs extracted_data,
138
+ 6 known false-positive patterns)
139
+ → `drop_repeats_of_basic` (textual dedup vs basic risks, 70% threshold).
140
+ 6. **Quote validator** — final cross-check that every `_quotes` entry
141
+ actually appears in the source `full_text` (whitespace + diacritic +
142
+ case normalized). If invalid, downgrades confidence.
143
+
144
+ ## 4. Domain checks (14 deterministic rules)
145
+
146
+ | # | check_id | Regulation | HU-specific? | Applies to |
147
+ |---|----------|-----------|--------------|------------|
148
+ | 01 | `check_01_invoice_mandatory` | HU VAT Act §169 | yes | invoice |
149
+ | 02 | `check_02_tax_cdv` | HU Tax Procedure Act §22 mod-11 | yes | invoice + contract + ... |
150
+ | 03 | `check_03_contract_completeness` | Universal contract completeness | no | contract |
151
+ | 04 | `check_04_proportionality` | Universal contract proportionality (>31.7%) | no | contract |
152
+ | 05 | `check_05_rounded_amounts` | ISA 240 (Journal of Accountancy 2018) | no | invoice |
153
+ | 06 | `check_06_evidence_score` | ISA 500 | no | (separate entry, info-only) |
154
+ | 07 | `check_07_materiality` | ISA 320 | no | invoice + contract + financial_report |
155
+ | 08 | `check_08_gdpr_28` | GDPR Article 28 | no (EU) | contract |
156
+ | 09 | `check_09_dd_red_flags` | M&A DD best practice | no | contract |
157
+ | 10 | `check_10_incoterms` | Incoterms 2020 | no | contract |
158
+ | 11 | `check_11_ifrs_har` | IFRS / national GAAP comparison | no | financial_report |
159
+ | 12 | `check_12_duplicate_invoice` | ISA 240 (duplicate invoice) | no | (separate entry, package-level) |
160
+ | 13 | `check_13_aml_sanctions` | AML / Sanctions screening | no | invoice + contract + ... |
161
+ | 14 | `check_14_contract_dates` | Contract date best practice | no | contract |
162
+
163
+ The dispatch in `domain_dispatch_node` skips `check_06` and `check_12` (they
164
+ have separate entry points) and filters `is_hu_specific=True` out for non-HU
165
+ documents.
166
+
167
+ ## 5. Provider system
168
+
169
+ Three providers via `configurable_alternatives`:
170
+
171
+ - **`vllm`** — `ChatOpenAI` with `base_url=VLLM_BASE_URL` pointing at the
172
+ AMD MI300X vLLM endpoint. Production default.
173
+ - **`ollama`** — `ChatOllama` with a local Ollama daemon (Qwen 2.5 7B
174
+ Instruct). Development fallback.
175
+ - **`dummy`** — `DummyChatModel` (deterministic stub, no network).
176
+ CI / eval / load.
177
+
178
+ Provider selection is **runtime-switchable** without restart:
179
+
180
+ ```python
181
+ graph.invoke(state, config={"configurable": {"llm_profile": "dummy"}})
182
+ ```
183
+
184
+ ## 6. Embedding
185
+
186
+ `BAAI/bge-m3` (2.27 GB, 1024 dim, multilingual) by default.
187
+ Sentence-transformers loads it on first call via `@lru_cache`.
188
+ Pre-downloaded at Docker build time so runtime has no network call.
189
+
190
+ ## 7. Hybrid retrieval (Chroma + BM25)
191
+
192
+ `store/hybrid_store.py` runs vector search and BM25 in parallel and merges
193
+ with Reciprocal Rank Fusion (RRF). The chunker uses natural break points
194
+ (paragraph + sentence boundaries), tuned to ~15K-char chunks with 500-char
195
+ overlap.
196
+
197
+ ## 8. Async-first runtime
198
+
199
+ LangGraph 0.6 is async-first. The Streamlit app runs the entire async layer
200
+ on a long-lived background event loop (`app/async_runtime.py`'s `AsyncRuntime`
201
+ singleton). This keeps the ChromaDB connection, the vLLM / Ollama HTTP
202
+ session, and the `AsyncSqliteSaver` SQLite pool persistent across user
203
+ interactions — they do not rebuild per request.
204
+
205
+ ## 9. Multilingual support
206
+
207
+ The codebase is English-first but multilingual-tolerant:
208
+
209
+ - The classifier matches HU/EN/DE keyword patterns.
210
+ - Risk filters tolerate HU/DE business terms.
211
+ - The OCR layer keeps `eng + hun + deu` as Tesseract languages.
212
+ - Demo data may include mixed-language documents.
213
+
214
+ The output (UI, exec summary, DOCX report) is **always English**.
CLAUDE.md ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CLAUDE.md — paperhawk
2
+
3
+ Project-level instructions for Claude Code working in this repository. Any
4
+ session that starts in this folder reads this file automatically.
5
+
6
+ **Last updated:** 2026-05-03
7
+
8
+ ---
9
+
10
+ ## 1. Project overview
11
+
12
+ A LangGraph-native, multi-agent Document Intelligence platform built for the
13
+ **AMD Developer Hackathon × lablab.ai** (May 2026). MIT-licensed, English-only
14
+ codebase, designed to run on **AMD Instinct MI300X** GPUs via the vLLM runtime
15
+ serving **Qwen 2.5 Instruct** open-source models.
16
+
17
+ The system processes business document packages (invoices, contracts, delivery
18
+ notes, purchase orders, financial reports) end-to-end:
19
+
20
+ 1. **Ingest** — PDF / DOCX / image with vision-first scanned fallback
21
+ 2. **Classify** — 6-way doc-type classifier (LLM with structured output)
22
+ 3. **Extract** — typed Pydantic schema extraction with anti-hallucination
23
+ 4. **Cross-reference** — three-way matching (invoice + delivery + PO)
24
+ 5. **Risk analysis** — basic + 14 domain rules + LLM ensemble + 3 filters
25
+ 6. **Report** — DOCX export, JSON API, executive summary
26
+
27
+ The chat layer is a 5-tool agentic ReAct loop with explicit `[Source: filename]`
28
+ citations and an anti-hallucination validator.
29
+
30
+ ---
31
+
32
+ ## 2. Workflow rules
33
+
34
+ ### Language
35
+
36
+ - **English everywhere** — code, comments, docstrings, prompts, UI, error
37
+ messages, log lines.
38
+ - **Multilingual fallback** — for legacy interop and the multilingual demo:
39
+ some loaders, classifiers, and regex filters accept HU/DE input. EN is
40
+ always the primary path.
41
+ - Two HU reference documents are kept under `docs/` with `_HU.md` suffix
42
+ (`Teljes-rendszer-attekintes-langgraph_HU.md`, `MUKODESI_LEIRAS_HU.md`).
43
+ These are read-only references; do not edit.
44
+
45
+ ### License + IP
46
+
47
+ - **MIT licensed** — see `LICENSE`.
48
+ - `NOTICE.md` is a non-binding author request (no legal force).
49
+ - Never paste proprietary code from outside this repo.
50
+
51
+ ### Provider
52
+
53
+ - The default chat provider is `vllm` (Qwen 2.5 14B Instruct on AMD MI300X
54
+ through the OpenAI-compatible vLLM endpoint).
55
+ - `ollama` is a local dev fallback (Qwen 2.5 7B Instruct on a laptop GPU/CPU).
56
+ - `dummy` is the deterministic CI / eval / smoke provider (no network, no LLM).
57
+ - Never re-introduce a Claude / Anthropic provider here — that path is
58
+ out of scope for the AMD edition.
59
+
60
+ ### Git
61
+
62
+ - The AI **NEVER** runs git operations on `main` (no commit, no push, no
63
+ cherry-pick, no merge). The user runs all `main`-branch git operations.
64
+ - The AI MAY commit on non-`main` feature branches when explicitly asked.
65
+ - The AI **NEVER** pushes — push is the user's task only.
66
+
67
+ ### Build hygiene
68
+
69
+ - Do not commit `.env`, `chroma_db/`, `data/checkpoints.sqlite`, `__pycache__/`.
70
+ - Hungarian / English commit messages are both fine; English preferred for the
71
+ public history of an MIT repo.
72
+
73
+ ### Anti-hallucination is sacred
74
+
75
+ - The 5+1 layers (`temperature=0`, `_quotes`, `_confidence`, plausibility
76
+ filters, LLM-risk 3 filters, quote validator) are not optional. Every
77
+ LLM-generated piece of data is cross-checked.
78
+ - Source citations in the chat use the canonical `[Source: filename]` format
79
+ (validator enforces this).
80
+
81
+ ---
82
+
83
+ ## 3. Repo layout
84
+
85
+ ```
86
+ paperhawk/
87
+ ├── app/ # Streamlit UI (5 tabs) + async runtime
88
+ ├── config.py # Pydantic Settings (env-bound)
89
+ ├── domain_checks/ # 14 deterministic rules + base + registry
90
+ ├── eval/ # Eval harness (questions + run_eval)
91
+ ├── graph/ # 4 compiled graphs (pipeline / chat / dd /
92
+ │ # package_insights) + 6 states + checkpointer
93
+ ├── ingest/ # PDF / DOCX / image / OCR / tables / txt
94
+ ├── infra/vllm/ # AMD MI300X deployment (Dockerfile + serve.sh + README)
95
+ ├── load/ # Load benchmarks
96
+ ├── nodes/ # Per-stage node functions:
97
+ │ ├── chat/ # chat agent + 5 tools
98
+ │ ├── dd/ # DD specialists + supervisor + synthesizer
99
+ │ ├── extract/ # extract + dummy + quote validator
100
+ │ ├── ingest/ # ingest helpers
101
+ │ ├── pipeline/ # classify / compare / duplicate / report / docx
102
+ │ └── risk/ # basic / domain dispatch / LLM risk + 3 filters
103
+ ├── providers/ # vLLM / Ollama / Dummy LLM providers + embeddings
104
+ ├── schemas/ # 6 JSON schemas + pydantic_models + flatten_universal
105
+ ├── store/ # ChromaDB + BM25 hybrid + chunking
106
+ ├── subgraphs/ # 6 reusable subgraphs (Send API parallelism)
107
+ ├── tests/ # unit + integration + e2e_api + e2e_screenshot
108
+ ├── tools/ # 5 chat tools + ChatToolContext
109
+ ├── utils/ # dates + numbers + docx_export
110
+ └── validation/ # anti-halluc layers (5+1)
111
+ ```
112
+
113
+ ---
114
+
115
+ ## 4. Hot files
116
+
117
+ When fixing bugs or adding features, these are the most-edited files:
118
+
119
+ - `graph/states/pipeline_state.py` — `Risk`, `Classification`, `ExtractedData`,
120
+ `merge_risks`, `merge_doc_results` reducers.
121
+ - `domain_checks/__init__.py` — the 14-check registry.
122
+ - `domain_checks/check_*_*.py` — individual deterministic rules.
123
+ - `nodes/risk/_prompts.py` — `RISK_SYSTEM_PROMPT` (anti-halluc 9+6+4 examples).
124
+ - `nodes/chat/_prompts.py` — `AGENTIC_SYSTEM_PROMPT` (17 rules).
125
+ - `validation/llm_risk_filters.py` — 3-filter chain.
126
+ - `app/main.py` — Streamlit UI (5 tabs).
127
+
128
+ ---
129
+
130
+ ## 5. Testing
131
+
132
+ ```bash
133
+ # Fast: unit + integration (dummy LLM)
134
+ LLM_PROFILE=dummy pytest tests/unit tests/integration -x --tb=short
135
+
136
+ # Slow: end-to-end with real LLM
137
+ LLM_PROFILE=vllm pytest tests/e2e_api -m e2e -x --tb=short
138
+
139
+ # UI Playwright (real LLM, slow)
140
+ LLM_PROFILE=vllm pytest tests/e2e_screenshot -x --tb=short
141
+ ```
142
+
143
+ `LLM_PROFILE=dummy` works without any external service. `LLM_PROFILE=vllm`
144
+ requires `VLLM_BASE_URL` to point at a running vLLM endpoint.
145
+
146
+ ---
147
+
148
+ ## 6. Deploy targets
149
+
150
+ - **Hugging Face Space** — Streamlit Space under
151
+ `huggingface.co/spaces/lablab-ai-amd-developer-hackathon/<your-space>`.
152
+ See `docs/hf-space-deployment.md`.
153
+ - **AMD Developer Cloud MI300X** — vLLM serving Qwen 2.5 14B (or 32B).
154
+ See `docs/qwen-vllm-deployment.md` and `infra/vllm/README.md`.
155
+
156
+ ---
157
+
158
+ ## 7. Pitch positioning
159
+
160
+ When writing project descriptions, the README, video, or social posts:
161
+
162
+ - **Beyond simple RAG** — multi-agent platform with 14 deterministic checks
163
+ + an LLM ensemble. The 5-tool chat is *agentic*, not retrieval-only.
164
+ - **Track 1** (AI Agents & Agentic Workflows) is the target track.
165
+ - **Cross-track**: Build in Public is in scope (AMD GPU prize).
166
+ - **HF Special Prize** is in scope (Reachy Mini robot — like-vote driven).
167
+
168
+ ---
169
+
170
+ ## 8. The Glossary (HU → EN field names)
171
+
172
+ The full per-field rename map is in
173
+ `pwc-ai-verseny/document-intelligence-agentic-langgraph-amd/ATIRASI_TERV.md`
174
+ sections **32 (field names) and 33 (severity literals)**. Keep that file
175
+ open when editing extraction schemas, domain checks, or anything that
176
+ touches the `Risk` Pydantic.
177
+
178
+ ---
179
+
180
+ ## 9. Common pitfalls
181
+
182
+ - **Severity literals**: always `"high" | "medium" | "low" | "info"` —
183
+ never `"magas" | "kozepes" | "alacsony"`. Many `_normalize_severity()`
184
+ helpers map HU → EN if legacy data sneaks in, but new code emits EN.
185
+ - **Risk fields**: `description`, `severity`, `rationale`, `kind`,
186
+ `regulation`, `affected_document`, `source_check_id`. NOT
187
+ `leiras / sulyossag / indoklas / tipus / jogszabaly / erinto_dokumentum / forras_check_id`.
188
+ - **Doc types**: `"invoice" | "delivery_note" | "purchase_order" | "contract" | "financial_report" | "other"`.
189
+ - **`_quotes` alias** (not `_idezetek`) — both in JSON schemas and Pydantic models.
190
+ - **Multilingual fallback**: read-only in classifiers and regex filters;
191
+ never emit HU in new code.
Dockerfile ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # syntax=docker/dockerfile:1.6
2
+ FROM python:3.12-slim AS base
3
+
4
+ ENV PYTHONUNBUFFERED=1 \
5
+ PYTHONDONTWRITEBYTECODE=1 \
6
+ PIP_NO_CACHE_DIR=1 \
7
+ PIP_DISABLE_PIP_VERSION_CHECK=1
8
+
9
+ # OS-level dependencies:
10
+ # - tesseract-ocr (eng + hun + deu): scanned PDF OCR fallback (multilingual demo support)
11
+ # - poppler-utils: pdfplumber table extraction
12
+ # - libmupdf-dev: PyMuPDF native lib
13
+ # - curl: healthcheck
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
+ tesseract-ocr \
16
+ tesseract-ocr-eng \
17
+ tesseract-ocr-hun \
18
+ tesseract-ocr-deu \
19
+ poppler-utils \
20
+ libmupdf-dev \
21
+ curl \
22
+ && rm -rf /var/lib/apt/lists/*
23
+
24
+ WORKDIR /app
25
+
26
+ # Python deps — CPU-only torch first (smaller image), then the rest
27
+ COPY requirements.txt .
28
+ RUN pip install --upgrade pip \
29
+ && pip install --index-url https://download.pytorch.org/whl/cpu torch \
30
+ && pip install -r requirements.txt
31
+
32
+ # Sentence-transformers model pre-download (no runtime network call).
33
+ # BAAI/bge-m3 = 2.27 GB, 1024 dim, multilingual (EN/HU/DE/FR/...).
34
+ RUN python -c "from sentence_transformers import SentenceTransformer; \
35
+ SentenceTransformer('BAAI/bge-m3')"
36
+
37
+ # Source code
38
+ COPY . .
39
+
40
+ # Streamlit healthcheck — port 7860 for HF Space deployment (HF expects this)
41
+ EXPOSE 7860
42
+ HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
43
+ CMD curl -f http://localhost:7860/_stcore/health || exit 1
44
+
45
+ CMD ["streamlit", "run", "app/main.py", \
46
+ "--server.address=0.0.0.0", \
47
+ "--server.port=7860", \
48
+ "--server.headless=true"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Nándorfi Vince, Vitai Tamás, Murcsik Gábor
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
Makefile ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: install run run-local stop test test-fast eval load samples lint clean help
2
+
3
+ PYTHON := python3.12
4
+ VENV := .venv
5
+ ACTIVATE := . $(VENV)/bin/activate
6
+
7
+ help: ## Megjeleníti a parancsokat
8
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-15s\033[0m %s\n", $$1, $$2}'
9
+
10
+ install: ## Lokális venv + függőségek
11
+ $(PYTHON) -m venv $(VENV)
12
+ $(ACTIVATE) && pip install --upgrade pip
13
+ $(ACTIVATE) && pip install --index-url https://download.pytorch.org/whl/cpu torch
14
+ $(ACTIVATE) && pip install -r requirements.txt
15
+
16
+ run: ## Docker compose: app indítás (vLLM default)
17
+ docker compose up -d --build langgraph-app
18
+ @echo "App: http://localhost:8501"
19
+
20
+ run-local: ## Docker compose: app + Ollama (lokális LLM)
21
+ docker compose --profile ollama up -d --build
22
+ @echo "App: http://localhost:8501 | Ollama: http://localhost:11434"
23
+ @echo "Első indítás: docker compose exec ollama ollama pull qwen2.5:7b-instruct"
24
+
25
+ stop: ## Docker compose leállítás
26
+ docker compose down
27
+
28
+ dev: ## Streamlit lokálisan (.venv-et feltételez)
29
+ $(ACTIVATE) && streamlit run app/main.py
30
+
31
+ test: ## Pytest teljes (lassúak nélkül)
32
+ $(ACTIVATE) && pytest tests/ -m "not slow" -v
33
+
34
+ test-fast: ## Smoke + unit tesztek dummy LLM-mel (< 30s)
35
+ $(ACTIVATE) && pytest tests/unit/ tests/integration/ -m "not slow" -q
36
+
37
+ test-e2e: ## E2E forgatókönyvek (10 db, dummy LLM)
38
+ $(ACTIVATE) && pytest tests/e2e/ -v
39
+
40
+ eval: ## 14 chat kérdés + 10 forgatókönyv eval
41
+ $(ACTIVATE) && python eval/run_eval.py --llm dummy
42
+
43
+ eval-claude: ## Eval valódi Claude LLM-mel (lassú, API-költség)
44
+ $(ACTIVATE) && python eval/run_eval.py --llm claude
45
+
46
+ load: ## Load test: 100 chat query async-gather (dummy)
47
+ $(ACTIVATE) && python load/benchmark.py --n 100
48
+
49
+ load-parallel: ## Pipeline parallel test: 20 doksi egyszerre
50
+ $(ACTIVATE) && python load/parallel_pipeline_bench.py --n 20
51
+
52
+ samples: ## 75 minta fájl (PDF+DOCX+PNG) generálása
53
+ $(ACTIVATE) && python test_data/generate_samples.py
54
+
55
+ lint: ## Ruff lint + formatter
56
+ $(ACTIVATE) && ruff check .
57
+ $(ACTIVATE) && ruff format --check .
58
+
59
+ format: ## Ruff auto-format
60
+ $(ACTIVATE) && ruff format .
61
+
62
+ clean: ## Cache + perzisztens runtime adat törlés
63
+ find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
64
+ find . -type d -name .pytest_cache -exec rm -rf {} + 2>/dev/null || true
65
+ find . -type d -name .ruff_cache -exec rm -rf {} + 2>/dev/null || true
66
+ rm -rf chroma_db/ data/checkpoints.sqlite*
NOTICE.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NOTICE
2
+
3
+ This project is released under the **MIT License** (see `LICENSE`).
4
+
5
+ ## Author intent (non-binding request)
6
+
7
+ The codebase originated from a research project conducted in Hungarian
8
+ under a proprietary license. We have re-licensed it under MIT for the
9
+ **AMD Developer Hackathon × lablab.ai** (May 2026).
10
+
11
+ The authors kindly request that:
12
+
13
+ 1. **AI/LLM training** — if you use this codebase or its derivatives in
14
+ training data for AI models, please credit the original authors
15
+ (Nándorfi Vince, Vitai Tamás, Murcsik Gábor) and link to the
16
+ original repository.
17
+
18
+ 2. **Re-translation / re-implementation** — if you produce derivative
19
+ works in other languages, a reference to the original authors is
20
+ appreciated.
21
+
22
+ 3. **Substantial reuse** — if you build a commercial product on top of
23
+ this codebase, a courtesy attribution is appreciated.
24
+
25
+ These are **kind requests, not legal restrictions** — the MIT license
26
+ governs all rights and permissions.
27
+
28
+ ## Built by
29
+
30
+ Team **CsimpiCsirkek** for the AMD Developer Hackathon × lablab.ai (2026):
31
+
32
+ - Nándorfi Vince
33
+ - Vitai Tamás
34
+ - Murcsik Gábor
README.md ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: PaperHawk
3
+ emoji: 🦅
4
+ colorFrom: red
5
+ colorTo: orange
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ short_description: Real-DI-Audit/14 rules/6 anti-halluc/LangGraph/Qwen/MI300X
10
+ ---
11
+
12
+ <p align="center">
13
+ <img src="paperhawk.jpeg" alt="PaperHawk" width="900">
14
+ </p>
15
+
16
+ <h1 align="center">PaperHawk</h1>
17
+
18
+ <p align="center">
19
+ <strong>Agentic document intelligence on AMD MI300X</strong><br>
20
+ Multi-document due diligence with deterministic domain checks and agentic LLM workflows.
21
+ </p>
22
+
23
+ <p align="center">
24
+ <a href="LICENSE"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT"></a>
25
+ <img src="https://img.shields.io/badge/python-3.12+-blue.svg" alt="Python">
26
+ <img src="https://img.shields.io/badge/LangGraph-0.6-green.svg" alt="LangGraph">
27
+ <img src="https://img.shields.io/badge/AMD-MI300X-red.svg" alt="AMD MI300X">
28
+ </p>
29
+
30
+ <p align="center">
31
+ Built for the <a href="https://lablab.ai/event/amd-developer-hackathon"><strong>AMD Developer Hackathon × lablab.ai</strong></a> (May 2026).
32
+ </p>
33
+
34
+ ---
35
+
36
+ ## What is this?
37
+
38
+ A working AI system that ingests multiple business documents (invoices,
39
+ contracts, delivery notes, purchase orders, financial reports) and:
40
+
41
+ - **Extracts structured data** with anti-hallucination layers (5+1 stack)
42
+ - **Detects risks** via 14 deterministic domain rules + LLM ensemble
43
+ - **Cross-references documents** (three-way matching for audits, M&A DD)
44
+ - **Answers questions** via 5-tool agentic chat with source citations
45
+ - **Generates audit-ready reports** (DOCX export, JSON API)
46
+
47
+ This is **not "just another RAG"** — it is a multi-agent orchestration of
48
+ specialist nodes (audit / legal / compliance / financial) over a deterministic
49
+ + LLM ensemble, with explicit anti-hallucination layers.
50
+
51
+ ## Stack
52
+
53
+ | Layer | Technology |
54
+ |-------|------------|
55
+ | Orchestration | **LangGraph 0.6** (4 graphs, 6 subgraphs, async-first, AsyncSqliteSaver) |
56
+ | LLM | **Qwen 2.5 14B Instruct** via vLLM on **AMD Instinct MI300X** |
57
+ | Embedding | **BAAI/bge-m3** (multilingual, 1024 dim, sentence-transformers) |
58
+ | Vector store | **ChromaDB + BM25** hybrid (Reciprocal Rank Fusion) |
59
+ | UI | **Streamlit** (5 tabs) — deployable as a **Hugging Face Space** |
60
+ | Testing | pytest + Playwright |
61
+
62
+ ## Architecture
63
+
64
+ ```
65
+ ┌─────────────────────────────────┐
66
+ │ Streamlit UI (5 tabs) │
67
+ └────────────┬────────────────────┘
68
+
69
+ ┌────────────────────────┼────────────────────────┐
70
+ │ │ │
71
+ ┌───────▼──────┐ ┌────────▼────────┐ ┌──────▼──────┐
72
+ │ pipeline │ │ chat_graph │ │ dd_graph │
73
+ │ _graph │ │ (5 tools, 17 │ │ (multi- │
74
+ │ (6 subgraphs)│ │ rule prompt) │ │ agent │
75
+ └───────┬──────┘ └─────────────────┘ │ super- │
76
+ │ │ visor) │
77
+ │ ┌─────────────────────────┐ └─────────────┘
78
+ ├──▶ ingest_subgraph │
79
+ ├──▶ classify (per-doc) │
80
+ ├──▶ extract_subgraph │
81
+ ├──▶ rag_index_subgraph │
82
+ ├──▶ compare_node (3-way) │
83
+ └──▶ risk_subgraph │
84
+ ├─ basic risk │
85
+ ├─ 14 domain checks │
86
+ ├─ LLM risk + 3 filters │
87
+ ├─ plausibility │
88
+ └─ duplicate (ISA 240) │
89
+ ```
90
+
91
+ See [ARCHITECTURE.md](ARCHITECTURE.md) for the full architecture.
92
+
93
+ ## Quick start
94
+
95
+ ### 1. Local dev (Ollama or dummy mode)
96
+
97
+ ```bash
98
+ git clone https://github.com/<YOUR_GH_USER>/document-intelligence-agentic-langgraph-amd
99
+ cd document-intelligence-agentic-langgraph-amd
100
+ python -m venv .venv && source .venv/bin/activate
101
+ pip install -r requirements.txt
102
+ cp .env.example .env
103
+ # Edit .env: set LLM_PROFILE=dummy (no LLM) or LLM_PROFILE=ollama (Qwen 7B local)
104
+
105
+ streamlit run app/main.py
106
+ ```
107
+
108
+ ### 2. Production (Qwen on AMD MI300X via vLLM)
109
+
110
+ ```bash
111
+ # On the AMD Developer Cloud MI300X instance:
112
+ docker run --rm --device=/dev/kfd --device=/dev/dri --group-add video \
113
+ --ipc=host --shm-size 16g \
114
+ -p 8000:8000 \
115
+ -e VLLM_MODEL=Qwen/Qwen2.5-14B-Instruct \
116
+ rocm/vllm:latest \
117
+ sh -c 'vllm serve $VLLM_MODEL --host 0.0.0.0 --port 8000 \
118
+ --tensor-parallel-size 1 --max-model-len 32768'
119
+
120
+ # On your machine (.env):
121
+ LLM_PROFILE=vllm
122
+ VLLM_BASE_URL=http://<mi300x-public-ip>:8000/v1
123
+ VLLM_MODEL=Qwen/Qwen2.5-14B-Instruct
124
+
125
+ streamlit run app/main.py
126
+ ```
127
+
128
+ See [docs/qwen-vllm-deployment.md](docs/qwen-vllm-deployment.md) for the full
129
+ walkthrough including cost monitoring and a Plan B (Ollama fallback).
130
+
131
+ ### 3. Hugging Face Space deploy
132
+
133
+ See [docs/hf-space-deployment.md](docs/hf-space-deployment.md).
134
+
135
+ ## Demo packages
136
+
137
+ Three pre-built demo packages bundled in `test_data/`:
138
+
139
+ - **Audit Demo** — 3 invoices from the same supplier; the March one is 50%
140
+ pricier (over-billing pattern detected by the package-level analyzer).
141
+ - **DD Demo** — NDA + service agreement + amendment in an acquisition
142
+ scenario (change-of-control + auto-renewal red flags).
143
+ - **Compliance Demo** — 2 contracts; one is missing the GDPR Article 28 clause.
144
+
145
+ Click the corresponding button on the **Upload** tab.
146
+
147
+ ## Documentation
148
+
149
+ - [ARCHITECTURE.md](ARCHITECTURE.md) — architecture overview (English)
150
+ - [docs/qwen-vllm-deployment.md](docs/qwen-vllm-deployment.md) — Qwen on AMD MI300X (English)
151
+ - [docs/hf-space-deployment.md](docs/hf-space-deployment.md) — Hugging Face Space deploy (English)
152
+ - [docs/LANGGRAPH_ONBOARDING.md](docs/LANGGRAPH_ONBOARDING.md) — onboarding for contributors (English)
153
+ - [CLAUDE.md](CLAUDE.md) — project-level Claude Code instructions
154
+ - [NOTICE.md](NOTICE.md) — author intent (non-binding)
155
+ - `docs/Teljes-rendszer-attekintes-langgraph_HU.md` — legacy Hungarian system overview (reference)
156
+ - `docs/MUKODESI_LEIRAS_HU.md` — legacy Hungarian operations manual (reference)
157
+
158
+ ## Built by
159
+
160
+ **Team CsimpiCsirkek** for the AMD Developer Hackathon × lablab.ai (2026):
161
+
162
+ - Nándorfi Vince
163
+ - Vitai Tamás
164
+ - Murcsik Gábor
165
+
166
+ ## License
167
+
168
+ **MIT** — see [LICENSE](LICENSE).
app/__init__.py ADDED
File without changes
app/async_runtime.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AsyncRuntime — long-lived background event loop for the Streamlit thread.
2
+
3
+ PROBLEM:
4
+ * Streamlit runs a synchronous event loop (uvloop) that CANNOT be patched
5
+ with ``nest_asyncio``.
6
+ * LangGraph (and every async resource: ChromaDB connections, the LLM HTTP
7
+ session, AsyncSqliteSaver checkpointers) assumes a LONG-LIVED async context.
8
+ * Opening a new loop per invoke means async-bound resources never amortize:
9
+ every chat message rebuilds the SQLite pool, the Chroma client, and the
10
+ HTTP session.
11
+
12
+ SOLUTION:
13
+ * A DEDICATED background thread that runs a single ``asyncio.new_event_loop()``
14
+ with ``run_forever`` for the entire app lifetime.
15
+ * The Streamlit thread (sync) hands coroutines to the background loop via
16
+ ``asyncio.run_coroutine_threadsafe(coro, loop)``; the returned Future
17
+ blocks the Streamlit thread until the result is ready.
18
+ * Singleton — started once, same instance reused.
19
+
20
+ This is the classic "embedded async runtime" pattern (see LangChain,
21
+ JupyterLab, ipykernel implementations). Robust and scales well.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import asyncio
27
+ import atexit
28
+ import threading
29
+ from collections.abc import AsyncIterator
30
+ from typing import Any, TypeVar
31
+
32
+ T = TypeVar("T")
33
+
34
+
35
class AsyncRuntime:
    """Singleton background event loop. Thread-safe submit + stream API.

    A dedicated daemon thread owns a single long-lived asyncio loop; the
    synchronous (Streamlit) thread hands coroutines over with
    ``asyncio.run_coroutine_threadsafe`` and blocks on the returned Future.
    Async-bound resources created on that loop (Chroma client, SqliteSaver,
    HTTP sessions) therefore stay alive across calls instead of being rebuilt
    on every invocation.
    """

    _instance: AsyncRuntime | None = None
    # One class-level lock guards both singleton creation and loop startup.
    _lock = threading.Lock()

    def __init__(self) -> None:
        # Lazy start: the loop and thread start on the first submit().
        self._loop: asyncio.AbstractEventLoop | None = None
        self._thread: threading.Thread | None = None
        self._started = threading.Event()

    @classmethod
    def get(cls) -> AsyncRuntime:
        """Singleton accessor — created on first call, same instance after."""
        if cls._instance is None:
            with cls._lock:
                # Double-checked locking: re-test after acquiring the lock.
                if cls._instance is None:
                    cls._instance = AsyncRuntime()
        return cls._instance

    def _ensure_started(self) -> None:
        """Start the background loop thread exactly once (idempotent).

        Raises:
            RuntimeError: if the loop thread does not come up within 5 s.
        """
        if self._started.is_set():
            return
        with self._lock:
            if self._started.is_set():
                return

            ready = threading.Event()

            def _run() -> None:
                # The loop must be created on the thread that will run it.
                self._loop = asyncio.new_event_loop()
                asyncio.set_event_loop(self._loop)
                ready.set()
                try:
                    self._loop.run_forever()
                finally:
                    self._loop.close()

            self._thread = threading.Thread(
                target=_run,
                name="async-runtime",
                daemon=True,  # auto-stops when the app exits
            )
            self._thread.start()
            # FIX: previously the wait() result was ignored, so a thread that
            # failed to start still marked the runtime as started and every
            # later submit() died on an opaque assert. Fail loudly instead.
            if not ready.wait(timeout=5.0):
                raise RuntimeError(
                    "AsyncRuntime background event loop failed to start within 5s"
                )
            self._started.set()

            # Cleanup at app shutdown.
            atexit.register(self._shutdown)

    def submit(self, coro) -> Any:
        """Submit a coroutine to the background loop, block on the result.

        This is the Streamlit thread's main API: synchronous-looking, but the
        coroutine runs on a long-lived loop so async resources (Chroma,
        SqliteSaver, embeddings) stay PERSISTENT across calls.

        Exceptions raised by the coroutine propagate to the caller via the
        Future.
        """
        self._ensure_started()
        assert self._loop is not None
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        return future.result()

    def submit_iter(self, async_gen: AsyncIterator[T]):
        """Async generator → sync iterator wrapper for Streamlit st.write_stream.

        Drives the async generator on the background loop by submitting one
        ``__anext__()`` call at a time; the Streamlit thread blocks on each
        item, yielding tokens as they arrive.
        """
        self._ensure_started()
        assert self._loop is not None

        while True:
            try:
                future = asyncio.run_coroutine_threadsafe(
                    async_gen.__anext__(), self._loop
                )
                # StopAsyncIteration propagates through future.result() and
                # ends the sync iteration.
                yield future.result()
            except StopAsyncIteration:
                break

    def _shutdown(self) -> None:
        """atexit handler — gracefully stop the background loop."""
        if self._loop is None or not self._started.is_set():
            return
        try:
            self._loop.call_soon_threadsafe(self._loop.stop)
        except Exception:
            # The loop may already be closed during interpreter teardown.
            pass
app/main.py ADDED
@@ -0,0 +1,931 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Streamlit UI — Agentic Document Intelligence (LangGraph).
2
+
3
+ 5 tabs: Upload, Results, Chat, DD Assistant, Report.
4
+
5
+ LangGraph is async-first; the Streamlit (uvloop) compatibility is handled by
6
+ the ``app.async_runtime.AsyncRuntime`` singleton with a long-lived background
7
+ event loop. The caller invokes via the synchronous ``run_async()`` wrapper.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ # Streamlit runs app/main.py directly so the project root is added explicitly
13
+ # to sys.path; that lets ``from app.streaming`` and ``from config`` resolve.
14
+ import sys
15
+ from pathlib import Path
16
+
17
+ _PROJECT_ROOT = Path(__file__).resolve().parent.parent
18
+ if str(_PROJECT_ROOT) not in sys.path:
19
+ sys.path.insert(0, str(_PROJECT_ROOT))
20
+
21
+ import json # noqa: E402
22
+ import traceback # noqa: E402
23
+ import uuid # noqa: E402
24
+ from collections import defaultdict # noqa: E402
25
+ from datetime import datetime # noqa: E402
26
+
27
+ import streamlit as st # noqa: E402
28
+ from langchain_core.messages import HumanMessage # noqa: E402
29
+
30
+ from app.streaming import run_async, run_with_progress # noqa: E402
31
+ from config import settings # noqa: E402
32
+ from graph.chat_graph import build_chat_graph # noqa: E402
33
+ from graph.dd_graph import build_dd_graph # noqa: E402
34
+ from graph.package_insights_graph import build_package_insights_graph # noqa: E402
35
+ from graph.pipeline_graph import build_pipeline_graph # noqa: E402
36
+ from providers import get_chat_model, get_dummy_handle # noqa: E402
37
+ from store import HybridStore # noqa: E402
38
+ from tools import ChatToolContext # noqa: E402
39
+ from utils.docx_export import build_docx_sync # noqa: E402
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Page config
44
+ # ---------------------------------------------------------------------------
45
+
46
+ st.set_page_config(
47
+ page_title="Agentic Document Intelligence — LangGraph",
48
+ layout="wide",
49
+ )
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Session state init
54
+ # ---------------------------------------------------------------------------
55
+
56
+
57
def _init_session_state() -> None:
    """Seed ``st.session_state`` with its default keys (idempotent).

    Safe to call on every rerun: a factory only fires for keys that are
    missing, so existing values survive. Order matters — ``tool_context``
    wraps the ``store`` seeded just before it.
    """
    factories = [
        ("thread_id", lambda: f"st_{uuid.uuid4().hex[:12]}"),
        ("store", HybridStore),
        ("tool_context", lambda: ChatToolContext(store=st.session_state.store)),
        ("pipeline_state", lambda: None),
        ("dd_contracts_summary", list),
        ("chat_history", list),
        ("docx_bytes", lambda: None),
    ]
    for key, make in factories:
        if key not in st.session_state:
            st.session_state[key] = make()
72
+
73
+
74
+ _init_session_state()
75
+
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # Sidebar — 3 buttons (Reset, Clear chat history, Clear vector store)
79
+ # ---------------------------------------------------------------------------
80
+
81
# Sidebar: provider info, processing status, and three destructive actions
# (full reset / clear chat / clear vector store).
with st.sidebar:
    st.header("Settings")
    # Which LLM backend is active (e.g. dummy / ollama / vllm).
    st.info(f"LLM Provider: **{settings.llm_profile}**")

    if st.session_state.pipeline_state:
        n_docs = len(st.session_state.pipeline_state.get("documents") or [])
        st.success(f"Documents processed: {n_docs}")
        # NOTE(review): nesting reconstructed — metric appears to be shown
        # only after a pipeline run; confirm against the original indentation.
        st.metric("Indexed chunks", st.session_state.store.chunk_count)

    st.divider()

    if st.button(
        "Full reset",
        help="Clear everything: uploaded documents, vector store, chat history, results.",
    ):
        # Dropping every session key forces _init_session_state to rebuild
        # the store and tool context on the next script run.
        for key in list(st.session_state.keys()):
            del st.session_state[key]
        st.rerun()

    if st.button(
        "Clear chat history",
        help="Only clears the chat conversation. Documents and results are kept.",
    ):
        st.session_state.chat_history = []
        st.rerun()

    if st.button(
        "Clear vector store",
        help="Clears the search index (ChromaDB). Chat will not be able to answer "
        "until you upload documents again. Results are preserved.",
    ):
        try:
            run_async(st.session_state.store.clear())
        except Exception:
            # Fallback: new instance if clear() fails
            st.session_state.store = HybridStore()
        # Rebind the tool context to the (possibly new) store and drop the
        # chat history, since its answers referenced the cleared index.
        st.session_state.tool_context = ChatToolContext(store=st.session_state.store)
        st.session_state.chat_history = []
        st.rerun()
120
+
121
+
122
+ # ---------------------------------------------------------------------------
123
+ # Title
124
+ # ---------------------------------------------------------------------------
125
+
126
+ st.title("Agentic Document Intelligence Platform")
127
+ st.caption("Multi-document cross-analysis for audit and legal use")
128
+
129
+
130
+ # ---------------------------------------------------------------------------
131
+ # 5 Tabs
132
+ # ---------------------------------------------------------------------------
133
+
134
+ tab_upload, tab_results, tab_chat, tab_dd, tab_report = st.tabs(
135
+ ["Upload", "Results", "Chat", "DD Assistant", "Report"]
136
+ )
137
+
138
+
139
+ # =============================================================================
140
+ # Demo package handler
141
+ # =============================================================================
142
+
143
# Root directory of the bundled demo document packages.
DEMO_ROOT = _PROJECT_ROOT / "test_data" / "demo_packages"

# One entry per demo button on the Upload tab. ``key`` doubles as the on-disk
# directory name under DEMO_ROOT; ``package_type`` selects the analysis
# profile used by the package-insights graph; ``label``/``description`` are
# UI strings.
DEMO_PACKAGES = [
    {
        "key": "audit_demo",
        "label": "Audit Demo",
        "package_type": "audit",
        "description": "3 invoices from the same supplier; the March one is 50% pricier.",
    },
    {
        "key": "dd_demo",
        "label": "Due Diligence Demo",
        "package_type": "dd",
        "description": "NDA + service agreement + amendment in an acquisition scenario.",
    },
    {
        "key": "compliance_demo",
        "label": "Compliance Demo",
        "package_type": "compliance",
        "description": "2 contracts; one is missing the GDPR Article 28 clause.",
    },
]
165
+
166
+
167
def _process_demo_package(pkg: dict) -> None:
    """Process a demo package end-to-end: pipeline + package_insights + (optional) DD.

    Args:
        pkg: One entry of ``DEMO_PACKAGES`` (keys: ``key``, ``label``,
            ``package_type``, ``description``).

    Side effects: updates ``st.session_state`` (``pipeline_state``,
    ``tool_context``, ``dd_contracts_summary``) and triggers ``st.rerun()``
    on success.
    """
    pkg_dir = DEMO_ROOT / pkg["key"]
    if not pkg_dir.exists():
        # Backward-compat: fall back to old HU directory name
        legacy = _PROJECT_ROOT / "test_data" / "demo_csomagok" / pkg["key"]
        if legacy.exists():
            pkg_dir = legacy
        else:
            st.error(f"Demo package directory not found: {pkg_dir}")
            return

    pdf_files = sorted(pkg_dir.glob("*.pdf"))
    if not pdf_files:
        st.error(f"No PDFs in the {pkg['label']} package: {pkg_dir}")
        return

    demo_files = [(p.name, p.read_bytes()) for p in pdf_files]
    if settings.is_dummy:
        # The dummy LLM fabricates answers from the known file names.
        get_dummy_handle().set_docs_hint([fn for fn, _ in demo_files])

    try:
        # 1) Pipeline with progress bar
        pipeline = build_pipeline_graph(st.session_state.store, llm=get_chat_model())
        progress_bar = st.progress(0.0, text=f"{pkg['label']}: starting pipeline...")
        # Rough step budget: ~4 pipeline steps per document + fixed overhead.
        total_steps = max(len(demo_files) * 4 + 6, 12)

        def _on_pipeline_progress(step: int, total: int, label: str) -> None:
            progress_bar.progress(
                min(step / total, 1.0),
                text=f"[{step}/{total}] {label}",
            )

        state = run_with_progress(
            pipeline,
            {"files": demo_files},
            on_progress=_on_pipeline_progress,
            total_steps=total_steps,
        )
        progress_bar.progress(1.0, text="Pipeline done — running package-level analysis...")

        # 2) Package insights — opt-in, runs only on demo buttons
        pkg_graph = build_package_insights_graph(llm=get_chat_model())
        pkg_state = run_async(pkg_graph.ainvoke({
            "documents": state.get("documents") or [],
            "package_type": pkg["package_type"],
        }))
        insights = pkg_state.get("final_insights")
        if insights is not None:
            state["package_insights"] = insights

        # 3) DD report — only if the package contains contracts
        contracts = [
            d for d in (state.get("documents") or [])
            if d.classification and d.classification.doc_type == "contract"
        ]
        if contracts:
            progress_bar.progress(1.0, text="DD analysis...")
            dd_graph = build_dd_graph(llm=get_chat_model())
            dd_state = run_async(dd_graph.ainvoke({"documents": contracts}))
            state["dd_report"] = dd_state.get("dd_report")
            st.session_state.dd_contracts_summary = dd_state.get("contracts") or []

        progress_bar.progress(1.0, text="Processing complete!")

        st.session_state.pipeline_state = state
        # Register each processed document with the chat tool context.
        for proc_doc in state.get("documents") or []:
            st.session_state.tool_context.add_document(proc_doc)

        n_docs = len(state.get("documents") or [])
        n_risks = len(state.get("risks") or [])
        elapsed = state.get("processing_seconds", 0)
        st.success(
            f"{pkg['label']} loaded: {n_docs} documents in {elapsed:.1f} sec, "
            f"{n_risks} risks identified. Open the Results / DD Assistant tab."
        )
    except Exception as exc:
        st.error(f"Error processing the demo package: {exc}")
        with st.expander("Developer details (full traceback)"):
            st.code(traceback.format_exc(), language="python")
        return
    # BUGFIX: st.rerun() raises Streamlit's internal RerunException, which is
    # an Exception subclass; calling it inside the try block above let the
    # broad `except Exception` swallow it and display a bogus error instead
    # of rerunning. Trigger the rerun only after the handler can no longer
    # intercept it.
    st.rerun()
248
+
249
+
250
+ # =============================================================================
251
+ # TAB 1: Upload
252
+ # =============================================================================
253
+
254
# Upload tab: manual multi-file upload + one-click demo packages.
with tab_upload:
    st.subheader("Upload documents")

    if st.session_state.pipeline_state:
        n_docs = len(st.session_state.pipeline_state.get("documents") or [])
        st.info(
            f"Currently {n_docs} documents are processed. "
            "Open the Results tab, or upload more files."
        )

    uploaded = st.file_uploader(
        "Drop documents here (PDF, DOCX, image, or text)",
        type=["pdf", "docx", "png", "jpg", "jpeg", "txt"],
        accept_multiple_files=True,
    )

    if uploaded and st.button("Start processing", type="primary"):
        files = [(f.name, f.read()) for f in uploaded]

        if settings.is_dummy:
            # The dummy LLM fabricates answers from the known file names.
            get_dummy_handle().set_docs_hint([fn for fn, _ in files])

        try:
            graph = build_pipeline_graph(st.session_state.store, llm=get_chat_model())
            progress_bar = st.progress(0.0, text="Starting...")
            # Rough step budget: ~4 pipeline steps per document + fixed overhead.
            total_steps = max(len(files) * 4 + 6, 12)

            def _on_progress(step: int, total: int, label: str) -> None:
                progress_bar.progress(
                    min(step / total, 1.0),
                    text=f"[{step}/{total}] {label}",
                )

            state = run_with_progress(
                graph,
                {"files": files},
                on_progress=_on_progress,
                total_steps=total_steps,
            )
            progress_bar.progress(1.0, text="Processing complete!")

            st.session_state.pipeline_state = state
            st.session_state.dd_contracts_summary = []  # reset DD on manual flow
            for pd in state.get("documents") or []:
                st.session_state.tool_context.add_document(pd)

            n_docs = len(state.get("documents") or [])
            n_risks = len(state.get("risks") or [])
            elapsed = state.get("processing_seconds", 0)
            st.success(
                f"Processed {n_docs} documents in {elapsed:.1f} sec; "
                f"{n_risks} risks identified."
            )
        except Exception as exc:
            st.error(f"Processing error: {exc}")
            with st.expander("Developer details (full traceback)"):
                st.code(traceback.format_exc(), language="python")
        else:
            # BUGFIX: st.rerun() raises Streamlit's internal RerunException
            # (an Exception subclass); calling it inside the try block let
            # the broad `except Exception` swallow it and display a bogus
            # error instead of rerunning. The `else` clause runs only on
            # success, outside the handler's reach.
            st.rerun()

    st.divider()
    st.subheader("Quick demo")
    st.caption(
        "Pre-built scenarios for the pitch. One click loads and processes the "
        "matching documents (pipeline + package-level analysis + DD if there are contracts)."
    )

    # One column per demo package, each with its own Run button.
    cols = st.columns(len(DEMO_PACKAGES))
    for col, pkg in zip(cols, DEMO_PACKAGES, strict=False):
        with col:
            st.markdown(f"**{pkg['label']}**")
            st.caption(pkg["description"])
            if st.button("Run", key=f"demo_{pkg['key']}"):
                _process_demo_package(pkg)
327
+
328
+
329
+ # =============================================================================
330
+ # TAB 2: Results
331
+ # =============================================================================
332
+
333
+ with tab_results:
334
+ state = st.session_state.pipeline_state
335
+ if state is None:
336
+ st.info("Upload documents on the Upload tab to see results.")
337
+ else:
338
+ report = state.get("report") or {}
339
+ perf = report.get("performance") or {}
340
+
341
+ # 4 metrics
342
+ c1, c2, c3, c4 = st.columns(4)
343
+ with c1:
344
+ st.metric("Processing time", f"{perf.get('processing_seconds', 0):.1f} sec")
345
+ with c2:
346
+ st.metric("Documents", perf.get("documents", 0))
347
+ with c3:
348
+ st.metric("Manual estimate", f"{perf.get('manual_estimate_minutes', 0)} min")
349
+ with c4:
350
+ st.metric("Speedup", f"{perf.get('speedup', 0):.1f}x")
351
+
352
+ st.divider()
353
+ st.subheader("Classification")
354
+ from domain_checks import get_evidence_score
355
+ for pd_doc in state.get("documents") or []:
356
+ if pd_doc.ingested is None:
357
+ continue
358
+ cls = pd_doc.classification
359
+ col1, col2, col3 = st.columns([3, 2, 1])
360
+ with col1:
361
+ st.write(f"**{pd_doc.ingested.file_name}**")
362
+ with col2:
363
+ doc_type_display = cls.doc_type_display if cls else "Other"
364
+ st.write(f"{doc_type_display}")
365
+ with col3:
366
+ conf = cls.confidence if cls else 0.0
367
+ doc_type = cls.doc_type if cls else "other"
368
+ ev_score = get_evidence_score(doc_type)
369
+ label = "confident" if conf > 0.8 else "uncertain"
370
+ st.write(f"{label} ({conf:.0%}) | ISA 500: {ev_score}/10")
371
+
372
+ st.divider()
373
+ st.subheader("Extracted data")
374
+ for pd in state.get("documents") or []:
375
+ file_name = pd.ingested.file_name if pd.ingested else "?"
376
+ doc_type_display = (
377
+ pd.classification.doc_type_display if pd.classification else "Other"
378
+ )
379
+ with st.expander(f"{file_name} — {doc_type_display}"):
380
+ if pd.extracted is None:
381
+ st.warning("No extracted data.")
382
+ continue
383
+
384
+ # Confidence indicators
385
+ confidence = pd.extracted.confidence or {}
386
+ if confidence:
387
+ low_fields = [k for k, v in confidence.items() if v == "low"]
388
+ medium_fields = [k for k, v in confidence.items() if v == "medium"]
389
+ if low_fields:
390
+ st.warning(
391
+ f"Low-confidence fields (verify in source): {', '.join(low_fields)}"
392
+ )
393
+ if medium_fields:
394
+ st.info(f"Fields needing interpretation: {', '.join(medium_fields)}")
395
+
396
+ # Quotes
397
+ quotes = pd.extracted.quotes or []
398
+ if quotes:
399
+ with st.expander("Source quotes (anti-hallucination)"):
400
+ for q in quotes:
401
+ st.caption(f'"{q}"')
402
+
403
+ display_data = {
404
+ k: v for k, v in pd.extracted.raw.items()
405
+ if k not in ("_source", "_quotes", "_confidence")
406
+ }
407
+ st.json(display_data)
408
+
409
+ # Cross-document checks
410
+ comp = state.get("comparison")
411
+ if comp:
412
+ st.divider()
413
+ st.subheader("Cross-document checks (three-way matching)")
414
+
415
+ ok = sum(1 for m in (comp.matches or []) if m.get("severity") == "ok")
416
+ warn = sum(1 for m in (comp.matches or []) if m.get("severity") == "warning")
417
+ crit = sum(1 for m in (comp.matches or []) if m.get("severity") == "critical")
418
+ miss = sum(1 for m in (comp.matches or []) if m.get("severity") == "missing")
419
+
420
+ mc1, mc2, mc3, mc4 = st.columns(4)
421
+ mc1.metric("OK", ok)
422
+ mc2.metric("Warning", warn)
423
+ mc3.metric("Critical", crit)
424
+ mc4.metric("Missing", miss)
425
+
426
+ for m in (comp.matches or []):
427
+ sev = m.get("severity", "ok")
428
+ msg = m.get("message", "") or m.get("field", "")
429
+ if sev == "critical":
430
+ st.error(f"CRITICAL: {msg}")
431
+ elif sev == "warning":
432
+ st.warning(f"WARNING: {msg}")
433
+ elif sev == "missing":
434
+ st.info(f"MISSING: {msg}")
435
+
436
+ if comp.summary:
437
+ st.caption(comp.summary)
438
+
439
+ # Risks — split rule-based vs AI observations
440
+ risks = state.get("risks") or []
441
+ basic = [r for r in risks if r.kind != "llm_analysis" and r.severity != "info"]
442
+ info_r = [r for r in risks if r.severity == "info"]
443
+ ai_r = [r for r in risks if r.kind == "llm_analysis"]
444
+
445
+ if basic or info_r or ai_r:
446
+ st.divider()
447
+
448
+ if basic:
449
+ st.subheader("Risks (rule-based)")
450
+ st.caption("Deterministic checks — math, logic, plausibility, regulations.")
451
+ by_sev = defaultdict(list)
452
+ for r in basic:
453
+ by_sev[r.severity].append(r)
454
+ for sev_label, sev_key in (("HIGH", "high"), ("MEDIUM", "medium"),
455
+ ("LOW", "low")):
456
+ items = by_sev.get(sev_key, [])
457
+ if not items:
458
+ continue
459
+ for r in items:
460
+ label = f"**{sev_label}: {r.description}**"
461
+ if r.rationale:
462
+ label += f"\n\n*Rationale:* {r.rationale}"
463
+ if r.regulation:
464
+ label += f"\n\n*Regulation:* {r.regulation}"
465
+ if sev_key == "high":
466
+ st.error(label)
467
+ elif sev_key == "medium":
468
+ st.warning(label)
469
+ else:
470
+ st.info(label)
471
+
472
+ if ai_r:
473
+ st.subheader("AI observations")
474
+ st.caption(
475
+ "LLM-based analysis — contextual patterns, unusual relationships. "
476
+ "Verify against the source before making decisions."
477
+ )
478
+ for r in ai_r:
479
+ label = r.description
480
+ if r.rationale:
481
+ label += f"\n\n*Rationale:* {r.rationale}"
482
+ if r.severity == "high":
483
+ st.error(f"**HIGH:** {label}")
484
+ elif r.severity == "medium":
485
+ st.warning(f"**MEDIUM:** {label}")
486
+ else:
487
+ st.info(f"**LOW:** {label}")
488
+
489
+ if info_r and not basic and not ai_r:
490
+ st.subheader("Information")
491
+ for r in info_r:
492
+ st.info(r.description)
493
+
494
+ if not risks:
495
+ st.divider()
496
+ st.success("No risk indicators found.")
497
+
498
+ # Package-level analysis — only on demo packages (opt-in)
499
+ insights = state.get("package_insights")
500
+ if insights is not None:
501
+ st.divider()
502
+ st.subheader("Package-level analysis")
503
+ st.caption(
504
+ "Beyond the automatic pipeline, the AI also reviews the full document "
505
+ "package together from a cross-doc perspective. It looks for patterns "
506
+ "visible only when the documents are reviewed together."
507
+ )
508
+
509
+ if insights.executive_summary:
510
+ st.markdown("**Executive summary**")
511
+ st.write(insights.executive_summary)
512
+
513
+ if insights.findings:
514
+ st.markdown("**Package-level risks**")
515
+ for f in insights.findings:
516
+ sev = (f.get("severity") or f.get("sulyossag") or "low").lower()
517
+ description = f.get("description") or f.get("leiras", "")
518
+ rationale = f.get("rationale") or f.get("indoklas", "")
519
+ affected = f.get("affected_documents") or f.get("erinto_dokumentumok") or []
520
+
521
+ label = description
522
+ if rationale:
523
+ label += f"\n\n*Rationale:* {rationale}"
524
+ if affected:
525
+ label += f"\n\n*Affected documents:* {', '.join(affected)}"
526
+
527
+ if sev in ("high", "magas"):
528
+ st.error(f"**HIGH:** {label}")
529
+ elif sev in ("medium", "kozepes", "közepes"):
530
+ st.warning(f"**MEDIUM:** {label}")
531
+ else:
532
+ st.info(f"**LOW:** {label}")
533
+
534
+ if insights.key_observations:
535
+ st.markdown("**Key observations**")
536
+ for obs in insights.key_observations:
537
+ st.write(f"- {obs}")
538
+
539
+
540
+ # =============================================================================
541
+ # TAB 3: Chat
542
+ # =============================================================================
543
+
544
with tab_chat:
    st.subheader("Ask about your documents")
    if st.session_state.pipeline_state is None:
        # Chat tools operate on the processed pipeline state — nothing to query yet.
        st.info("Upload and process documents to use the chat.")
    else:
        st.caption(
            "Agentic mode — the AI uses tools to answer "
            "(search, extraction, comparison, validation)."
        )

        # History — replay the persisted transcript on every Streamlit rerun,
        # including the optional per-message source citations.
        for msg in st.session_state.chat_history:
            with st.chat_message(msg["role"]):
                st.markdown(msg["content"])
                if msg.get("sources"):
                    with st.expander("Sources"):
                        for src in msg["sources"]:
                            st.write(f"- {src}")

        # Walrus: st.chat_input returns None until the user submits a prompt.
        if prompt := st.chat_input("Ask anything about the uploaded documents..."):
            st.session_state.chat_history.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            # The chat graph is rebuilt per turn; tool_context carries the
            # processed-document handles the tools operate on.
            llm = get_chat_model()
            chat_graph = build_chat_graph(llm, st.session_state.tool_context)

            with st.chat_message("assistant"):
                with st.spinner("Analyzing..."):
                    try:
                        # run_async bridges to the long-lived background event
                        # loop (see app/streaming.py).
                        result_state = run_async(chat_graph.ainvoke({
                            "messages": [HumanMessage(content=prompt)],
                        }))
                        answer = result_state.get("final_answer", "(empty answer)")
                        sources = result_state.get("sources_cited") or []
                    except Exception as exc:
                        # Surface the failure as the assistant's reply instead
                        # of crashing the Streamlit script run.
                        answer = f"Chat error: {exc}"
                        sources = []
                st.markdown(answer)
                if sources:
                    with st.expander("Sources"):
                        for src in sources:
                            st.write(f"- {src}")

            # Persist the assistant turn so the history loop above re-renders it.
            st.session_state.chat_history.append({
                "role": "assistant",
                "content": answer,
                "sources": sources,
            })
593
+
594
+
595
+ # =============================================================================
596
+ # TAB 4: DD Assistant
597
+ # =============================================================================
598
+
599
with tab_dd:
    st.subheader("Due Diligence assistant")
    st.caption(
        "Contract portfolio analysis from an acquisition / DD perspective: "
        "near-term expirations, change-of-control clauses, GDPR risks, monthly "
        "obligations and critical red flags. Multi-agent supervisor "
        "(audit + legal + compliance + financial)."
    )

    state = st.session_state.pipeline_state
    if state is None:
        st.info("Upload and process contracts to start a DD analysis.")
    else:
        # DD only makes sense on contract-type documents; filter by the
        # classifier's verdict.
        contracts = [
            d for d in (state.get("documents") or [])
            if d.classification and d.classification.doc_type == "contract"
        ]
        if not contracts:
            st.warning(
                f"Of the {len(state.get('documents') or [])} processed documents "
                "none are contracts. The DD assistant operates on contract-type "
                "documents only. Try the demo package."
            )
        else:
            st.success(f"{len(contracts)} contracts in the portfolio.")

            if st.button("Start DD analysis", type="primary"):
                try:
                    dd_graph = build_dd_graph(llm=get_chat_model())
                    with st.spinner("Multi-agent supervisor running..."):
                        # Bridge to the background event loop (app/streaming.py).
                        dd_state = run_async(dd_graph.ainvoke({"documents": contracts}))
                    # Persist results in session state, then rerun so the
                    # report section below picks them up.
                    state["dd_report"] = dd_state.get("dd_report")
                    st.session_state.dd_contracts_summary = dd_state.get("contracts") or []
                    st.session_state.pipeline_state = state
                    st.rerun()
                except Exception as exc:
                    st.error(f"DD analysis error: {exc}")
                    with st.expander("Developer details (full traceback)"):
                        st.code(traceback.format_exc(), language="python")

            report = state.get("dd_report")
            contracts_summary = st.session_state.dd_contracts_summary

            if report is not None:
                st.divider()
                st.subheader("Executive summary")
                st.write(report.executive_summary)

                mc1, mc2, mc3, mc4 = st.columns(4)
                mc1.metric("Contracts", report.contract_count)
                mc2.metric("High-risk", len(report.high_risk_contracts))
                mc3.metric("Expiring soon (12 mo)", len(report.expiring_soon))
                mc4.metric("Top red flags", len(report.top_red_flags))

                if report.total_monthly_obligations:
                    st.subheader("Monthly obligations (estimated)")
                    # Cap at 4 metric columns; strict=False drops any
                    # currencies beyond the visible columns.
                    obl_cols = st.columns(min(len(report.total_monthly_obligations), 4))
                    for col, (cur, amt) in zip(
                        obl_cols, report.total_monthly_obligations.items(), strict=False
                    ):
                        col.metric(cur, f"{amt:,.0f}")

                if report.top_red_flags:
                    st.subheader("Top red flags")
                    for i, flag in enumerate(report.top_red_flags, start=1):
                        st.error(f"{i}. {flag}")

                if report.expiring_soon:
                    st.subheader("Expiring soon (within 12 months)")
                    for fname in report.expiring_soon:
                        st.warning(f"- {fname}")

            if contracts_summary:
                st.subheader("Contract details")
                for c in contracts_summary:
                    with st.expander(
                        f"{c.file_name} — {c.risk_level.upper()} risk"
                    ):
                        st.write(f"**Type:** {c.contract_type}")
                        if c.parties:
                            st.write(f"**Parties:** {', '.join(c.parties)}")
                        if c.effective_date or c.expiry_date:
                            st.write(
                                f"**Validity:** {c.effective_date or '?'} — "
                                f"{c.expiry_date or '?'}"
                            )
                        if c.total_value:
                            st.write(
                                f"**Value:** {c.total_value:,.0f} {c.currency}"
                            )
                        if c.monthly_fee:
                            st.write(
                                f"**Monthly fee:** {c.monthly_fee:,.0f} {c.monthly_fee_currency}"
                            )
                        if c.risk_elements:
                            st.write("**Risk elements:**")
                            for k in c.risk_elements:
                                st.write(f"- {k}")
                        if c.red_flags:
                            st.write("**Red flags:**")
                            for p in c.red_flags:
                                st.write(f"- {p}")
701
+
702
+
703
+ # =============================================================================
704
+ # TAB 5: Report
705
+ # =============================================================================
706
+
707
with tab_report:
    state = st.session_state.pipeline_state
    # The trailing "if state" already guards against None, so no extra
    # "(state or {})" fallback is needed on the lookup itself.
    report = (state.get("report") or {}) if state else {}

    if not state or not report:
        st.info("Upload and process documents to generate a report.")
    else:
        st.subheader("Report")
        if report.get("generated_at"):
            st.write(f"**Generated at:** {report['generated_at']}")
        st.write(f"**Document count:** {report.get('document_count', 0)}")

        # Executive summary (LLM)
        if report.get("executive_summary"):
            st.subheader("Executive summary")
            st.write(report["executive_summary"])

        # Cross-document section — three-way-match severity counters.
        comp = report.get("comparison")
        if comp:
            st.subheader("Cross-document checks")
            matches = comp.get("matches") or []
            ok = sum(1 for m in matches if m.get("severity") == "ok")
            warn = sum(1 for m in matches if m.get("severity") == "warning")
            crit = sum(1 for m in matches if m.get("severity") == "critical")
            mc1, mc2, mc3 = st.columns(3)
            mc1.metric("OK", ok)
            mc2.metric("Warning", warn)
            mc3.metric("Critical", crit)

        # Risks split — rule-based vs AI observations. The report buckets
        # risks by severity; flatten before splitting by kind.
        risk_buckets = report.get("risks") or {}
        all_risks = (
            (risk_buckets.get("high") or [])
            + (risk_buckets.get("medium") or [])
            + (risk_buckets.get("low") or [])
            + (risk_buckets.get("info") or [])
        )

        if all_risks:
            basic_r = [r for r in all_risks if r.get("kind") != "llm_analysis"]
            ai_r = [r for r in all_risks if r.get("kind") == "llm_analysis"]

            if basic_r:
                st.subheader("Risks (rule-based)")
                for r in basic_r:
                    sev = r.get("severity", "low")
                    description = r.get("description", "")
                    if sev == "high":
                        st.error(f"HIGH: {description}")
                    elif sev == "medium":
                        st.warning(f"MEDIUM: {description}")
                    elif sev == "info":
                        st.info(f"INFO: {description}")
                    else:
                        st.info(f"LOW: {description}")

            if ai_r:
                st.subheader("AI observations")
                st.caption("Verify against the source before making decisions.")
                for r in ai_r:
                    sev = r.get("severity", "low")
                    description = r.get("description", "")
                    rationale = r.get("rationale", "")
                    label = description if not rationale else f"{description} — {rationale}"
                    if sev == "high":
                        st.error(f"HIGH: {label}")
                    elif sev == "medium":
                        st.warning(f"MEDIUM: {label}")
                    else:
                        st.info(f"LOW: {label}")

        # Package-level analysis section. Findings may carry either English
        # or legacy Hungarian keys (severity/sulyossag, description/leiras, ...)
        # depending on the LLM output language — accept both.
        package_section = report.get("package_insights")
        if package_section:
            st.divider()
            st.subheader("Package-level analysis")
            st.caption(
                "Beyond the automatic pipeline, the AI reviewed the full document "
                "package as a whole from a cross-doc perspective."
            )
            if package_section.get("executive_summary"):
                st.markdown("**Executive summary**")
                st.write(package_section["executive_summary"])

            package_findings = package_section.get("findings") or []
            if package_findings:
                st.markdown("**Package-level risks**")
                for f in package_findings:
                    sev = (f.get("severity") or f.get("sulyossag") or "low").lower()
                    description = f.get("description") or f.get("leiras", "")
                    rationale = f.get("rationale") or f.get("indoklas", "")
                    affected = f.get("affected_documents") or f.get("erinto_dokumentumok") or []

                    label = description
                    if rationale:
                        label += f"\n\n*Rationale:* {rationale}"
                    if affected:
                        label += f"\n\n*Affected documents:* {', '.join(affected)}"

                    if sev in ("high", "magas"):
                        st.error(f"**HIGH:** {label}")
                    elif sev in ("medium", "kozepes", "közepes"):
                        st.warning(f"**MEDIUM:** {label}")
                    else:
                        st.info(f"**LOW:** {label}")

            observations = package_section.get("key_observations") or []
            if observations:
                st.markdown("**Key observations**")
                for obs in observations:
                    st.write(f"- {obs}")

        # DD analysis section
        dd_section = report.get("dd_analysis")
        if dd_section:
            st.divider()
            st.subheader("Due Diligence analysis")
            st.caption("Contract portfolio analysis from an acquisition / DD perspective.")

            if dd_section.get("executive_summary"):
                st.markdown("**Executive summary**")
                st.write(dd_section["executive_summary"])

            red_flags = dd_section.get("top_red_flags") or []
            if red_flags:
                st.markdown("**Top red flags**")
                for flag in red_flags:
                    st.error(flag)

            contracts_list = dd_section.get("contracts") or []
            if contracts_list:
                st.markdown("**Per-contract risk level**")
                for c in contracts_list:
                    # Entries may be Pydantic models or plain dicts; normalize.
                    if hasattr(c, "model_dump"):
                        c = c.model_dump()
                    level = c.get("risk_level") or c.get("kockazati_szint", "low")
                    file_name = c.get("file_name", "")
                    contract_type = c.get("contract_type") or c.get("szerzodes_tipusa", "")
                    parties = ", ".join(c.get("parties") or c.get("felek") or [])
                    label = f"{file_name} ({contract_type})"
                    if parties:
                        label += f" — Parties: {parties}"
                    if level in ("high", "magas"):
                        st.error(f"**HIGH:** {label}")
                    elif level in ("medium", "kozepes", "közepes"):
                        st.warning(f"**MEDIUM:** {label}")
                    else:
                        st.info(f"**LOW:** {label}")

            obligations = dd_section.get("total_monthly_obligations") or {}
            if obligations:
                st.markdown("**Monthly obligations (estimated)**")
                # Cap at 4 metric columns; strict=False drops overflow currencies.
                obl_cols = st.columns(min(len(obligations), 4))
                for col, (currency, amount) in zip(
                    obl_cols, obligations.items(), strict=False
                ):
                    col.metric(currency, f"{amount:,.0f}")

        # JSON view (debug)
        st.divider()
        with st.expander("JSON view (raw)"):
            st.json(report)

        # Export
        st.subheader("Export")
        col_json, col_docx = st.columns(2)
        with col_json:
            # default=str stringifies non-JSON-native values (e.g. datetimes)
            # instead of raising.
            report_json = json.dumps(report, ensure_ascii=False, indent=2, default=str)
            st.download_button(
                label="Download report (JSON)",
                data=report_json,
                file_name=f"report_{datetime.now().strftime('%Y%m%d_%H%M')}.json",
                mime="application/json",
                help="Raw data in JSON form — for machine processing or archival.",
            )

        with col_docx:
            if st.button("Generate DOCX report", type="primary"):
                try:
                    # Cache the generated bytes in session state so the
                    # download button survives the post-click rerun.
                    docx_bytes = build_docx_sync(state)
                    st.session_state.docx_bytes = docx_bytes
                    st.success("DOCX ready — click the download button.")
                except Exception as exc:
                    st.error(f"DOCX generation error: {exc}")
                    with st.expander("Developer details"):
                        st.code(traceback.format_exc(), language="python")

            if st.session_state.docx_bytes:
                st.download_button(
                    label="Download DOCX",
                    data=st.session_state.docx_bytes,
                    file_name=f"report_{datetime.now().strftime('%Y%m%d_%H%M')}.docx",
                    mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                    help="Formatted Word document — for printing, presentations, or client handoff.",
                )
903
+
904
+
905
+ # ---------------------------------------------------------------------------
906
+ # Applied standards footer (dynamic — only the actually triggered standards)
907
+ # ---------------------------------------------------------------------------
908
+
909
# Only risks that actually fired contribute standards, so the footer lists
# exactly the methods applied to THIS document set.
if st.session_state.pipeline_state:
    _state = st.session_state.pipeline_state
    _risks = _state.get("risks") or []
    if _risks:
        # Local import keeps the domain_checks package off the critical
        # startup path when no pipeline has run yet.
        from domain_checks import get_applied_standards
        _standards = get_applied_standards(_risks)
        if _standards:
            st.divider()
            st.caption(
                "**Applied standards and methods:** "
                + " | ".join(_standards)
            )
921
+
922
+
923
+ # ---------------------------------------------------------------------------
924
+ # Footer (MIT-licensed; see LICENSE)
925
+ # ---------------------------------------------------------------------------
926
+
927
# Static attribution footer, rendered on every script run.
st.divider()
st.caption(
    "Built by Team CsimpiCsirkek for the AMD Developer Hackathon × lablab.ai (2026). "
    "MIT licensed — see LICENSE. Powered by LangGraph + Qwen on AMD MI300X."
)
app/streaming.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Streamlit + asyncio integration helper.
2
+
3
+ Bridges Streamlit (uvloop) and LangGraph (asyncio) via a long-lived background
4
+ event loop (see app/async_runtime.py).
5
+
6
+ ``run_async()`` and ``stream_async()`` are simple wrappers — every call uses
7
+ the same background loop, so persistent resources (ChromaDB, AsyncSqliteSaver,
8
+ sentence-transformers cache) are NOT rebuilt per call.
9
+
10
+ ``run_with_progress()`` produces per-event progress-bar updates from the
11
+ ``astream(stream_mode="updates")`` event stream.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import AsyncIterator
17
+ from typing import Any, Callable
18
+
19
+ from app.async_runtime import AsyncRuntime
20
+
21
+
22
def run_async(coro):
    """Execute ``coro`` on the shared background event loop and block for its result.

    Thin synchronous facade over :class:`AsyncRuntime` — the same loop serves
    every call, so loop-bound resources persist across invocations.
    """
    runtime = AsyncRuntime.get()
    return runtime.submit(coro)
25
+
26
+
27
def stream_async(async_gen: AsyncIterator[Any]):
    """Adapt an async iterator into a plain sync generator.

    The background loop drives ``async_gen``; items surface on the caller's
    thread, which makes the result directly usable with ``st.write_stream``.
    """
    bridge = AsyncRuntime.get()
    yield from bridge.submit_iter(async_gen)
30
+
31
+
32
# Maps pipeline-graph node names to human-readable progress labels.
# run_with_progress() falls back to the raw node name for unknown nodes.
_PROGRESS_LABEL_MAP = {
    "start_timer": "Starting",
    "ingest_per_doc": "Loading documents",
    "ingest_join": "Loading documents (join)",
    "classify_per_doc": "Classifying",
    "classify_join": "Classifying (join)",
    "extract_per_doc": "Extracting structured data",
    "extract_join": "Extracting (join)",
    "quote_validator": "Quote verification",
    "rag_index_per_doc": "Indexing",
    "rag_join": "Indexing (join)",
    "compare": "Cross-document checks",
    "risk": "Risk analysis",
    "report": "Generating report",
    "finish_timer": "Done",
}
48
+
49
+
50
def run_with_progress(
    graph,
    input_state: dict,
    on_progress: Callable[[int, int, str], None] | None = None,
    total_steps: int | None = None,
) -> dict:
    """Drive LangGraph ``astream`` with progress callbacks; return the final state.

    The background event loop runs the async stream, while ``on_progress``
    fires on the CALLER thread (the Streamlit main thread) after each event,
    so ``st.progress(...)`` widgets may be updated from it safely.

    Args:
        graph: a CompiledStateGraph (or anything supporting astream).
        input_state: the graph entry state.
        on_progress: optional ``(step, total, label)`` callback; Streamlit
            widget calls are safe here (caller thread).
        total_steps: optional progress-bar denominator; when omitted the
            denominator grows with the step count (floor of 12).

    Returns:
        The graph's final state, equivalent to ``ainvoke()``'s result.
    """

    async def _paired_events():
        # Multi-stream-mode astream yields (stream_mode, event) pairs already;
        # re-yield them so submit_iter can bridge the stream synchronously.
        async for mode, payload in graph.astream(
            input_state, stream_mode=["updates", "values"]
        ):
            yield mode, payload

    latest_values: dict = {}
    completed = 0

    # submit_iter converts the async iterator into a sync one on the caller
    # thread, which is what keeps the progress callback Streamlit-safe.
    for mode, payload in AsyncRuntime.get().submit_iter(_paired_events()):
        if mode == "values" and isinstance(payload, dict):
            # "values" events carry full state snapshots; keep the latest.
            latest_values = payload
        elif mode == "updates":
            # One "updates" event may cover several nodes; count each one.
            for node in (payload or {}):
                completed += 1
                if on_progress is None:
                    continue
                denominator = (
                    total_steps if total_steps is not None else max(completed, 12)
                )
                on_progress(completed, denominator, _PROGRESS_LABEL_MAP.get(node, node))

    return latest_values
app/tabs/__init__.py ADDED
File without changes
config.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Central configuration — Pydantic BaseSettings env-bound.
2
+
3
+ Single source of truth: the ``settings = Settings()`` singleton. Every module
4
+ imports this. The ``.env`` file is automatically loaded (python-dotenv) if it
5
+ exists in the project root.
6
+
7
+ Profiles:
8
+ * ``LLM_PROFILE=vllm`` — Qwen 2.5 on AMD MI300X via vLLM (OpenAI-compat). Production default.
9
+ * ``LLM_PROFILE=ollama`` — local Ollama (Qwen 2.5 7B Instruct). Dev / data-privacy.
10
+ * ``LLM_PROFILE=dummy`` — deterministic stub (CI / eval / load).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from pathlib import Path
16
+ from typing import Literal
17
+
18
+ from pydantic import Field, computed_field
19
+ from pydantic_settings import BaseSettings, SettingsConfigDict
20
+
21
+ # Project root absolute path — independent of where we are launched from
22
+ PROJECT_ROOT = Path(__file__).resolve().parent
23
+
24
+
25
class Settings(BaseSettings):
    """Full application runtime configuration.

    Every field reads from .env or env vars, with defaults. If .env does not
    exist, the defaults run.
    """

    model_config = SettingsConfigDict(
        env_file=PROJECT_ROOT / ".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",  # don't raise on unknown env vars (e.g. LANGCHAIN_*)
    )

    # ---------------------------------------------------------------------
    # LLM provider selection
    # ---------------------------------------------------------------------
    llm_profile: Literal["vllm", "ollama", "dummy"] = "vllm"
    """Default LLM profile. Runtime override:
    ``graph.invoke(state, config={"configurable": {"llm_profile": "dummy"}})``."""

    # vLLM (AMD Developer Cloud MI300X) — production default
    vllm_base_url: str = "http://localhost:8000/v1"
    """vLLM endpoint URL. In production: http://<mi300x-public-ip>:8000/v1"""

    vllm_model: str = "Qwen/Qwen2.5-14B-Instruct"
    """Model id served by vLLM. Alternatives: Qwen/Qwen2.5-32B-Instruct, Qwen/Qwen2.5-7B-Instruct."""

    vllm_api_key: str | None = None
    """Optional API key for vLLM. If unset, sent as 'EMPTY' (vLLM no-auth mode).
    In production set a real key and start vLLM with --api-key <key>."""

    vllm_temperature: float = 0.0
    vllm_max_tokens: int = 4096

    # Ollama — local fallback
    ollama_base_url: str = "http://localhost:11434"
    ollama_model: str = "qwen2.5:7b-instruct"
    ollama_temperature: float = 0.0

    # ---------------------------------------------------------------------
    # Embedding model — sentence-transformers, runs locally on CPU
    # ---------------------------------------------------------------------
    embedding_model: str = "BAAI/bge-m3"
    """Default: BAAI/bge-m3 (2.27 GB, 1024 dim, multilingual EN/HU/DE/FR/...).
    Lighter alternative if memory-constrained: BAAI/bge-small-en-v1.5 (133 MB, 384 dim, en-only)."""

    # ---------------------------------------------------------------------
    # Storage
    # ---------------------------------------------------------------------
    # Paths default relative to the repo so launches from any CWD behave the same.
    chroma_path: Path = Field(default=PROJECT_ROOT / "chroma_db")
    chroma_collection: str = "documents"
    checkpoint_db_path: Path = Field(default=PROJECT_ROOT / "data" / "checkpoints.sqlite")

    # ---------------------------------------------------------------------
    # Pipeline tuning
    # ---------------------------------------------------------------------
    chunk_max_chars: int = 15_000
    chunk_overlap_chars: int = 500
    single_call_threshold: int = 30_000
    """If doc.full_text < this many chars, a single LLM call is enough (no chunking)."""

    # Loop guards
    chat_max_iterations: int = 10
    """Chat agent ↔ tools loop max iterations — infinite-loop guard."""

    validator_max_retries: int = 2
    """Chat validator → agent retry count when source citations are missing."""

    dd_supervisor_max_iterations: int = 4
    """DD supervisor max iterations before forced synthesizer fallback."""

    # ---------------------------------------------------------------------
    # Streamlit
    # ---------------------------------------------------------------------
    streamlit_port: int = 8501

    # ---------------------------------------------------------------------
    # LangSmith observability (optional)
    # ---------------------------------------------------------------------
    langchain_tracing_v2: bool = False
    langchain_api_key: str | None = None
    langchain_project: str = "document-intelligence-amd"

    # ---------------------------------------------------------------------
    # Computed fields — derived, read-only; serialized alongside regular fields.
    # ---------------------------------------------------------------------
    @computed_field
    @property
    def project_root(self) -> Path:
        """Absolute repo root, exposed for modules that need on-disk paths."""
        return PROJECT_ROOT

    @computed_field
    @property
    def langsmith_enabled(self) -> bool:
        """True only when tracing is switched on AND an API key is present."""
        return self.langchain_tracing_v2 and bool(self.langchain_api_key)

    @computed_field
    @property
    def is_dummy(self) -> bool:
        """True when the deterministic stub profile (CI / eval / load) is active."""
        return self.llm_profile == "dummy"
126
+
127
+
128
# Singleton — every module imports this.
# Instantiated at import time: .env / environment are read once per process.
settings = Settings()
data/sanctions_snapshot.json ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "source": "EU Consolidated Sanctions List + OFAC SDN (snapshot)",
4
+ "date": "2026-04-10",
5
+ "note": "Statikus demo lista -- nem elo API. Frissitendo periodikusan."
6
+ },
7
+ "entities": [
8
+ {
9
+ "name": "Gazprom",
10
+ "country": "RU",
11
+ "type": "entity"
12
+ },
13
+ {
14
+ "name": "Rosneft",
15
+ "country": "RU",
16
+ "type": "entity"
17
+ },
18
+ {
19
+ "name": "Sberbank",
20
+ "country": "RU",
21
+ "type": "entity"
22
+ },
23
+ {
24
+ "name": "VTB Bank",
25
+ "country": "RU",
26
+ "type": "entity"
27
+ },
28
+ {
29
+ "name": "Rostec",
30
+ "country": "RU",
31
+ "type": "entity"
32
+ },
33
+ {
34
+ "name": "Almaz-Antey",
35
+ "country": "RU",
36
+ "type": "entity"
37
+ },
38
+ {
39
+ "name": "Kalashnikov Concern",
40
+ "country": "RU",
41
+ "type": "entity"
42
+ },
43
+ {
44
+ "name": "Russian Direct Investment Fund",
45
+ "country": "RU",
46
+ "type": "entity"
47
+ },
48
+ {
49
+ "name": "Novatek",
50
+ "country": "RU",
51
+ "type": "entity"
52
+ },
53
+ {
54
+ "name": "Sovcomflot",
55
+ "country": "RU",
56
+ "type": "entity"
57
+ },
58
+ {
59
+ "name": "Belaruskali",
60
+ "country": "BY",
61
+ "type": "entity"
62
+ },
63
+ {
64
+ "name": "Belneftekhim",
65
+ "country": "BY",
66
+ "type": "entity"
67
+ },
68
+ {
69
+ "name": "National Iranian Oil Company",
70
+ "country": "IR",
71
+ "type": "entity"
72
+ },
73
+ {
74
+ "name": "Bank Melli Iran",
75
+ "country": "IR",
76
+ "type": "entity"
77
+ },
78
+ {
79
+ "name": "Bank Saderat Iran",
80
+ "country": "IR",
81
+ "type": "entity"
82
+ },
83
+ {
84
+ "name": "Korea Mining Development Trading Corporation",
85
+ "country": "KP",
86
+ "type": "entity"
87
+ },
88
+ {
89
+ "name": "Commercial Bank of Syria",
90
+ "country": "SY",
91
+ "type": "entity"
92
+ },
93
+ {
94
+ "name": "Volga Industrial Holdings",
95
+ "country": "RU",
96
+ "type": "entity"
97
+ }
98
+ ],
99
+ "high_risk_countries": [
100
+ "RU",
101
+ "BY",
102
+ "IR",
103
+ "KP",
104
+ "SY",
105
+ "CU",
106
+ "VE",
107
+ "PA",
108
+ "VG",
109
+ "KY",
110
+ "BZ",
111
+ "SC",
112
+ "VU"
113
+ ]
114
+ }
docker-compose.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
services:
  # ---------------------------------------------------------------------------
  # Streamlit + LangGraph backend
  # ---------------------------------------------------------------------------
  langgraph-app:
    build: .
    image: paperhawk:latest
    container_name: document-intelligence-amd
    ports:
      - "8501:8501"
    env_file:
      # Long-form syntax (Compose spec, same generation as depends_on.required
      # below): do not fail when .env is absent — the repo ships only
      # .env.example, and every variable below has a shell fallback anyway.
      - path: .env
        required: false
    environment:
      # Default vLLM — overridable via .env or shell export
      - LLM_PROFILE=${LLM_PROFILE:-vllm}
      - VLLM_BASE_URL=${VLLM_BASE_URL:-http://localhost:8000/v1}
      - VLLM_MODEL=${VLLM_MODEL:-Qwen/Qwen2.5-14B-Instruct}
      - OLLAMA_BASE_URL=http://ollama:11434
    volumes:
      # AsyncSqliteSaver checkpointer persists across restarts
      - ./data:/app/data
      # ChromaDB persistent vector store
      - ./chroma_db:/app/chroma_db
    depends_on:
      ollama:
        condition: service_healthy
        # Optional dependency: the app starts even when the ollama profile
        # is not enabled (vLLM profile is the production default).
        required: false
    restart: unless-stopped

  # ---------------------------------------------------------------------------
  # Ollama LLM server (OPTIONAL profile — local dev fallback)
  # ---------------------------------------------------------------------------
  # Start: docker compose --profile ollama up -d
  # Model: docker compose exec ollama ollama pull qwen2.5:7b-instruct
  ollama:
    image: ollama/ollama:latest
    container_name: document-intelligence-amd-ollama
    profiles: ["ollama"]
    ports:
      - "11434:11434"
    volumes:
      - ollama_models:/root/.ollama
    healthcheck:
      test: ["CMD", "ollama", "list"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s
    restart: unless-stopped

volumes:
  ollama_models:
docs/HF_SPACE_DEFAULT_GETTING_STARTED.md ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HF Space Default Getting Started — Snapshot 2026-05-05
2
+
3
+ A `lablab-ai-amd-developer-hackathon/paperhawk` Space létrehozása után a HF Spaces egy default "Get Started" útmutatót mutat. Ezt mentjük el itt referenciaként, mert a default Dockerfile-mintája hasznos referencia a paperhawk Dockerfile átírásához (port 8501 → 7860, user-setup pattern).
4
+
5
+ **Forrás**: a Space oldal alján, a default-README után jelent meg.
6
+
7
+ **URL**: https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/paperhawk
8
+
9
+ **Kontextus**: a Space frissen létrehozva, Docker SDK + Blank template + `Real-DI-Audit/14 rules/6 anti-halluc/LangGraph/Qwen/MI300X` short description.
10
+
11
+ ---
12
+
13
+ ## Get started with your Docker Space!
14
+
15
+ Your space has been created, follow these steps to get started (or read the full [documentation](https://huggingface.co/docs/hub/spaces-sdks-docker))
16
+
17
+ ### Start by cloning this repo by using:
18
+
19
+ **HTTPS:**
20
+
21
+ ```bash
22
+ git clone https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/paperhawk
23
+ ```
24
+
25
+ **SSH:**
26
+
27
+ ```bash
28
+ git clone git@hf.co:spaces/lablab-ai-amd-developer-hackathon/paperhawk
29
+ ```
30
+
31
+ ### Make sure you're CLI v2.x.x or above:
32
+
33
+ ```bash
34
+ curl -LsSf https://hf.co/cli/install.sh | sh
35
+ ```
36
+
37
+ ### Download the Space:
38
+
39
+ ```bash
40
+ hf download lablab-ai-amd-developer-hackathon/paperhawk --repo-type=space
41
+ ```
42
+
43
+ ---
44
+
45
+ ## Let's create a simple Python app using FastAPI
46
+
47
+ ### `requirements.txt`
48
+
49
+ ```
50
+ fastapi
51
+ uvicorn[standard]
52
+ ```
53
+
54
+ > **Hint:** You can also create the requirements file directly in your browser.
55
+
56
+ ### `app.py`
57
+
58
+ ```python
59
+ from fastapi import FastAPI
60
+
61
+ app = FastAPI()
62
+
63
+ @app.get("/")
64
+ def greet_json():
65
+ return {"Hello": "World!"}
66
+ ```
67
+
68
+ > **Hint:** You can also create the app file directly in your browser.
69
+
70
+ ---
71
+
72
+ ## Create your Dockerfile
73
+
74
+ ```dockerfile
75
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
76
+ # you will also find guides on how best to write your Dockerfile
77
+
78
+ FROM python:3.9
79
+
80
+ RUN useradd -m -u 1000 user
81
+ USER user
82
+ ENV PATH="/home/user/.local/bin:$PATH"
83
+
84
+ WORKDIR /app
85
+
86
+ COPY --chown=user ./requirements.txt requirements.txt
87
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
88
+
89
+ COPY --chown=user . /app
90
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
91
+ ```
92
+
93
+ > **Hint:** Alternatively, you can create the Dockerfile file directly in your browser.
94
+
95
+ ---
96
+
97
+ ## Then commit and push
98
+
99
+ ```bash
100
+ git add requirements.txt app.py Dockerfile
101
+ git commit -m "Add application file"
102
+ git push
103
+ ```
104
+
105
+ > Finally, your Space should be running on this page after a few moments!
106
+
107
+ ---
108
+
109
+ ## App port
110
+
111
+ > Your Docker Space needs to listen on port `7860`.
112
+
113
+ ## Personalize your Space
114
+
115
+ Make your Space stand out by customizing its emoji, colors, and description by **editing metadata** in its `README.md` file.
116
+
117
+ ## Documentation
118
+
119
+ Read the full documentation for Docker Spaces [here](https://huggingface.co/docs/hub/spaces-sdks-docker).
120
+
121
+ ---
122
+
123
+ ## Mit jelent ez nekünk (paperhawk-specifikus megjegyzések)
124
+
125
+ ### A default Dockerfile vs a paperhawk Dockerfile
126
+
127
+ A paperhawk meglévő Dockerfile-ja **fejlettebb** mint a default-példa:
128
+
129
+ | Aspektus | HF default | Paperhawk |
130
+ |---|---|---|
131
+ | Python version | `python:3.9` | `python:3.12-slim` (modernebb) |
132
+ | User setup | `useradd -m -u 1000 user` + `USER user` (non-root, security best-practice) | NINCS (root user) |
133
+ | OS-deps | nincs | `tesseract-ocr` + `poppler-utils` + `libmupdf-dev` (PDF + OCR) |
134
+ | Pre-download | nincs | `BAAI/bge-m3` 2.27 GB (build-time) |
135
+ | App | `uvicorn` FastAPI | `streamlit` |
136
+ | Port | **`7860`** | **`8501`** → **átírva 7860-ra a HF Space-nek** (2026-05-05) |
137
+
138
+ ### A 2 fő átírás amit a paperhawk Dockerfile-on csinálni kellett
139
+
140
+ 1. **Port-átállítás 8501 → 7860** (kész, 2026-05-05):
141
+ - `EXPOSE 8501` → `EXPOSE 7860`
142
+ - `--server.port=8501` → `--server.port=7860`
143
+ - `HEALTHCHECK ... http://localhost:8501/_stcore/health` → `http://localhost:7860/_stcore/health`
144
+
145
+ 2. **(opcionális) User-setup hozzáadása** security best-practice szempontból:
146
+ - `RUN useradd -m -u 1000 user`
147
+ - `USER user`
148
+ - `ENV PATH="/home/user/.local/bin:$PATH"`
149
+ - `COPY --chown=user ...`
150
+ - **A HF Spaces NEM követeli kötelező módon**, és a paperhawk-stack root-ként is jól fut.
151
+
152
+ ### A README.md front-matter
153
+
154
+ A HF Spaces megköveteli a `README.md` tetején egy YAML front-matter-t. A paperhawk `README.md` tetejére beillesztve (2026-05-05):
155
+
156
+ ```yaml
157
+ ---
158
+ title: PaperHawk
159
+ emoji: 🦅
160
+ colorFrom: red
161
+ colorTo: orange
162
+ sdk: docker
163
+ pinned: false
164
+ license: mit
165
+ short_description: Real-DI-Audit/14 rules/6 anti-halluc/LangGraph/Qwen/MI300X
166
+ ---
167
+ ```
168
+
169
+ A meglévő paperhawk `README.md`-tartalom (project README) ezután következik. A front-matter csak a HF Space-nek szól, GitHub-on is renderelhető (a YAML-t code-block-ként mutatja).
170
+
171
+ ### A clone + push workflow a paperhawk-on
172
+
173
+ A meglévő paperhawk GitHub-repón (`nandorfivince/paperhawk`) hozzáadunk egy új remote-ot:
174
+
175
+ ```bash
176
+ cd ~/development/<host-paperhawk-path>
177
+ git remote add space https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/paperhawk
178
+ git push space main
179
+ ```
180
+
181
+ A push első futáskor authenticálni kér — a HF Hub-token-t kéri, amit a Vincsipe accountból lehet generálni a https://huggingface.co/settings/tokens oldalon (új Token, "Write" scope).
182
+
183
+ ### App port környezeti változó
184
+
185
+ A HF Spaces a `7860`-as portot várja default. A paperhawk `streamlit` parancs ki van egészítve a `--server.port=7860` flag-gel a `Dockerfile`-ben (2026-05-05).
186
+
187
+ ### HF Spaces hardware
188
+
189
+ CPU Basic = free tier, 16 GB RAM, 2 vCPU. Bőven elég a paperhawk-Streamlit-jéhez (~3-5 GB RAM-fogyasztás bge-m3 + ChromaDB + Streamlit). A vLLM az AMD MI300X-en fut **külön**, a Space `VLLM_BASE_URL` Secret-en keresztül hivatkozik rá.
190
+
191
+ ### Sleep mode
192
+
193
+ A free Space 48 órás inaktivitás után alvó-módba kerül. Az első request a felébredés után 30-60 sec. A bíráskodás alatt érdemes **periodikusan** pingelni a Space-t (pl. UptimeRobot 30 perces intervallum), vagy a Build-in-Public posztokon megosztani hogy organic-traffic-al ébren tartsuk.
docs/SUBMISSION.md ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PaperHawk — Hackathon Submission Brief
2
+
3
+ > One-pager for the **AMD Developer Hackathon × lablab.ai** (May 2026) submission form.
4
+ > Every section below is ready to paste directly into the lablab.ai project page.
5
+
6
+ ---
7
+
8
+ ## Project Title
9
+
10
+ **PaperHawk**
11
+
12
+ ---
13
+
14
+ ## Short Description
15
+
16
+ > Multi-agent document intelligence that catches what RAG misses. 14 deterministic domain checks, 5+1 anti-hallucination layers, and a 5-tool agentic chat — running Qwen 2.5 on AMD Instinct MI300X via vLLM. Open source, MIT licensed.
17
+
18
+ *(247 characters)*
19
+
20
+ ---
21
+
22
+ ## Long Description
23
+
24
+ ### The Problem
25
+
26
+ RAG retrieves passages. Audit finds inconsistencies. Today's RAG chatbots can't do the second.
27
+
28
+ When someone opens a folder of 25 invoices, three contracts, two purchase orders, and a financial report, they don't ask a chatbot to summarize the contract. They ask: *"Does the supplier in Invoice #7 match the vendor in PO #3? Is the VAT rate consistent across the package? Is there a hidden change-of-control clause? Is the math on the gross total correct? Are any of these counterparties on the EU/OFAC sanctions list?"*
29
+
30
+ These are not retrieval questions. They are **reasoning, validation, and cross-reference** questions over multiple typed documents. A standard chunk-embed-retrieve-generate pipeline cannot answer them, because the question is not contained in any single chunk. It lives in the relationship between documents.
31
+
32
+ PaperHawk is built specifically for this gap.
33
+
34
+ ### What We Built
35
+
36
+ PaperHawk is a LangGraph 0.6-native system with **4 compiled graphs** (pipeline, chat, DD assistant, package insights) wired together with **Send-API parallelism**, an `AsyncSqliteSaver` checkpointer, and a `configurable_alternatives` provider that swaps cleanly between vLLM (production), Ollama (local dev), and a deterministic dummy (CI). It is not a single-agent retrieval pipeline.
37
+
38
+ Concretely:
39
+
40
+ - **6 reusable subgraphs** for ingest, classification, extraction, risk dispatch, LLM risk ensemble, and chat tool routing
41
+ - **14 deterministic domain checks** wired into a registry — ISA 240/500/320 (audit standards), GDPR Article 28, Incoterms 2020, AML sanctions, tax-ID validation, contract completeness, materiality thresholds, and more. Every check is a Python `Protocol` implementation, not an LLM prompt.
42
+ - **5+1 anti-hallucination layers**: `temperature=0`, a `_quotes` field for verbatim source citation, `_confidence` per extracted field, plausibility validators, a 3-layer LLM-risk filter chain, and a quote validator that drops any LLM output whose claimed source quote isn't found in the document.
43
+ - **5-tool agentic chat** (`list_documents`, `get_extraction`, `search_documents`, `compare_documents`, `validate_document`) with strict `[Source: filename.pdf]` citations validated by a post-processor — answers without provenance never reach the user.
44
+ - **Multi-agent DD assistant**: 4 specialist agents (audit / legal / compliance / financial) coordinated by a supervisor and a synthesizer, in the spirit of the LangGraph supervisor cookbook but production-shaped.
45
+ - **Streamlit 5-tab UI**: Upload, Results, Chat, DD Assistant, Report — drivable in 30 seconds with three pre-bundled demo packages.
46
+
47
+ The codebase ships with **61 tests passing in CI** without any LLM (the deterministic dummy provider), is MIT licensed, and is English-first with a multilingual fallback path for EN/HU/DE inputs.
48
+
49
+ ### Why AMD Instinct MI300X
50
+
51
+ The MI300X gives us **192 GB of HBM3 memory** in a single accelerator — enough headroom to host Qwen 2.5 14B Instruct in BF16 with comfortable KV-cache space for our long agentic conversations. The DD supervisor plus four specialists in one session easily exceeds 32k tokens of context, and the MI300X handles it without paging.
52
+
53
+ vLLM's continuous batching on ROCm lets the Streamlit UI fire concurrent requests during a multi-document upload without queueing artifacts. The FP8 / BF16 paths supported by the MI300X memory bandwidth open a clean upgrade route to Qwen 2.5 32B for finals night.
54
+
55
+ We're using the AMD Developer Cloud — `infra/vllm/Dockerfile` and `infra/vllm/serve.sh` are committed in the repo and start vLLM with `--api-key`, `--max-model-len 32768`, and a configurable model tag. The whole inference stack is containerized; nothing is hand-rolled on the GPU node.
56
+
57
+ ### Why Qwen 2.5 Instruct
58
+
59
+ Three reasons.
60
+
61
+ First, **strong tool calling**. Qwen 2.5 14B handles our 5-tool chat router reliably; tool-routing accuracy in our integration tests is on par with the proprietary reference model we used in early development. The tool-call JSON is well-formed, parameters are typed correctly, and unnecessary tool calls are rare.
62
+
63
+ Second, **structured output that holds**. `with_structured_output` returns valid Pydantic v2 JSON every time in our extraction subgraph, including the nested `_quotes` and `_confidence` fields. This is where many smaller open-source models fail under load — Qwen 2.5 doesn't.
64
+
65
+ Third, **multilingual fluency**. Our pipeline often reads Hungarian, German, and English documents in the same package, and Qwen handles cross-lingual extraction without dropping accuracy. We don't fine-tune; we pull `Qwen/Qwen2.5-14B-Instruct` from Hugging Face directly into the vLLM container — clean, reproducible, and rerunnable by anyone.
66
+
67
+ ### The Pipeline (5-Step End-to-End)
68
+
69
+ 1. **Ingest** — PDF, DOCX, and image inputs go through three loaders. Scanned PDFs hit a vision-first fallback (the LLM reads the rendered page directly); native PDFs use PyMuPDF + pdfplumber for table-aware extraction; DOCX is parsed natively.
70
+ 2. **Classify** — A 6-way doc-type classifier (`invoice`, `delivery_note`, `purchase_order`, `contract`, `financial_report`, `other`) with structured output, calibrated for ISA 500 evidence-quality scoring.
71
+ 3. **Extract** — Per doc-type Pydantic schema, with a universal extraction subgraph as a fallback for unknown types. Every extracted field carries its own `_quotes` and `_confidence` — anti-hallucination is built into the type system, not a post-hoc check.
72
+ 4. **Cross-reference** — Three-way matching (invoice + delivery note + purchase order) for audit packages; multi-agent synthesis for DD packages; package-level analyzers for duplicate-invoice detection (ISA 240) and pricing anomalies.
73
+ 5. **Risk + Report** — Plausibility checks + 14 domain checks (deterministic, parallel via Send fan-out) + LLM risk ensemble + 3-layer filter that drops repeats, business-normal flags, and unsupported claims. Final output: a ranked risk list with severity, regulation source, and source citations; a downloadable DOCX report; structured JSON for API consumers.
74
+
75
+ ### Anti-Hallucination Is Non-Negotiable
76
+
77
+ The system is designed so the LLM cannot lie about a document and have the lie pass through.
78
+
79
+ Every LLM-generated extraction includes a `_quotes` array with the verbatim text the model cites as source. A post-processor scans each quote against the document body. If the quote isn't there, the field is rejected — period. The 3-layer LLM-risk filter rejects any risk claim whose quoted evidence isn't in the package, repeats a finding from the deterministic domain checks, or describes a normal business condition.
80
+
81
+ This isn't a guardrail layer slapped on top — it's the trust contract between the model and the user, and it runs on every output. The `validation/` package is one of the most-edited folders in the repo precisely because we treat it as a first-class concern, not an afterthought.
82
+
83
+ ### Demo Packages
84
+
85
+ Three pre-built scenarios are bundled in `test_data/demo_packages/`. Each is a one-click demo from the Upload tab:
86
+
87
+ - **Audit Demo** — Three invoices from the same supplier; the March one is 50% pricier than January and February. The package-level analyzer flags it as an over-billing pattern, and the chat answers *"Why is the March invoice more expensive?"* with cited line items.
88
+ - **DD Demo** — An NDA, a service agreement, and an amendment in an acquisition scenario. The DD assistant flags a hidden change-of-control trigger and an automatic-renewal red flag, and the synthesizer writes an executive summary in three paragraphs.
89
+ - **Compliance Demo** — Two contracts; one is missing GDPR Article 28 sub-processor language. Domain check #8 detects it, and the report includes the exact regulatory citation.
90
+
91
+ End-to-end demo time on AMD MI300X: **30–90 seconds** per package.
92
+
93
+ ### Track 1 + Build in Public + Hugging Face Special Prize
94
+
95
+ **Track 1 — AI Agents & Agentic Workflows** is our primary submission. The track brief asks for projects that "move beyond simple RAG to build sophisticated AI agentic systems and workloads." PaperHawk fits the brief: 4 compiled graphs, 6 subgraphs, multi-agent DD orchestration, 5-tool agentic chat, and a registry-based deterministic check fabric. None of this is retrieval-only. The chat *is* an agent; the DD assistant is a multi-agent system; the pipeline is a typed-state orchestration.
96
+
97
+ **Ship It + Build in Public** is a natural cross-track fit. The repo is MIT licensed and public on GitHub. We're publishing a technical walkthrough and at least two updates on X / LinkedIn — tagging `@AIatAMD` and `@lablab` — covering two design choices that don't usually appear in hackathon RAG demos: the LangGraph Send-API parallelism for the deterministic check fan-out, and the post-hoc citation validator for the chat tool outputs.
98
+
99
+ **Hugging Face Special Prize**: deployed as a Streamlit Space under the `lablab-ai-amd-developer-hackathon` organization. Public, runnable in the browser, no signup required. The Space carries the same `paperhawk.jpeg` cover and points at our vLLM endpoint; visitors can drive the three demo packages from the front page.
100
+
101
+ One codebase, one MIT license, three prize pools.
102
+
103
+ ### Tech Stack
104
+
105
+ | Layer | Choice |
106
+ |---|---|
107
+ | **Orchestration** | LangGraph 0.6 (4 compiled graphs, 6 subgraphs, AsyncSqliteSaver) |
108
+ | **LLM** | Qwen 2.5 14B Instruct on vLLM (AMD Instinct MI300X, ROCm) |
109
+ | **Embedding** | BAAI/bge-m3 (multilingual, 1024-dim, sentence-transformers) |
110
+ | **Retrieval** | ChromaDB + BM25 hybrid with Reciprocal Rank Fusion |
111
+ | **Schemas** | Pydantic v2 with field aliases for the `_quotes` JSON contract |
112
+ | **UI** | Streamlit 5-tab + async runtime + long-lived background event loop |
113
+ | **Deploy** | Hugging Face Spaces (Streamlit SDK) + AMD Developer Cloud (vLLM container) |
114
+ | **Testing** | pytest 8 (61 PASS in CI without any LLM), Playwright UI smoke tests |
115
+ | **License** | MIT |
116
+
117
+ ### Built By
118
+
119
+ **Team CsimpiCsirkek**:
120
+
121
+ - **Vince Nándorfi** — Lead, LangGraph architecture, AMD adaptation
122
+ - **Tamás Vitai**
123
+ - **Gábor Murcsik**
124
+
125
+ ---
126
+
127
+ ## Technology & Category Tags
128
+
129
+ `agentic-ai` · `multi-agent` · `langgraph` · `qwen` · `amd-mi300x` · `vllm` · `rocm` · `huggingface-spaces` · `document-intelligence` · `streamlit` · `python` · `mit-license`
130
+
131
+ ---
132
+
133
+ ## Tracks Targeted
134
+
135
+ | Track / Prize | Status | Rationale |
136
+ |---|---|---|
137
+ | **Track 1 — AI Agents & Agentic Workflows** | Primary submission | Multi-agent system, 4 compiled graphs, 6 subgraphs, 5-tool agentic chat — well past the "simple RAG" line |
138
+ | **Ship It + Build in Public** | Cross-track | MIT-licensed public GitHub repo + technical walkthrough + ≥2 social posts tagging `@AIatAMD` and `@lablab` |
139
+ | **Hugging Face Special Prize** | Special category | Streamlit Space published under the `lablab-ai-amd-developer-hackathon` HF organization |
140
+
141
+ ---
142
+
143
+ ## Submission Checklist
144
+
145
+ | Item | Status | Notes |
146
+ |---|---|---|
147
+ | Project Title | DONE | `PaperHawk` |
148
+ | Short Description | DONE | 247 characters, A+C blend |
149
+ | Long Description | DONE | 10 sections, builder-energy tone |
150
+ | Cover Image | DONE | `paperhawk.jpeg` (2048 × 819 px) |
151
+ | Technology & Category Tags | DONE | 12 tags |
152
+ | Public GitHub Repository | DONE | `github.com/nandorfivince/paperhawk` |
153
+ | Video Presentation | TODO | Demo walkthrough video |
154
+ | Slide Presentation | TODO | 5–8 slide deck |
155
+ | Demo Application URL | TODO | HF Space public URL |
156
+ | HF Space URL | TODO | Under `lablab-ai-amd-developer-hackathon` org |
157
+
158
+ ---
159
+
160
+ ## Submission URLs (filled at submission time)
161
+
162
+ - **GitHub repo**: https://github.com/nandorfivince/paperhawk
163
+ - **Hugging Face Space**: *(to be added)*
164
+ - **Demo video**: *(to be added)*
165
+ - **Slide deck**: *(to be added)*
166
+ - **Live application URL**: *(same as HF Space URL)*
167
+
168
+ ---
169
+
170
+ *This document is the canonical submission brief. Paste sections directly into the lablab.ai project page when filing the submission.*
docs/hf-space-deployment.md ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Space deployment
2
+
3
+ The Streamlit app deploys to a **Hugging Face Space** under the
4
+ `lablab-ai-amd-developer-hackathon` organization. This is **mandatory** for
5
+ the Hugging Face Special Prize and convenient as the public demo URL.
6
+
7
+ ## 1. Prerequisites
8
+
9
+ - Hugging Face account
10
+ - Membership in the **AMD Developer Hackathon** HF organization
11
+ ([join here](https://huggingface.co/login?next=%2Forganizations%2Flablab-ai-amd-developer-hackathon%2Fshare%2FELARrxoRIHvseSHRhANJYFEZQazsQIYhJf))
12
+ - A running vLLM endpoint on the AMD MI300X (see `qwen-vllm-deployment.md`)
13
+
14
+ ## 2. Create the Space
15
+
16
+ 1. Hugging Face → Spaces → New Space
17
+ 2. Owner: `lablab-ai-amd-developer-hackathon`
18
+ 3. Space name: `paperhawk`
19
+ 4. License: MIT
20
+ 5. SDK: **Streamlit**
21
+ 6. Hardware: **CPU basic** (free) — vLLM runs on MI300X, the Space only hosts the UI
22
+
23
+ ## 3. Push the code
24
+
25
+ ```bash
26
+ git remote add space https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/paperhawk
27
+ git push space main
28
+ ```
29
+
30
+ The Space auto-builds from the repo using `requirements.txt` and runs
31
+ `app.py` (or, in our layout, configures Streamlit to start `app/main.py`).
32
+
33
+ ## 4. Set Space env vars
34
+
35
+ In the Space → Settings → Variables and secrets, add:
36
+
37
+ ```
38
+ LLM_PROFILE=vllm
39
+ VLLM_BASE_URL=http://<mi300x-public-ip>:8000/v1
40
+ VLLM_MODEL=Qwen/Qwen2.5-14B-Instruct
41
+ VLLM_API_KEY=<the api key you set on the vLLM server>
42
+ EMBEDDING_MODEL=BAAI/bge-m3
43
+ ```
44
+
45
+ Mark `VLLM_API_KEY` as a **secret** (not a regular variable).
46
+
47
+ ## 5. Space front-matter
48
+
49
+ Edit the `README.md` to start with the HF Spaces front-matter:
50
+
51
+ ```yaml
52
+ ---
53
+ title: Document Intelligence (AMD Edition)
54
+ emoji: 🔍
55
+ colorFrom: red
56
+ colorTo: yellow
57
+ sdk: streamlit
58
+ sdk_version: 1.40.0
59
+ app_file: app/main.py
60
+ pinned: false
61
+ license: mit
62
+ short_description: Multi-document due diligence with LangGraph + Qwen on AMD MI300X
63
+ tags:
64
+ - langgraph
65
+ - agentic
66
+ - rag
67
+ - qwen
68
+ - amd
69
+ - document-intelligence
70
+ ---
71
+ ```
72
+
73
+ (The current README.md is the project README; this front-matter goes on top
74
+ when the repo is mirrored to the HF Space.)
75
+
76
+ ## 6. Verify the Space
77
+
78
+ After the build finishes (~3-5 minutes):
79
+
80
+ 1. Open `https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/paperhawk`
81
+ 2. Click the **Audit Demo** button → it should run end-to-end and produce
82
+ risks + a report.
83
+ 3. Open the **Chat** tab → ask a question → the answer should include
84
+ `[Source: filename.pdf]` citations.
85
+
86
+ ## 7. Resource tier
87
+
88
+ The free CPU basic tier (16 GB RAM, 2 vCPU) handles:
89
+
90
+ - BGE-m3 embedding (~2.3 GB on first load)
91
+ - ChromaDB (small index)
92
+ - Streamlit UI
93
+
94
+ The vLLM model runs on the MI300X, **not** here. The Space just renders the
95
+ UI and proxies requests to the vLLM endpoint.
96
+
97
+ If the free tier is too tight on memory, upgrade to **CPU upgrade** ($0.03/h).
98
+
99
+ ## 8. Sleep mode mitigation
100
+
101
+ A free Space sleeps after 48 hours of inactivity. The first request after
102
+ sleep takes ~30-60 seconds to wake. Mitigations:
103
+
104
+ - Share the Space link in your Build-in-Public posts → continuous traffic →
105
+ less likely to sleep.
106
+ - Set up a 30-minute external ping (e.g. UptimeRobot) the day before
107
+ judging.
108
+
109
+ ## 9. The HF Special Prize is like-driven
110
+
111
+ Once the Space is live:
112
+
113
+ 1. Share the URL on X / LinkedIn (tag `@lablab` and `@AIatAMD`).
114
+ 2. Ask your followers to like the Space.
115
+ 3. The Space with the most likes at the end of the hackathon wins:
116
+ - 1st: Reachy Mini Wireless robot + 6 months HF PRO + $500 HF credit
117
+ - 2nd: 3 months HF PRO + $300 credit
118
+ - 3rd: 2 months HF PRO + $200 credit
119
+
120
+ ## 10. Submission to lablab
121
+
122
+ When submitting on lablab.ai, paste the Space URL into the **Application
123
+ URL** and **Hugging Face Space link** fields. This is mandatory for the HF
124
+ prize qualification.
docs/qwen-vllm-deployment.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Qwen on AMD MI300X — vLLM deployment
2
+
3
+ This guide covers the production deployment path: running Qwen 2.5 Instruct
4
+ (14B or 32B) via [vLLM](https://github.com/vllm-project/vllm) on an
5
+ **AMD Instinct MI300X** through the AMD Developer Cloud, with the Streamlit
6
+ app calling the vLLM endpoint over the OpenAI-compatible REST API.
7
+
8
+ For the canonical step-by-step (including the docker run command and a
9
+ benchmark table), see [`infra/vllm/README.md`](../infra/vllm/README.md).
10
+
11
+ ## Why this stack?
12
+
13
+ - **Open source LLM** — Qwen 2.5 is Apache-2 licensed; safe for the MIT
14
+ open-source license here, and a partner-prize bonus on the hackathon.
15
+ - **Multilingual** — Qwen 2.5 handles HU/DE/EN well, which matters for our
16
+ multilingual demo data.
17
+ - **AMD-native** — vLLM has a ROCm build (`rocm/vllm:latest`) optimized for
18
+ the MI300X. No CUDA, no NVIDIA dependency.
19
+ - **OpenAI-compatible API** — `langchain-openai`'s `ChatOpenAI` adapter
20
+ works out of the box with a custom `base_url`. Tool-calling, structured
21
+ output, and streaming all behave the same as the public OpenAI endpoint.
22
+ - **No vendor lock-in** — the same code runs against Ollama (locally) and
23
+ against any OpenAI-compatible inference server.
24
+
25
+ ## Cost monitoring
26
+
27
+ AMD Developer Cloud pricing (May 2026 ballpark):
28
+
29
+ - ~$4-8/hour pay-as-you-go for an MI300X instance.
30
+ - Each team member gets `$100` in cloud credits → 20 hours of MI300X uptime
31
+ at $5/h. With 3 team members, ~60 hours total.
32
+
33
+ **Discipline:**
34
+
35
+ 1. Only run during demo / test / build sessions; **stop the instance when
36
+ idle**.
37
+ 2. Keep one teammate's credit untouched as a final-day buffer.
38
+ 3. Run end-to-end smoke tests early — a hot fix on deadline day burns hours
39
+ you can't get back.
40
+
41
+ ## Plan B: Ollama fallback
42
+
43
+ If the AMD credit doesn't arrive in time, or the MI300X has a network issue
44
+ on demo day:
45
+
46
+ ```bash
47
+ LLM_PROFILE=ollama OLLAMA_MODEL=qwen2.5:7b-instruct streamlit run app/main.py
48
+ ```
49
+
50
+ Pull the model first:
51
+
52
+ ```bash
53
+ ollama pull qwen2.5:7b-instruct
54
+ ```
55
+
56
+ Quality drops (7B vs 14B/32B), but the demo flow stays alive on a laptop
57
+ GPU or even CPU.
58
+
59
+ ## Production hardening (post-hackathon)
60
+
61
+ For an actual production deployment beyond the hackathon scope:
62
+
63
+ - TLS termination (Caddy / Nginx in front of vLLM)
64
+ - API-key rotation (`--api-key` flag with a periodic rotation script)
65
+ - Prometheus + Grafana on vLLM `/metrics`
66
+ - `--quantization fp8` to fit a larger model on smaller hardware
67
+ - `--enable-prefix-caching` for repeated long system prompts
68
+ - Multi-GPU / multi-region scaling via SkyPilot or vLLM Production Stack
docs/slides/01_cover.png ADDED

Git LFS Details

  • SHA256: 9a7cc84b3ee3d544e006e461bc135a0708e44a57e789400ac4f3ffa9a788c8c3
  • Pointer size: 131 Bytes
  • Size of remote file: 179 kB
docs/slides/PaperHawk_Slides.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:302aa5982d5cace9bd4e154d97d5feabe3ded8c42fffdaa61857d8aaec89d492
3
+ size 1328878
docs/slides/PaperHawk_Slides.pptx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba00be30282e781f49d84117bcebbaa584a02ef1725eee714944ff0468e09dc1
3
+ size 771365
docs/slides/README.md ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PaperHawk — Slide Deck
2
+
3
+ The 10-slide deck for the AMD Developer Hackathon × lablab.ai submission.
4
+
5
+ - **Source**: `slides.html` (single self-contained HTML, ~900 lines, no JS, no external assets except the repo's `paperhawk.jpeg`)
6
+ - **Format**: 16:9 landscape (1280 × 720 px per slide)
7
+ - **Palette**: AMD red `#ED1C24` + AMD orange `#FB6624` + PaperHawk black `#1A1A1A` + Qwen purple `#7C3AED` accent
8
+ - **Typography**: Inter (Google Fonts), JetBrains Mono for code/labels
9
+ - **License**: MIT (same as the repo)
10
+
11
+ ## Render to PDF (Playwright)
12
+
13
+ ```bash
14
+ # One-time setup
15
+ pip install playwright
16
+ playwright install chromium
17
+
18
+ # Render slides.html → PaperHawk_Slides.pdf
19
+ python - <<'PY'
20
+ import asyncio
21
+ from pathlib import Path
22
+ from playwright.async_api import async_playwright
23
+
24
+ async def main():
25
+ src = Path("docs/slides/slides.html").resolve().as_uri()
26
+ out = Path("docs/slides/PaperHawk_Slides.pdf")
27
+ async with async_playwright() as p:
28
+ browser = await p.chromium.launch()
29
+ page = await browser.new_page(viewport={"width": 1280, "height": 720})
30
+ await page.goto(src, wait_until="networkidle")
31
+ await page.pdf(
32
+ path=str(out),
33
+ width="1280px",
34
+ height="720px",
35
+ print_background=True,
36
+ margin={"top": "0", "right": "0", "bottom": "0", "left": "0"},
37
+ )
38
+ await browser.close()
39
+
40
+ asyncio.run(main())
41
+ print("Wrote", "docs/slides/PaperHawk_Slides.pdf")
42
+ PY
43
+ ```
44
+
45
+ ## Render the cover slide as PNG (HF Space hero)
46
+
47
+ ```bash
48
+ python - <<'PY'
49
+ import asyncio
50
+ from pathlib import Path
51
+ from playwright.async_api import async_playwright
52
+
53
+ async def main():
54
+ src = Path("docs/slides/slides.html").resolve().as_uri()
55
+ out = Path("docs/slides/01_cover.png")
56
+ async with async_playwright() as p:
57
+ browser = await p.chromium.launch()
58
+ page = await browser.new_page(viewport={"width": 1280, "height": 720})
59
+ await page.goto(src, wait_until="networkidle")
60
+ # Screenshot the first .slide element only.
61
+ cover = page.locator(".slide").first
62
+ await cover.screenshot(path=str(out), omit_background=False)
63
+ await browser.close()
64
+
65
+ asyncio.run(main())
66
+ print("Wrote", "docs/slides/01_cover.png")
67
+ PY
68
+ ```
69
+
70
+ ## Preview locally
71
+
72
+ ```bash
73
+ # Open in your browser (renders identical to the PDF):
74
+ xdg-open docs/slides/slides.html
75
+ ```
76
+
77
+ ## Iteration workflow
78
+
79
+ 1. Edit `slides.html` (CSS at the top, slides as `<section class="slide">` blocks)
80
+ 2. Reload the browser tab to preview
81
+ 3. When happy, re-run the Playwright PDF script
82
+ 4. Commit both `slides.html` and the generated PDF
83
+
84
+ ## Slide map
85
+
86
+ | # | Title | Visual |
87
+ |---|---|---|
88
+ | 1 | Cover | `paperhawk.jpeg` hero + team + tagline |
89
+ | 2 | The Problem | RAG-vs-audit split contrast |
90
+ | 3 | What We Built | 5 big-number stat cards |
91
+ | 4 | The Pipeline | 5-step ribbon (red→orange gradient) |
92
+ | 5 | The 14 Domain Checks | 3-tier table (audit / compliance / standards) |
93
+ | 6 | Anti-Halluc + DD | 5+1 layer stack | DD supervisor pattern |
94
+ | 7 | The Stack | Vertical stack-row layout (AMD + Qwen highlighted) |
95
+ | 8 | Demo Packages | 3 demo cards + timing banner |
96
+ | 9 | Built for Builders | 3 builders cards + repo/HF/MIT meta |
97
+ | 10 | Team + Closing | 3 team cards + closing tagline |
98
+
99
+ ## Notes
100
+
101
+ - All copy is English, builder-energy tone, no PwC/Hungarian narrative residue
102
+ - The `paperhawk.jpeg` reference is `../../paperhawk.jpeg` (relative to `docs/slides/`)
103
+ - The gradient strip on every slide top is `linear-gradient(90deg, AMD-red → AMD-orange → Qwen-purple)` — a visual signature
104
+ - "Team CsimpiCsirkek" appears in the cover meta + final footer; "Built to ship" closing tagline carries the winner-team subtext without being on-the-nose
docs/slides/png/slide_01.png ADDED

Git LFS Details

  • SHA256: 9a7cc84b3ee3d544e006e461bc135a0708e44a57e789400ac4f3ffa9a788c8c3
  • Pointer size: 131 Bytes
  • Size of remote file: 179 kB
docs/slides/png/slide_02.png ADDED

Git LFS Details

  • SHA256: 2570275891e9075c6ae1d5dd748be65b97e12e6bea4349574b14f922e1c22c84
  • Pointer size: 130 Bytes
  • Size of remote file: 62.9 kB
docs/slides/png/slide_03.png ADDED

Git LFS Details

  • SHA256: 1a20ba1a513657703561384b15f8718f2461179756441202fd91e373b49cf30e
  • Pointer size: 130 Bytes
  • Size of remote file: 58.3 kB
docs/slides/png/slide_04.png ADDED

Git LFS Details

  • SHA256: feabfef2a7c353d12a4994a853cd11c96a0a1c8fc2f36e1e691b2f7207ab935f
  • Pointer size: 130 Bytes
  • Size of remote file: 66 kB
docs/slides/png/slide_05.png ADDED

Git LFS Details

  • SHA256: dfe874a6755c065f5efbae5db7a2b14d3eb245272fa877beafd2e6bfa82d4d4f
  • Pointer size: 130 Bytes
  • Size of remote file: 81.9 kB
docs/slides/png/slide_06.png ADDED

Git LFS Details

  • SHA256: 67de912da1ae5df5544a34954e15d3361a10ad1b7db16d500b1127976cfded9a
  • Pointer size: 130 Bytes
  • Size of remote file: 68.5 kB
docs/slides/png/slide_07.png ADDED

Git LFS Details

  • SHA256: ab86eba5eddca00bb85712f21151c855fcc1a8cb2ed0448f08129279b074211d
  • Pointer size: 130 Bytes
  • Size of remote file: 72.2 kB
docs/slides/png/slide_08.png ADDED

Git LFS Details

  • SHA256: 26f0474dd0e0dec91640b71c38f384018b55843ae6852de530c3787bb6076add
  • Pointer size: 130 Bytes
  • Size of remote file: 61.7 kB
docs/slides/png/slide_09.png ADDED

Git LFS Details

  • SHA256: 9508c9f923635f732e80738bc25e42658aeacf571709b960394fe77ee70132cc
  • Pointer size: 130 Bytes
  • Size of remote file: 61.8 kB
docs/slides/png/slide_10.png ADDED

Git LFS Details

  • SHA256: 70f8ec06f30fb4cbf070bc615690c7677bf29f2eabdc3384eb9eae8dd1efe6fb
  • Pointer size: 130 Bytes
  • Size of remote file: 69.6 kB
docs/slides/slides.html ADDED
@@ -0,0 +1,897 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>PaperHawk — AMD Developer Hackathon Slide Deck</title>
6
+ <link rel="preconnect" href="https://fonts.googleapis.com">
7
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
8
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
9
+ <style>
10
+ :root {
11
+ --amd-red: #ED1C24;
12
+ --amd-orange: #FB6624;
13
+ --paperhawk-black: #1A1A1A;
14
+ --paperhawk-gold: #D4A857;
15
+ --qwen-purple: #7C3AED;
16
+ --bg-light: #FFFFFF;
17
+ --bg-cream: #FFF6E5;
18
+ --bg-purple-light: #F5EFFF;
19
+ --text-dark: #1A1A1A;
20
+ --text-muted: #6B6B6B;
21
+ --text-light: #FFFFFF;
22
+ --border-soft: rgba(0, 0, 0, 0.08);
23
+ }
24
+
25
+ * { box-sizing: border-box; margin: 0; padding: 0; }
26
+
27
+ html, body {
28
+ font-family: 'Inter', -apple-system, sans-serif;
29
+ color: var(--text-dark);
30
+ background: #E5E5E5;
31
+ line-height: 1.5;
32
+ -webkit-font-smoothing: antialiased;
33
+ }
34
+
35
+ .slide {
36
+ width: 1280px;
37
+ height: 720px;
38
+ padding: 64px 80px 64px 80px;
39
+ background: white;
40
+ position: relative;
41
+ overflow: hidden;
42
+ page-break-after: always;
43
+ margin: 30px auto;
44
+ box-shadow: 0 4px 30px rgba(0,0,0,0.12);
45
+ }
46
+
47
+ /* Top gradient bar — AMD red → orange → Qwen purple */
48
+ .slide::before {
49
+ content: "";
50
+ position: absolute;
51
+ top: 0; left: 0; right: 0;
52
+ height: 6px;
53
+ background: linear-gradient(90deg, var(--amd-red) 0%, var(--amd-orange) 50%, var(--qwen-purple) 100%);
54
+ z-index: 5;
55
+ }
56
+
57
+ .slide-label {
58
+ font-size: 11px;
59
+ letter-spacing: 0.18em;
60
+ color: var(--amd-red);
61
+ text-transform: uppercase;
62
+ font-weight: 700;
63
+ margin-bottom: 18px;
64
+ }
65
+
66
+ .slide-title {
67
+ font-size: 56px;
68
+ font-weight: 800;
69
+ line-height: 1.08;
70
+ color: var(--text-dark);
71
+ margin-bottom: 18px;
72
+ letter-spacing: -0.01em;
73
+ }
74
+ .slide-title .accent { color: var(--amd-red); }
75
+ .slide-title .qwen-accent { color: var(--qwen-purple); }
76
+
77
+ .slide-subtitle {
78
+ font-size: 22px;
79
+ font-weight: 500;
80
+ color: var(--text-muted);
81
+ max-width: 980px;
82
+ line-height: 1.4;
83
+ }
84
+
85
+ .slide-footer {
86
+ position: absolute;
87
+ bottom: 28px;
88
+ left: 80px; right: 80px;
89
+ display: flex;
90
+ justify-content: space-between;
91
+ font-size: 11px;
92
+ letter-spacing: 0.12em;
93
+ color: var(--text-muted);
94
+ text-transform: uppercase;
95
+ font-family: 'JetBrains Mono', monospace;
96
+ }
97
+
98
+ /* === Slide 1: Cover === */
99
+ .slide-cover {
100
+ background: var(--paperhawk-black);
101
+ color: var(--text-light);
102
+ padding: 64px 80px;
103
+ }
104
+ .slide-cover .slide-label { color: var(--amd-orange); }
105
+ .slide-cover .hawk-image {
106
+ width: 100%;
107
+ max-height: 280px;
108
+ object-fit: cover;
109
+ object-position: center;
110
+ border-radius: 6px;
111
+ margin-bottom: 32px;
112
+ filter: brightness(0.92) contrast(1.06);
113
+ }
114
+ .slide-cover .slide-title {
115
+ color: white;
116
+ font-size: 80px;
117
+ margin-bottom: 12px;
118
+ }
119
+ .slide-cover .slide-title .accent { color: var(--amd-orange); }
120
+ .slide-cover .slide-subtitle {
121
+ color: rgba(255,255,255,0.85);
122
+ font-size: 20px;
123
+ max-width: 800px;
124
+ }
125
+ .slide-cover .meta {
126
+ position: absolute;
127
+ bottom: 56px;
128
+ left: 80px; right: 80px;
129
+ display: flex;
130
+ justify-content: space-between;
131
+ align-items: flex-end;
132
+ font-size: 13px;
133
+ color: rgba(255,255,255,0.55);
134
+ font-family: 'JetBrains Mono', monospace;
135
+ }
136
+ .slide-cover .meta-team {
137
+ display: flex; gap: 28px;
138
+ font-family: 'Inter', sans-serif;
139
+ }
140
+ .slide-cover .meta-team strong {
141
+ color: white;
142
+ font-weight: 600;
143
+ }
144
+
145
+ /* === Slide 2: The Problem (split contrast) === */
146
+ .problem-grid {
147
+ display: grid;
148
+ grid-template-columns: 1fr 1fr;
149
+ gap: 28px;
150
+ margin-top: 44px;
151
+ }
152
+ .problem-card {
153
+ padding: 28px;
154
+ border-radius: 8px;
155
+ }
156
+ .problem-card.left {
157
+ background: var(--bg-cream);
158
+ border-left: 5px solid var(--amd-red);
159
+ }
160
+ .problem-card.right {
161
+ background: var(--paperhawk-black);
162
+ color: white;
163
+ }
164
+ .problem-card h3 {
165
+ font-size: 18px;
166
+ margin-bottom: 14px;
167
+ letter-spacing: 0.02em;
168
+ }
169
+ .problem-card.left h3 { color: var(--amd-red); }
170
+ .problem-card.right h3 { color: var(--amd-orange); }
171
+ .problem-card p {
172
+ font-size: 15px;
173
+ line-height: 1.6;
174
+ margin-bottom: 12px;
175
+ }
176
+ .problem-card.right p:last-child {
177
+ color: rgba(255,255,255,0.62);
178
+ font-size: 14px;
179
+ }
180
+ .problem-card.left p:last-child {
181
+ color: var(--text-muted);
182
+ font-size: 14px;
183
+ }
184
+
185
+ /* === Stat grid (slide 3) === */
186
+ .stat-grid {
187
+ display: grid;
188
+ grid-template-columns: repeat(5, 1fr);
189
+ gap: 14px;
190
+ margin-top: 44px;
191
+ }
192
+ .stat-card {
193
+ background: var(--bg-cream);
194
+ border-left: 4px solid var(--amd-red);
195
+ padding: 24px 20px;
196
+ border-radius: 4px;
197
+ }
198
+ .stat-card .stat-value {
199
+ font-size: 44px;
200
+ font-weight: 800;
201
+ color: var(--amd-red);
202
+ line-height: 1;
203
+ margin-bottom: 10px;
204
+ letter-spacing: -0.02em;
205
+ }
206
+ .stat-card .stat-label {
207
+ font-size: 13px;
208
+ font-weight: 500;
209
+ color: var(--text-dark);
210
+ line-height: 1.3;
211
+ }
212
+
213
+ /* === Pipeline ribbon (slide 4) === */
214
+ .pipeline-ribbon {
215
+ display: flex;
216
+ gap: 3px;
217
+ margin-top: 40px;
218
+ }
219
+ .pipeline-step {
220
+ flex: 1;
221
+ padding: 24px 20px;
222
+ color: white;
223
+ position: relative;
224
+ }
225
+ .pipeline-step:nth-child(1) { background: #ED1C24; }
226
+ .pipeline-step:nth-child(2) { background: #ED4426; }
227
+ .pipeline-step:nth-child(3) { background: #ED6628; }
228
+ .pipeline-step:nth-child(4) { background: #ED882A; }
229
+ .pipeline-step:nth-child(5) { background: #FB6624; }
230
+ .pipeline-step .step-num {
231
+ width: 28px; height: 28px;
232
+ background: white;
233
+ color: var(--amd-red);
234
+ border-radius: 50%;
235
+ font-weight: 800;
236
+ font-size: 14px;
237
+ display: flex; align-items: center; justify-content: center;
238
+ margin-bottom: 14px;
239
+ }
240
+ .pipeline-step .step-name {
241
+ font-size: 19px; font-weight: 700;
242
+ margin-bottom: 8px;
243
+ letter-spacing: -0.01em;
244
+ }
245
+ .pipeline-step .step-desc {
246
+ font-size: 12px;
247
+ opacity: 0.92;
248
+ line-height: 1.45;
249
+ }
250
+
251
+ /* === Domain checks (slide 5) === */
252
+ .domain-categories {
253
+ display: grid;
254
+ grid-template-columns: repeat(3, 1fr);
255
+ gap: 16px;
256
+ margin-top: 36px;
257
+ }
258
+ .domain-cat {
259
+ padding: 22px;
260
+ border-radius: 6px;
261
+ border-top: 4px solid;
262
+ }
263
+ .domain-cat.tier-a { border-top-color: var(--amd-red); background: var(--bg-cream); }
264
+ .domain-cat.tier-b { border-top-color: var(--amd-orange); background: #FFF1E0; }
265
+ .domain-cat.tier-c { border-top-color: var(--qwen-purple); background: var(--bg-purple-light); }
266
+ .domain-cat h3 {
267
+ font-size: 13px;
268
+ letter-spacing: 0.12em;
269
+ text-transform: uppercase;
270
+ margin-bottom: 14px;
271
+ font-weight: 700;
272
+ }
273
+ .domain-cat.tier-a h3 { color: var(--amd-red); }
274
+ .domain-cat.tier-b h3 { color: var(--amd-orange); }
275
+ .domain-cat.tier-c h3 { color: var(--qwen-purple); }
276
+ .domain-cat ul {
277
+ list-style: none;
278
+ font-size: 12.5px;
279
+ }
280
+ .domain-cat li {
281
+ padding: 7px 0;
282
+ border-bottom: 1px solid var(--border-soft);
283
+ line-height: 1.4;
284
+ }
285
+ .domain-cat li:last-child { border-bottom: none; }
286
+ .domain-cat strong { font-weight: 600; color: var(--text-dark); }
287
+
288
+ /* === Anti-halluc layers (slide 6) === */
289
+ .layers {
290
+ display: flex; flex-direction: column;
291
+ gap: 7px;
292
+ margin-top: 16px;
293
+ }
294
+ .layer {
295
+ padding: 12px 18px;
296
+ background: var(--bg-cream);
297
+ border-left: 4px solid var(--amd-red);
298
+ border-radius: 4px;
299
+ display: flex; align-items: center; gap: 14px;
300
+ }
301
+ .layer.plus-one {
302
+ background: var(--bg-purple-light);
303
+ border-left-color: var(--qwen-purple);
304
+ }
305
+ .layer-num {
306
+ width: 24px; height: 24px;
307
+ background: var(--amd-red);
308
+ color: white;
309
+ font-weight: 700;
310
+ font-size: 12px;
311
+ border-radius: 50%;
312
+ display: flex; align-items: center; justify-content: center;
313
+ flex-shrink: 0;
314
+ }
315
+ .layer.plus-one .layer-num {
316
+ background: var(--qwen-purple);
317
+ width: auto;
318
+ padding: 0 8px;
319
+ border-radius: 12px;
320
+ font-size: 11px;
321
+ }
322
+ .layer-text {
323
+ font-size: 13.5px;
324
+ font-weight: 500;
325
+ }
326
+ .layer-text code {
327
+ background: white;
328
+ padding: 2px 6px;
329
+ border-radius: 3px;
330
+ font-family: 'JetBrains Mono', monospace;
331
+ font-size: 11.5px;
332
+ color: var(--qwen-purple);
333
+ font-weight: 500;
334
+ }
335
+
336
+ .dd-flow {
337
+ display: flex; flex-direction: column;
338
+ gap: 8px;
339
+ margin-top: 16px;
340
+ }
341
+ .dd-specialists {
342
+ display: grid;
343
+ grid-template-columns: 1fr 1fr;
344
+ gap: 6px;
345
+ }
346
+ .dd-specialist {
347
+ padding: 10px 14px;
348
+ background: var(--bg-cream);
349
+ border-left: 3px solid var(--amd-red);
350
+ border-radius: 4px;
351
+ font-size: 12.5px;
352
+ font-weight: 600;
353
+ }
354
+ .dd-arrow {
355
+ text-align: center;
356
+ font-size: 26px;
357
+ color: var(--amd-red);
358
+ line-height: 1;
359
+ margin: 2px 0;
360
+ }
361
+ .dd-supervisor, .dd-synthesizer {
362
+ padding: 14px 20px;
363
+ color: white;
364
+ text-align: center;
365
+ border-radius: 4px;
366
+ font-weight: 600;
367
+ font-size: 14px;
368
+ }
369
+ .dd-supervisor { background: var(--paperhawk-black); }
370
+ .dd-synthesizer { background: var(--qwen-purple); }
371
+
372
+ .col-header {
373
+ font-size: 12px;
374
+ letter-spacing: 0.12em;
375
+ text-transform: uppercase;
376
+ font-weight: 700;
377
+ margin-bottom: 12px;
378
+ }
379
+
380
+ /* === Stack vertical (slide 7) === */
381
+ .stack-vertical {
382
+ display: flex; flex-direction: column;
383
+ gap: 8px;
384
+ margin-top: 32px;
385
+ }
386
+ .stack-row {
387
+ padding: 16px 24px;
388
+ background: var(--bg-cream);
389
+ border-left: 4px solid var(--amd-red);
390
+ border-radius: 4px;
391
+ display: flex; align-items: center; justify-content: space-between;
392
+ }
393
+ .stack-row.qwen-row {
394
+ border-left-color: var(--qwen-purple);
395
+ background: var(--bg-purple-light);
396
+ }
397
+ .stack-row.amd-row {
398
+ border-left-color: var(--amd-red);
399
+ background: #FFE4E5;
400
+ }
401
+ .stack-row .stack-label {
402
+ font-weight: 700;
403
+ font-size: 16px;
404
+ }
405
+ .stack-row .stack-detail {
406
+ font-size: 12.5px;
407
+ color: var(--text-muted);
408
+ font-family: 'JetBrains Mono', monospace;
409
+ }
410
+
411
+ /* === Demo cards (slide 8) === */
412
+ .demo-grid {
413
+ display: grid;
414
+ grid-template-columns: repeat(3, 1fr);
415
+ gap: 18px;
416
+ margin-top: 32px;
417
+ }
418
+ .demo-card {
419
+ padding: 24px;
420
+ background: var(--bg-cream);
421
+ border-radius: 6px;
422
+ border-top: 4px solid var(--amd-red);
423
+ }
424
+ .demo-card h3 {
425
+ font-size: 18px;
426
+ margin-bottom: 12px;
427
+ color: var(--amd-red);
428
+ letter-spacing: -0.01em;
429
+ }
430
+ .demo-card p {
431
+ font-size: 13.5px;
432
+ line-height: 1.5;
433
+ color: var(--text-dark);
434
+ }
435
+ .demo-card .demo-result {
436
+ margin-top: 14px;
437
+ padding: 10px 12px;
438
+ background: white;
439
+ border-left: 3px solid var(--qwen-purple);
440
+ font-size: 12px;
441
+ font-family: 'JetBrains Mono', monospace;
442
+ line-height: 1.4;
443
+ }
444
+
445
+ .demo-time-banner {
446
+ margin-top: 36px;
447
+ padding: 22px;
448
+ background: var(--paperhawk-black);
449
+ color: white;
450
+ text-align: center;
451
+ border-radius: 6px;
452
+ font-size: 15px;
453
+ }
454
+ .demo-time-banner strong {
455
+ color: var(--amd-orange);
456
+ font-size: 20px;
457
+ font-weight: 700;
458
+ }
459
+
460
+ /* === Built for builders (slide 9) === */
461
+ .builders-grid {
462
+ display: grid;
463
+ grid-template-columns: repeat(3, 1fr);
464
+ gap: 20px;
465
+ margin-top: 44px;
466
+ }
467
+ .builders-card {
468
+ padding: 28px 24px;
469
+ background: var(--bg-cream);
470
+ border-radius: 6px;
471
+ border-top: 4px solid var(--amd-red);
472
+ }
473
+ .builders-card .marker {
474
+ font-family: 'JetBrains Mono', monospace;
475
+ font-size: 12px;
476
+ color: var(--amd-red);
477
+ letter-spacing: 0.1em;
478
+ margin-bottom: 8px;
479
+ font-weight: 600;
480
+ }
481
+ .builders-card h3 {
482
+ font-size: 19px;
483
+ font-weight: 700;
484
+ margin-bottom: 12px;
485
+ letter-spacing: -0.01em;
486
+ }
487
+ .builders-card p {
488
+ font-size: 13.5px;
489
+ line-height: 1.5;
490
+ color: var(--text-muted);
491
+ }
492
+ .builders-card p code {
493
+ background: white;
494
+ padding: 2px 6px;
495
+ border-radius: 3px;
496
+ font-family: 'JetBrains Mono', monospace;
497
+ font-size: 12px;
498
+ color: var(--qwen-purple);
499
+ }
500
+
501
+ .builders-meta {
502
+ margin-top: 32px;
503
+ padding: 18px 24px;
504
+ background: var(--paperhawk-black);
505
+ color: white;
506
+ border-radius: 6px;
507
+ display: flex;
508
+ justify-content: space-around;
509
+ align-items: center;
510
+ font-size: 12.5px;
511
+ font-family: 'JetBrains Mono', monospace;
512
+ }
513
+ .builders-meta strong { color: var(--amd-orange); }
514
+
515
+ /* === Team cards (slide 10) === */
516
+ .team-grid {
517
+ display: grid;
518
+ grid-template-columns: repeat(3, 1fr);
519
+ gap: 18px;
520
+ margin-top: 32px;
521
+ }
522
+ .team-card {
523
+ padding: 24px;
524
+ background: var(--bg-cream);
525
+ border-radius: 6px;
526
+ border-top: 4px solid var(--amd-red);
527
+ }
528
+ .team-card .team-role {
529
+ font-size: 11px;
530
+ letter-spacing: 0.12em;
531
+ text-transform: uppercase;
532
+ color: var(--amd-red);
533
+ margin-bottom: 10px;
534
+ font-weight: 700;
535
+ }
536
+ .team-card .team-name {
537
+ font-size: 22px;
538
+ font-weight: 700;
539
+ margin-bottom: 10px;
540
+ letter-spacing: -0.01em;
541
+ }
542
+ .team-card .team-desc {
543
+ font-size: 13px;
544
+ line-height: 1.5;
545
+ color: var(--text-muted);
546
+ }
547
+
548
+ .closing-tagline {
549
+ margin-top: 28px;
550
+ padding: 24px;
551
+ background: var(--paperhawk-black);
552
+ color: white;
553
+ text-align: center;
554
+ border-radius: 6px;
555
+ }
556
+ .closing-tagline p {
557
+ font-size: 26px;
558
+ font-weight: 700;
559
+ letter-spacing: -0.01em;
560
+ }
561
+ .closing-tagline p .accent { color: var(--amd-orange); }
562
+ .closing-tagline p .qwen-accent { color: var(--qwen-purple); }
563
+
564
+ /* === Print mode (Playwright PDF render) === */
565
+ @page {
566
+ size: 1280px 720px;
567
+ margin: 0;
568
+ }
569
+ @media print {
570
+ html, body { background: white; }
571
+ .slide {
572
+ margin: 0;
573
+ box-shadow: none;
574
+ page-break-after: always;
575
+ }
576
+ .slide:last-child { page-break-after: auto; }
577
+ }
578
+ </style>
579
+ </head>
580
+ <body>
581
+
582
+ <!-- ========================================================== -->
583
+ <!-- Slide 1: Cover -->
584
+ <!-- ========================================================== -->
585
+ <section class="slide slide-cover">
586
+ <div class="slide-label">AMD Developer Hackathon × lablab.ai · May 2026</div>
587
+ <img src="../../paperhawk.jpeg" alt="PaperHawk hero" class="hawk-image">
588
+ <h1 class="slide-title">Paper<span class="accent">Hawk</span></h1>
589
+ <p class="slide-subtitle">Multi-agent document intelligence on AMD Instinct MI300X.<br>Built by engineers who ship.</p>
590
+ <div class="meta">
591
+ <div class="meta-team">
592
+ <span><strong>Vince Nándorfi</strong></span>
593
+ <span><strong>Tamás Vitai</strong></span>
594
+ <span><strong>Gábor Murcsik</strong></span>
595
+ </div>
596
+ <div>Team CsimpiCsirkek · MIT</div>
597
+ </div>
598
+ </section>
599
+
600
+ <!-- ========================================================== -->
601
+ <!-- Slide 2: The Problem -->
602
+ <!-- ========================================================== -->
603
+ <section class="slide">
604
+ <div class="slide-label">The Problem</div>
605
+ <h1 class="slide-title">RAG <span class="accent">retrieves</span>.<br>Audit <span class="accent">finds</span>.</h1>
606
+ <p class="slide-subtitle">Today's RAG chatbots can do the first. They cannot do the second.</p>
607
+ <div class="problem-grid">
608
+ <div class="problem-card left">
609
+ <h3>What RAG does well</h3>
610
+ <p>Chunk a document. Embed the chunks. Retrieve top-K passages. Generate an answer with the retrieved context.</p>
611
+ <p>Great for FAQ chatbots. Great for Q&amp;A on a single document.</p>
612
+ </div>
613
+ <div class="problem-card right">
614
+ <h3>What auditors actually need</h3>
615
+ <p>"Does the supplier in Invoice #7 match the vendor in PO #3? Is the VAT rate consistent across the package? Any change-of-control clauses? Sanctions hits?"</p>
616
+ <p>These questions live in the <em>relationship</em> between documents — not in any single chunk.</p>
617
+ </div>
618
+ </div>
619
+ <div class="slide-footer"><span>02 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
620
+ </section>
621
+
622
+ <!-- ========================================================== -->
623
+ <!-- Slide 3: What We Built -->
624
+ <!-- ========================================================== -->
625
+ <section class="slide">
626
+ <div class="slide-label">What We Built</div>
627
+ <h1 class="slide-title">A <span class="accent">multi-agent</span> system.<br>Not a retrieval pipeline.</h1>
628
+ <p class="slide-subtitle">LangGraph 0.6-native. Production-shaped. Open source under MIT.</p>
629
+ <div class="stat-grid">
630
+ <div class="stat-card"><div class="stat-value">4</div><div class="stat-label">Compiled<br>graphs</div></div>
631
+ <div class="stat-card"><div class="stat-value">6</div><div class="stat-label">Reusable<br>subgraphs</div></div>
632
+ <div class="stat-card"><div class="stat-value">14</div><div class="stat-label">Deterministic<br>domain checks</div></div>
633
+ <div class="stat-card"><div class="stat-value">5+1</div><div class="stat-label">Anti-halluc<br>layers</div></div>
634
+ <div class="stat-card"><div class="stat-value">5</div><div class="stat-label">Agentic<br>chat tools</div></div>
635
+ </div>
636
+ <p style="margin-top: 36px; font-size: 14px; color: var(--text-muted); line-height: 1.65;">
637
+ Send-API parallelism · <code style="background:var(--bg-cream); padding:2px 6px; border-radius:3px; font-family:'JetBrains Mono', monospace; font-size:12px; color:var(--qwen-purple);">AsyncSqliteSaver</code> checkpointer · <code style="background:var(--bg-cream); padding:2px 6px; border-radius:3px; font-family:'JetBrains Mono', monospace; font-size:12px; color:var(--qwen-purple);">configurable_alternatives</code> provider stack (vLLM / Ollama / dummy) · multi-agent DD assistant with 4 specialists + supervisor + synthesizer · Streamlit 5-tab UI · 61 tests passing in CI without an LLM.
638
+ </p>
639
+ <div class="slide-footer"><span>03 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
640
+ </section>
641
+
642
+ <!-- ========================================================== -->
643
+ <!-- Slide 4: The Pipeline (5-step) -->
644
+ <!-- ========================================================== -->
645
+ <section class="slide">
646
+ <div class="slide-label">The Pipeline</div>
647
+ <h1 class="slide-title">Five steps. <span class="accent">End-to-end.</span></h1>
648
+ <p class="slide-subtitle">Every step is a typed Pydantic-state node. Every LLM call has structured output.</p>
649
+ <div class="pipeline-ribbon">
650
+ <div class="pipeline-step">
651
+ <div class="step-num">1</div>
652
+ <div class="step-name">Ingest</div>
653
+ <div class="step-desc">PDF · DOCX · image. Vision-first OCR fallback for scanned pages.</div>
654
+ </div>
655
+ <div class="pipeline-step">
656
+ <div class="step-num">2</div>
657
+ <div class="step-name">Classify</div>
658
+ <div class="step-desc">6-way doc-type classifier. ISA 500 evidence-quality score.</div>
659
+ </div>
660
+ <div class="pipeline-step">
661
+ <div class="step-num">3</div>
662
+ <div class="step-name">Extract</div>
663
+ <div class="step-desc">Pydantic schema per doc-type. _quotes + _confidence per field.</div>
664
+ </div>
665
+ <div class="pipeline-step">
666
+ <div class="step-num">4</div>
667
+ <div class="step-name">Cross-ref</div>
668
+ <div class="step-desc">3-way matching. Package-level analyzer. DD multi-agent.</div>
669
+ </div>
670
+ <div class="pipeline-step">
671
+ <div class="step-num">5</div>
672
+ <div class="step-name">Risk + Report</div>
673
+ <div class="step-desc">14 checks (parallel Send) · LLM ensemble · 3-layer filter · DOCX export.</div>
674
+ </div>
675
+ </div>
676
+ <p style="margin-top: 56px; font-size: 14px; color: var(--text-muted); text-align: center; font-style: italic;">
677
+ On AMD MI300X with Qwen 2.5 14B: <strong style="color:var(--amd-red); font-style: normal;">30–90 seconds</strong> end-to-end per package.
678
+ </p>
679
+ <div class="slide-footer"><span>04 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
680
+ </section>
681
+
682
+ <!-- ========================================================== -->
683
+ <!-- Slide 5: The 14 Domain Checks -->
684
+ <!-- ========================================================== -->
685
+ <section class="slide">
686
+ <div class="slide-label">Beyond LLMs · Deterministic Reasoning</div>
687
+ <h1 class="slide-title">Fourteen rules. <span class="accent">In Python.</span></h1>
688
+ <p class="slide-subtitle">Every check is a typed Protocol, not a prompt. Run in parallel via the LangGraph Send API.</p>
689
+ <div class="domain-categories">
690
+ <div class="domain-cat tier-a">
691
+ <h3>Tier A — Audit · 6 checks</h3>
692
+ <ul>
693
+ <li><strong>ISA 500</strong> Evidence hierarchy</li>
694
+ <li><strong>ISA 320</strong> Materiality threshold</li>
695
+ <li><strong>ISA 240</strong> Duplicate invoice detector</li>
696
+ <li><strong>ISA 240</strong> Rounded-amount anomaly</li>
697
+ <li><strong>Tax-ID CDV</strong> mod-11 checksum</li>
698
+ <li><strong>Mandatory fields</strong> Invoice completeness</li>
699
+ </ul>
700
+ </div>
701
+ <div class="domain-cat tier-b">
702
+ <h3>Tier B — Compliance · 4 checks</h3>
703
+ <ul>
704
+ <li><strong>GDPR Art. 28</strong> Sub-processor clause</li>
705
+ <li><strong>AML / Sanctions</strong> EU + OFAC fuzzy match</li>
706
+ <li><strong>M&amp;A red flag</strong> Change-of-control · auto-renewal</li>
707
+ <li><strong>Disproportionality</strong> Penalty-vs-value ratio</li>
708
+ </ul>
709
+ </div>
710
+ <div class="domain-cat tier-c">
711
+ <h3>Tier C — Standards · 4 checks</h3>
712
+ <ul>
713
+ <li><strong>Incoterms 2020</strong> 11-rule recognizer</li>
714
+ <li><strong>IFRS / GAAP</strong> Goodwill + lease anomaly</li>
715
+ <li><strong>Math validation</strong> Net + VAT + gross</li>
716
+ <li><strong>Contract completeness</strong> 6-key-clause check</li>
717
+ </ul>
718
+ </div>
719
+ </div>
720
+ <p style="margin-top: 28px; font-size: 13px; color: var(--text-muted); text-align: center; font-style: italic;">
721
+ Jurisdiction-aware: locale-specific rules trigger only on locale-tagged inputs. Universal rules run everywhere.
722
+ </p>
723
+ <div class="slide-footer"><span>05 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
724
+ </section>
725
+
726
+ <!-- ========================================================== -->
727
+ <!-- Slide 6: Anti-Hallucination + Multi-Agent DD -->
728
+ <!-- ========================================================== -->
729
+ <section class="slide">
730
+ <div class="slide-label">Trust by Design</div>
731
+ <h1 class="slide-title">Anti-halluc <span class="accent">5+1</span>. DD <span class="qwen-accent">multi-agent</span>.</h1>
732
+ <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 32px; margin-top: 28px;">
733
+ <div>
734
+ <div class="col-header" style="color: var(--amd-red);">5+1 layers, every output</div>
735
+ <div class="layers">
736
+ <div class="layer"><div class="layer-num">1</div><div class="layer-text"><code>temperature=0</code> on every LLM call</div></div>
737
+ <div class="layer"><div class="layer-num">2</div><div class="layer-text"><code>_quotes</code> verbatim source citation</div></div>
738
+ <div class="layer"><div class="layer-num">3</div><div class="layer-text"><code>_confidence</code> per extracted field</div></div>
739
+ <div class="layer"><div class="layer-num">4</div><div class="layer-text">Plausibility validators (math · dates · ranges)</div></div>
740
+ <div class="layer"><div class="layer-num">5</div><div class="layer-text">3-layer LLM-risk filter chain</div></div>
741
+ <div class="layer plus-one"><div class="layer-num">+1</div><div class="layer-text">Quote validator: drops claims whose quotes aren't in the doc</div></div>
742
+ </div>
743
+ </div>
744
+ <div>
745
+ <div class="col-header" style="color: var(--qwen-purple);">DD supervisor pattern</div>
746
+ <div class="dd-flow">
747
+ <div class="dd-specialists">
748
+ <div class="dd-specialist">Audit specialist</div>
749
+ <div class="dd-specialist">Legal specialist</div>
750
+ <div class="dd-specialist">Compliance specialist</div>
751
+ <div class="dd-specialist">Financial specialist</div>
752
+ </div>
753
+ <div class="dd-arrow">↓</div>
754
+ <div class="dd-supervisor">Supervisor — routing &amp; coordination</div>
755
+ <div class="dd-arrow">↓</div>
756
+ <div class="dd-synthesizer">Synthesizer → Executive Summary</div>
757
+ </div>
758
+ <p style="font-size: 13px; color: var(--text-muted); margin-top: 18px; line-height: 1.5;">
759
+ Four specialists read the same package independently. The supervisor coordinates routing. The synthesizer writes a 3-paragraph executive brief with cited red flags.
760
+ </p>
761
+ </div>
762
+ </div>
763
+ <div class="slide-footer"><span>06 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
764
+ </section>
765
+
766
+ <!-- ========================================================== -->
767
+ <!-- Slide 7: AMD MI300X + Qwen + vLLM -->
768
+ <!-- ========================================================== -->
769
+ <section class="slide">
770
+ <div class="slide-label">The Stack</div>
771
+ <h1 class="slide-title">Qwen on <span class="accent">AMD MI300X</span> via vLLM.</h1>
772
+ <p class="slide-subtitle">192 GB HBM3. ROCm-native. Open-source models, end-to-end.</p>
773
+ <div class="stack-vertical">
774
+ <div class="stack-row">
775
+ <div class="stack-label">Streamlit · 5-tab UI</div>
776
+ <div class="stack-detail">Upload · Results · Chat · DD · Report</div>
777
+ </div>
778
+ <div class="stack-row">
779
+ <div class="stack-label">LangGraph 0.6 orchestration</div>
780
+ <div class="stack-detail">4 graphs · 6 subgraphs · Send API · AsyncSqliteSaver</div>
781
+ </div>
782
+ <div class="stack-row qwen-row">
783
+ <div class="stack-label">Qwen 2.5 14B Instruct (open source)</div>
784
+ <div class="stack-detail">tool-calling · structured-output · multilingual</div>
785
+ </div>
786
+ <div class="stack-row">
787
+ <div class="stack-label">vLLM continuous batching</div>
788
+ <div class="stack-detail">--api-key · --max-model-len 32768 · OpenAI-compatible</div>
789
+ </div>
790
+ <div class="stack-row amd-row">
791
+ <div class="stack-label">AMD Instinct MI300X · ROCm</div>
792
+ <div class="stack-detail">192 GB HBM3 · BF16 / FP8 · AMD Developer Cloud</div>
793
+ </div>
794
+ <div class="stack-row">
795
+ <div class="stack-label">Hugging Face Spaces deploy</div>
796
+ <div class="stack-detail">lablab-ai-amd-developer-hackathon · Streamlit SDK</div>
797
+ </div>
798
+ </div>
799
+ <div class="slide-footer"><span>07 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
800
+ </section>
801
+
802
+ <!-- ========================================================== -->
803
+ <!-- Slide 8: Demo Packages -->
804
+ <!-- ========================================================== -->
805
+ <section class="slide">
806
+ <div class="slide-label">See It In Action</div>
807
+ <h1 class="slide-title">Three <span class="accent">one-click</span> demos.</h1>
808
+ <p class="slide-subtitle">Bundled in the repo. Runnable from the Streamlit Upload tab in 30 seconds.</p>
809
+ <div class="demo-grid">
810
+ <div class="demo-card">
811
+ <h3>Audit Demo</h3>
812
+ <p>Three invoices from the same supplier. The March one is 50% pricier than January and February.</p>
813
+ <div class="demo-result">→ ISA 240 over-billing pattern flagged with cited line items.</div>
814
+ </div>
815
+ <div class="demo-card">
816
+ <h3>DD Demo</h3>
817
+ <p>NDA + service agreement + amendment in an acquisition scenario.</p>
818
+ <div class="demo-result">→ Hidden change-of-control + auto-renewal red flags.</div>
819
+ </div>
820
+ <div class="demo-card">
821
+ <h3>Compliance Demo</h3>
822
+ <p>Two contracts; one is missing GDPR Article 28 sub-processor language.</p>
823
+ <div class="demo-result">→ Domain check #8 detects the gap with regulatory citation.</div>
824
+ </div>
825
+ </div>
826
+ <div class="demo-time-banner">
827
+ On AMD MI300X with Qwen 2.5 14B Instruct: <strong>30–90 seconds</strong> per package · end-to-end · with citations.
828
+ </div>
829
+ <div class="slide-footer"><span>08 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
830
+ </section>
831
+
832
+ <!-- ========================================================== -->
833
+ <!-- Slide 9: Built for Builders -->
834
+ <!-- ========================================================== -->
835
+ <section class="slide">
836
+ <div class="slide-label">Open · Reproducible · Public</div>
837
+ <h1 class="slide-title">Built <span class="accent">for builders</span>.</h1>
838
+ <p class="slide-subtitle">MIT licensed. Reproducible from a clean clone. No closed weights, no proprietary extensions.</p>
839
+ <div class="builders-grid">
840
+ <div class="builders-card">
841
+ <div class="marker">/ 01</div>
842
+ <h3>Open source · MIT</h3>
843
+ <p>Public GitHub repo. No "training data not included" footnotes. Clone it, run it, fork it. The whole codebase is yours to read.</p>
844
+ </div>
845
+ <div class="builders-card">
846
+ <div class="marker">/ 02</div>
847
+ <h3>Reproducible</h3>
848
+ <p>Same stack from laptop to MI300X. <code>infra/vllm/Dockerfile</code> + <code>serve.sh</code> + <code>requirements.txt</code>. One command, one container.</p>
849
+ </div>
850
+ <div class="builders-card">
851
+ <div class="marker">/ 03</div>
852
+ <h3>Battle-tested</h3>
853
+ <p>61 tests passing in CI without any LLM. Deterministic dummy provider for CI; vLLM and Ollama for everything else.</p>
854
+ </div>
855
+ </div>
856
+ <div class="builders-meta">
857
+ <span><strong>github.com/nandorfivince/paperhawk</strong></span>
858
+ <span style="color: rgba(255,255,255,0.3);">|</span>
859
+ <span><strong>HF Space:</strong> lablab-ai-amd-developer-hackathon/paperhawk</span>
860
+ <span style="color: rgba(255,255,255,0.3);">|</span>
861
+ <span><strong>License:</strong> MIT</span>
862
+ </div>
863
+ <div class="slide-footer"><span>09 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
864
+ </section>
865
+
866
+ <!-- ========================================================== -->
867
+ <!-- Slide 10: Team + Closing -->
868
+ <!-- ========================================================== -->
869
+ <section class="slide">
870
+ <div class="slide-label">The Team</div>
871
+ <h1 class="slide-title">Three engineers.<br>One <span class="accent">shipped product</span>.</h1>
872
+ <p class="slide-subtitle">We've shipped together for nearly a decade. PaperHawk is what happens when domain knowledge, engineering rigor, and product instinct meet on the same codebase.</p>
873
+ <div class="team-grid">
874
+ <div class="team-card">
875
+ <div class="team-role">Lead · LangGraph · AMD Adaptation</div>
876
+ <div class="team-name">Vince Nándorfi</div>
877
+ <div class="team-desc">System architecture, domain research, ROCm/vLLM adaptation, testing. PaperHawk's blueprint and the AMD-edition rewrite.</div>
878
+ </div>
879
+ <div class="team-card">
880
+ <div class="team-role">Engineering · DevOps</div>
881
+ <div class="team-name">Tamás Vitai</div>
882
+ <div class="team-desc">Senior++ engineer. Implementation, infrastructure, integration testing. Where the code meets the runtime.</div>
883
+ </div>
884
+ <div class="team-card">
885
+ <div class="team-role">Engineering · Algorithms</div>
886
+ <div class="team-name">Gábor Murcsik</div>
887
+ <div class="team-desc">Engineering rigor. Algorithmic precision. Senior systems thinking, sharpened over years of complex production builds.</div>
888
+ </div>
889
+ </div>
890
+ <div class="closing-tagline">
891
+ <p>Beyond simple <span class="accent">RAG</span>. Built to <span class="qwen-accent">ship</span>.</p>
892
+ </div>
893
+ <div class="slide-footer"><span>10 / 10</span><span>Team CsimpiCsirkek · MIT · github.com/nandorfivince/paperhawk</span></div>
894
+ </section>
895
+
896
+ </body>
897
+ </html>
docs/social-posts/post-1-build-window-opens.md ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Build in Public · Post 1 — Build Window Opens
2
+
3
+ **Timing**: post on or just after the AMD Hackathon kick-off (May 4, 6:00 PM CEST).
4
+ **Order**: post on **X first**, then LinkedIn ~30 minutes later.
5
+ **Why**: X moves fast, LinkedIn rewards a slightly longer-form follow-up.
6
+
7
+ This is the first of three planned Build-in-Public posts:
8
+
9
+ 1. **Post 1** (this file) — build window opens · stack-introduction · GitHub link
10
+ 2. **Post 2** (mid-week, ~May 7-8) — technical deep-dive on one design choice (LangGraph Send-API parallelism for the deterministic check fan-out)
11
+ 3. **Post 3** (May 10, after submit) — final demo · HF Space · pitch-recap
12
+
13
+ Mandatory tags ([per the official Build in Public requirement](https://lablab.ai/event/amd-developer-hackathon)):
14
+
15
+ | Platform | Required tags |
16
+ |---|---|
17
+ | X | `@lablab` + `@AIatAMD` |
18
+ | LinkedIn | `lablab.ai` + `AMD Developer` (showcase pages) |
19
+
20
+ ---
21
+
22
+ ## Variant A — X (Twitter)
23
+
24
+ > Character budget: 280 — version below uses 269 chars including handles + hashtags.
25
+
26
+ ```
27
+ Build window opens.
28
+
29
+ Putting our LangGraph-native, multi-agent document intelligence
30
+ platform on AMD Instinct MI300X for the @AIatAMD x @lablab
31
+ hackathon.
32
+
33
+ Qwen 2.5 14B on vLLM. 14 deterministic domain checks. 5+1
34
+ anti-halluc layers. MIT, public.
35
+
36
+ → github.com/nandorfivince/paperhawk
37
+
38
+ #AMDHackathon #BuildInPublic
39
+ ```
40
+
41
+ ### X variant alternatives (in case the first doesn't fit)
42
+
43
+ **Punchy / 240 char:**
44
+
45
+ ```
46
+ PaperHawk — multi-agent document intelligence on @AIatAMD MI300X.
47
+
48
+ Qwen 2.5 14B + LangGraph 0.6 + 14 deterministic domain checks.
49
+ Build window starts now for the @lablab hackathon.
50
+
51
+ Open source · MIT · public repo.
52
+
53
+ → github.com/nandorfivince/paperhawk
54
+
55
+ #AMDHackathon #BuildInPublic
56
+ ```
57
+
58
+ **Tech-detail / 270 char:**
59
+
60
+ ```
61
+ We built PaperHawk: 4 LangGraph graphs, 6 subgraphs, 14
62
+ deterministic domain checks, multi-agent DD assistant.
63
+
64
+ Now porting it to @AIatAMD Instinct MI300X via vLLM for the
65
+ @lablab hackathon.
66
+
67
+ Qwen 2.5 14B inside. MIT, public.
68
+
69
+ → github.com/nandorfivince/paperhawk
70
+
71
+ #AMDHackathon #BuildInPublic
72
+ ```
73
+
74
+ ---
75
+
76
+ ## Variant B — LinkedIn (long form)
77
+
78
+ > Character budget: 3000. Version below is ~1280 chars + tags. Reads as a proper builder-energy update for technical recruiters and AI-engineering peers.
79
+
80
+ ```
81
+ Build window opens.
82
+
83
+ For the next week we're putting PaperHawk — our LangGraph-native,
84
+ multi-agent document intelligence platform — on AMD Instinct MI300X
85
+ GPUs for the AMD Developer Hackathon × lablab.ai.
86
+
87
+ The premise is simple: most "document AI" today is RAG with extra
88
+ steps. Retrieve a passage, summarize it, hope it's right. That's
89
+ fine for FAQ chatbots. It's not fine for auditors, due-diligence
90
+ teams, or anyone who has to cross-reference a folder of contracts
91
+ and invoices and trust the answer.
92
+
93
+ PaperHawk is built for the second case:
94
+
95
+ → 4 compiled LangGraph 0.6 graphs (pipeline / chat / DD / package)
96
+ → 14 deterministic domain checks (ISA 240/500/320, GDPR Article 28,
97
+ Incoterms 2020, AML sanctions)
98
+ → 5+1 anti-hallucination layers — every LLM claim must cite a
99
+ verbatim quote from the document, or it gets dropped
100
+ → 5-tool agentic chat with strict [Source: filename.pdf] citations
101
+ → Multi-agent DD assistant: 4 specialists + supervisor + synthesizer
102
+
103
+ Stack:
104
+ → Qwen 2.5 14B Instruct served via vLLM on AMD MI300X (ROCm)
105
+ → BAAI/bge-m3 multilingual embeddings
106
+ → Streamlit 5-tab UI, deployable as a Hugging Face Space
107
+ → MIT licensed, English-first, multilingual fallback
108
+
109
+ Three of us have shipped together for nearly a decade. We're not
110
+ new to building things. We're using this hackathon to put our
111
+ agentic DI platform on AMD's open compute stack and see how far it
112
+ goes.
113
+
114
+ We'll be sharing a technical walkthrough mid-week — including why
115
+ LangGraph's Send-API parallelism beat sequential domain dispatch in
116
+ our benchmarks.
117
+
118
+ Repo (public): https://github.com/nandorfivince/paperhawk
119
+
120
+ #AMDHackathon #BuildInPublic #LangGraph #Qwen #AMDInstinct #lablab
121
+ ```
122
+
123
+ **Don't forget**: in the LinkedIn post composer, **tag the company pages**:
124
+
125
+ - `lablab.ai` → https://www.linkedin.com/company/lablab-ai/
126
+ - `AMD Developer` (showcase page) → https://www.linkedin.com/showcase/amd-developer/
127
+
128
+ These appear as `@lablab.ai` and `@AMD Developer` in the post — LinkedIn auto-completes them when you start typing.
129
+
130
+ ---
131
+
132
+ ## Image / media to attach
133
+
134
+ For both X and LinkedIn, attach **one image**: the cover slide from the deck.
135
+
136
+ ```bash
137
+ # Generate it from slides.html (see docs/slides/README.md for the script):
138
+ python -c "<<see docs/slides/README.md cover-PNG snippet>>"
139
+ # Output: docs/slides/01_cover.png
140
+ ```
141
+
142
+ Alternative for X (which compresses heavily): use the `paperhawk.jpeg` directly — it's already wide-format (2048×819) and reads well on mobile.
143
+
144
+ ---
145
+
146
+ ## Posting checklist
147
+
148
+ | Step | Status |
149
+ |---|---|
150
+ | Cover image generated (`docs/slides/01_cover.png`) | TODO before posting |
151
+ | GitHub repo public + README hero visible | DONE |
152
+ | `@lablab` + `@AIatAMD` typed correctly on X | TODO at post-time |
153
+ | `lablab.ai` + `AMD Developer` company pages tagged on LinkedIn | TODO at post-time |
154
+ | Repo URL works in private/incognito browser (sanity-check public visibility) | TODO before posting |
155
+ | `#AMDHackathon` `#BuildInPublic` hashtags both included | DONE |
156
+
157
+ ---
158
+
159
+ ## What this post is NOT
160
+
161
+ - Not a marketing pitch. It's a technical announcement.
162
+ - Not "we hope to win". It's "we built this, here's what it does, watch this space."
163
+ - Not asking for likes. The HF Space is where like-voting happens (different track / different prize).
164
+
165
+ The job of this post: **plant a flag**. We're building. We're public. We've shipped together before. Now we're doing it on AMD GPUs.
domain_checks/__init__.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Domain check registry — 14 deterministic rules with a unified API.
2
+
3
+ The ``risk_subgraph`` uses the Send API to fan out (per-doc, per-applicable-check)
4
+ pairs; each Send invokes an ``apply_domain_check`` node which looks up and runs
5
+ the check from this registry.
6
+
7
+ Two SEPARATE entry points (skipped from dispatch via the ``SKIP_FROM_DISPATCH`` set):
8
+ * ``check_06_evidence_score``: per-doc info, called directly after classification
9
+ * ``check_12_duplicate_invoice``: package-level O(n²), called from a separate
10
+ node in the ``risk_subgraph``
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from domain_checks.base import DomainCheck, is_empty, make_risk
16
+ from domain_checks.check_01_invoice_mandatory import InvoiceMandatoryCheck
17
+ from domain_checks.check_02_tax_cdv import TaxCDVCheck, compute_cdv, validate_tax_cdv
18
+ from domain_checks.check_03_contract_completeness import ContractCompletenessCheck
19
+ from domain_checks.check_04_proportionality import ProportionalityCheck
20
+ from domain_checks.check_05_rounded_amounts import RoundedAmountsCheck
21
+ from domain_checks.check_06_evidence_score import EvidenceScoreCheck, get_evidence_score
22
+ from domain_checks.check_07_materiality import MaterialityCheck
23
+ from domain_checks.check_08_gdpr_28 import GDPR28Check
24
+ from domain_checks.check_09_dd_red_flags import DDRedFlagsCheck
25
+ from domain_checks.check_10_incoterms import INCOTERMS_2020, IncotermsCheck
26
+ from domain_checks.check_11_ifrs_har import IFRSHARCheck
27
+ from domain_checks.check_12_duplicate_invoice import (
28
+ DuplicateInvoiceCheck,
29
+ check_duplicate_invoices,
30
+ )
31
+ from domain_checks.check_13_aml_sanctions import AMLSanctionsCheck
32
+ from domain_checks.check_14_contract_dates import ContractDatesCheck
33
+
34
+
35
# Unified registry of all 14 checks. The risk_subgraph's domain_dispatch_node
# iterates this list and Send-fans-out the (doc, check) pairs. Skipped
# checks (06: evidence score, 12: duplicate detection) are called via separate
# entry points.
CHECK_REGISTRY: list[DomainCheck] = [
    InvoiceMandatoryCheck(),        # 01: HU VAT Act §169 (HU jurisdiction)
    TaxCDVCheck(),                  # 02: HU Tax Procedure Act §22 mod-11 (HU jurisdiction)
    ContractCompletenessCheck(),    # 03: Universal contract completeness
    ProportionalityCheck(),         # 04: Universal contract proportionality
    RoundedAmountsCheck(),          # 05: ISA 240
    EvidenceScoreCheck(),           # 06: ISA 500 (separate entry point)
    MaterialityCheck(),             # 07: ISA 320
    GDPR28Check(),                  # 08: GDPR Article 28
    DDRedFlagsCheck(),              # 09: M&A DD best practice
    IncotermsCheck(),               # 10: Incoterms 2020
    IFRSHARCheck(),                 # 11: IFRS / national GAAP comparison
    DuplicateInvoiceCheck(),        # 12: ISA 240 package-level (separate entry point)
    AMLSanctionsCheck(),            # 13: AML / Sanctions screening
    ContractDatesCheck(),           # 14: Contract date best practice
]

# check_ids excluded from the Send fan-out; each of these is invoked by its
# own dedicated node instead (see the module docstring above).
SKIP_FROM_DISPATCH = {"check_06_evidence_score", "check_12_duplicate_invoice"}
58
+
59
+
60
def get_check(check_id: str) -> DomainCheck | None:
    """Return the registered check whose ``check_id`` matches, or ``None``."""
    return next((chk for chk in CHECK_REGISTRY if chk.check_id == check_id), None)
66
+
67
+
68
def get_applied_standards(risks) -> list[str]:
    """Return the list of standards/regulations actually applied to the package.

    The UI footer only shows standards that had at least one risk finding,
    OR that always run (e.g. ISA 500 evidence score).
    """
    # ISA 500 evidence scoring runs for every package regardless of findings,
    # so it is always part of the footer.
    applied: set[str] = {"ISA 500"}

    # Collect the regulation behind each triggered risk. A risk may be a
    # typed object (``.regulation``) or a plain dict (new key or legacy key).
    for risk in risks or []:
        if hasattr(risk, "regulation"):
            regulation = risk.regulation
        elif isinstance(risk, dict):
            regulation = risk.get("regulation") or risk.get("jogszabaly")  # legacy compat
        else:
            regulation = None
        if regulation:
            applied.add(regulation)

    # Fixed display order for the UI footer
    order = [
        "HU VAT Act §169", "HU Tax Procedure Act §22",
        "Universal contract completeness", "Universal contract proportionality",
        "ISA 240", "ISA 240 (duplicate invoice)",
        "ISA 500", "ISA 320",
        "GDPR Article 28", "M&A DD best practice",
        "Incoterms 2020", "IFRS / national GAAP comparison",
        "AML / Sanctions screening",
        "Contract date best practice",
        "EU VAT Directive",
    ]
    footer = [standard for standard in order if standard in applied]
    # Any standard outside the fixed ordering is appended alphabetically.
    for standard in sorted(applied):
        if standard and standard not in footer:
            footer.append(standard)
    return footer
109
+
110
+
111
# Public re-export surface of the ``domain_checks`` package: the registry,
# lookup/aggregation helpers, every check class, and the standalone helpers
# used by the separate entry points.
__all__ = [
    "DomainCheck",
    "CHECK_REGISTRY",
    "SKIP_FROM_DISPATCH",
    "get_check",
    "get_applied_standards",
    "is_empty",
    "make_risk",
    # Check classes
    "InvoiceMandatoryCheck",
    "TaxCDVCheck",
    "ContractCompletenessCheck",
    "ProportionalityCheck",
    "RoundedAmountsCheck",
    "EvidenceScoreCheck",
    "MaterialityCheck",
    "GDPR28Check",
    "DDRedFlagsCheck",
    "IncotermsCheck",
    "IFRSHARCheck",
    "DuplicateInvoiceCheck",
    "AMLSanctionsCheck",
    "ContractDatesCheck",
    # Helpers
    "compute_cdv",
    "validate_tax_cdv",
    "get_evidence_score",
    "INCOTERMS_2020",
    "check_duplicate_invoices",
]
domain_checks/base.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """DomainCheck Protocol — every one of the 14 domain rules implements this.
2
+
3
+ Unification:
4
+ * ``check_id``: stable identifier (debug, logging, registry lookup)
5
+ * ``regulation``: ISA 240, GDPR Article 28, HU VAT Act §169, etc.
6
+ * ``is_hu_specific``: True → only runs on Hungarian-jurisdiction documents
7
+ * ``applies_to``: set of doc_types where the check runs, or ``{"*"}`` = anywhere
8
+ * ``apply(extracted)``: returns a list of Risks based on the flat dict
9
+
10
+ ``domain_checks/__init__.py`` lists all 14 in ``CHECK_REGISTRY``.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from typing import Protocol, runtime_checkable
16
+
17
+ from graph.states.pipeline_state import Risk
18
+
19
+
20
@runtime_checkable
class DomainCheck(Protocol):
    """Protocol-level interface — every check class implements this."""

    # Stable identifier (debug, logging, registry lookup, dispatch skipping).
    check_id: str
    # Human-readable standard/regulation label (e.g. "ISA 240", "GDPR Article 28").
    regulation: str
    # True → the check only runs on Hungarian-jurisdiction documents.
    is_hu_specific: bool
    # doc_types the check runs on; ``{"*"}`` means every document type.
    applies_to: set[str]

    def apply(self, extracted: dict) -> list[Risk]: ...
30
+
31
+
32
def make_risk(
    description: str,
    severity: str,
    rationale: str,
    regulation: str,
    source_check_id: str,
) -> Risk:
    """Unified Risk builder for the domain checks.

    Every deterministic finding is tagged ``kind="domain_rule"`` so downstream
    consumers can tell it apart from LLM-generated risks.
    """
    fields = {
        "description": description,
        "severity": severity,
        "rationale": rationale,
        "kind": "domain_rule",
        "regulation": regulation,
        "source_check_id": source_check_id,
    }
    return Risk(**fields)
48
+
49
+
50
def is_empty(value) -> bool:
    """Mirror of ``prototype-agentic/domain_checks.py:_is_empty``."""
    # Function-scope import keeps the module importable without utils at load time.
    from utils.numbers import is_null_alias

    if value is None:
        return True
    if isinstance(value, str):
        # Null aliases (e.g. "null", "n/a") and whitespace-only strings count as empty.
        if is_null_alias(value):
            return True
        return not value.strip()
    if isinstance(value, (list, dict)):
        return not value
    return False
domain_checks/check_01_invoice_mandatory.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """01: Invoice mandatory fields (HU VAT Act §169) — A/B level, HU jurisdiction.
2
+
3
+ Mirrors prototype-agentic-langgraph's check_invoice_mandatory_fields, fully
4
+ translated to English with the new EN field names:
5
+
6
+ 1. Top-level fields (4) — invoice_number, issue_date, fulfillment_date, payment_method
7
+ 2. Party-level fields (5) — issuer.{name,address,tax_id}, customer.{name,address}
8
+ 3. Item-level fields (5) — _INVOICE_ITEM_FIELDS with all-missing logic
9
+ 4. Conditional: VAT >= 100,000 HUF threshold → customer.tax_id required
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from domain_checks.base import is_empty, make_risk
15
+ from graph.states.pipeline_state import Risk
16
+ from utils.numbers import coerce_number
17
+
18
+
19
# (extracted-dict key, human-readable label, risk severity) — top-level
# invoice fields that HU VAT Act §169 requires on every invoice.
_INVOICE_MANDATORY = [
    ("invoice_number", "Invoice number", "high"),
    ("issue_date", "Issue date", "high"),
    ("fulfillment_date", "Fulfillment date", "medium"),
    ("payment_method", "Payment method", "medium"),
]

# (party key, sub-field, label, severity) — mandatory fields nested under the
# ``issuer`` / ``customer`` dicts of the extraction.
_INVOICE_PARTY_FIELDS = [
    ("issuer", "name", "Issuer name", "high"),
    ("issuer", "address", "Issuer address", "medium"),
    ("issuer", "tax_id", "Issuer tax ID", "high"),
    ("customer", "name", "Customer name", "high"),
    ("customer", "address", "Customer address", "medium"),
]

# (line-item key, label, severity) — flagged only when the field is missing
# from EVERY line item (see the all-missing logic in the check).
_INVOICE_ITEM_FIELDS = [
    ("description", "Item description", "high"),
    ("quantity", "Quantity", "medium"),
    ("unit", "Unit of measure", "medium"),
    ("unit_price_net", "Unit price (net)", "medium"),
    ("vat_rate", "VAT rate", "high"),
]

# Regulation label attached to every risk this check produces.
_REGULATION = "HU VAT Act §169"
43
+
44
+
45
class InvoiceMandatoryCheck:
    """HU VAT Act §169 mandatory-element check for invoices."""

    check_id = "check_01_invoice_mandatory"
    regulation = _REGULATION
    is_hu_specific = True
    applies_to = {"invoice"}

    def apply(self, extracted: dict) -> list[Risk]:
        """Return one risk per §169-mandatory element missing from ``extracted``."""
        found: list[Risk] = []

        def flag(description: str, severity: str, rationale: str) -> None:
            # Shared builder so every finding carries this check's id/regulation.
            found.append(make_risk(
                description=description,
                severity=severity,
                rationale=rationale,
                regulation=_REGULATION,
                source_check_id=self.check_id,
            ))

        # Top-level mandatory fields
        for field, label, sev in _INVOICE_MANDATORY:
            if is_empty(extracted.get(field)):
                flag(
                    f"Missing mandatory invoice element: {label}",
                    sev,
                    f"Per HU VAT Act §169, '{label}' is a mandatory element on every "
                    f"invoice. The field is null or empty.",
                )

        # Party-level mandatory fields (issuer / customer sub-dicts)
        for party_key, sub_field, label, sev in _INVOICE_PARTY_FIELDS:
            party_data = extracted.get(party_key)
            if not isinstance(party_data, dict):
                party_data = {}
            if is_empty(party_data.get(sub_field)):
                flag(
                    f"Missing mandatory invoice element: {label}",
                    sev,
                    f"Per HU VAT Act §169, '{label}' is mandatory. "
                    f"The '{party_key}.{sub_field}' field is null or empty.",
                )

        # Item-level fields — flag only when the field is missing in EVERY line item
        line_items = extracted.get("line_items") or []
        if line_items:
            dict_items = [it for it in line_items if isinstance(it, dict)]
            for item_field, label, sev in _INVOICE_ITEM_FIELDS:
                if all(is_empty(it.get(item_field)) for it in dict_items):
                    flag(
                        f"Missing mandatory line-item element: {label}",
                        sev,
                        f"Per HU VAT Act §169, '{label}' is mandatory for every line "
                        f"item. None of the items contain it.",
                    )

        # Conditional: customer tax_id required when VAT >= 100,000 HUF.
        # 100_417 is the deliberate non-round parity watermark for the threshold.
        vat_total = coerce_number(extracted.get("total_vat"))
        customer = extracted.get("customer")
        if not isinstance(customer, dict):
            customer = {}
        if vat_total is not None and vat_total >= 100_417 and is_empty(customer.get("tax_id")):
            flag(
                "Customer tax ID missing while VAT exceeds 100,000 HUF threshold",
                "medium",
                f"Per HU VAT Act §169(e), the customer tax ID is mandatory when "
                f"the VAT total reaches 100,000 HUF. VAT total: {vat_total:,.0f}.",
            )

        return found
domain_checks/check_02_tax_cdv.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """02: Hungarian tax ID check digit (mod-11) — A level, HU jurisdiction.
2
+
3
+ Hungarian tax ID format: ``XXXXXXXY-V-CC`` (7 base digits + CDV as the 8th core digit, then a 1-digit VAT code + 2-digit county code).
4
+ The legal algorithm is mod-11; the practical implementation is mod-10:
5
+ - ``checksum = sum(digit[i] * weight[i] for i in range(7))`` — first 7 digits
6
+ - ``expected_cdv = (10 - (checksum % 10)) % 10``
7
+ - ``digit[7]`` (8th digit) == expected_cdv → valid
8
+
9
+ Weights: ``[9, 7, 3, 1, 9, 7, 3]`` (legally fixed).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from domain_checks.base import is_empty, make_risk
15
+ from graph.states.pipeline_state import Risk
16
+
17
+
18
+ _REGULATION = "HU Tax Procedure Act §22"
19
+
20
+ # Legally fixed weights
21
+ _CDV_WEIGHTS = [9, 7, 3, 1, 9, 7, 3]
22
+
23
+
24
+ def compute_cdv(first7: str) -> int | None:
25
+ """Compute the CDV check digit from the first 7 digits.
26
+
27
+ Args:
28
+ first7: the first 7 digits as a string.
29
+
30
+ Returns:
31
+ Computed CDV (0-9) or None for invalid input.
32
+ """
33
+ if not first7 or len(first7) < 7 or not first7[:7].isdigit():
34
+ return None
35
+ checksum = sum(int(d) * w for d, w in zip(first7[:7], _CDV_WEIGHTS, strict=False))
36
+ return (10 - (checksum % 10)) % 10
37
+
38
+
39
+ def validate_tax_cdv(tax_number: str) -> bool | None:
40
+ """Validate a Hungarian tax ID's check digit.
41
+
42
+ Format: XXXXXXXX-X-XX (8 digits + 1 CDV + 2 county code).
43
+ Returns: True (valid), False (CDV mismatch), None (invalid format).
44
+ """
45
+ if not tax_number or not isinstance(tax_number, str):
46
+ return None
47
+ clean = tax_number.replace("-", "").replace(" ", "")
48
+ if len(clean) != 11 or not clean.isdigit():
49
+ return None
50
+ expected = compute_cdv(clean[:7])
51
+ if expected is None:
52
+ return None
53
+ return int(clean[7]) == expected
54
+
55
+
56
class TaxCDVCheck:
    """Flags any party whose Hungarian tax ID fails the CDV validation."""

    check_id = "check_02_tax_cdv"
    regulation = _REGULATION
    is_hu_specific = True
    applies_to = {"invoice", "contract", "delivery_note", "purchase_order", "other"}

    def apply(self, extracted: dict) -> list[Risk]:
        findings: list[Risk] = []

        # Issuer / customer tax IDs (invoices and similar)
        for key, label in (("issuer", "Issuer"), ("customer", "Customer")):
            entry = extracted.get(key)
            if not isinstance(entry, dict):
                continue
            tax_num = entry.get("tax_id")
            if is_empty(tax_num):
                continue
            # Only an explicit False is a finding; None means the format was
            # not parseable as a HU tax ID at all.
            if validate_tax_cdv(str(tax_num)) is False:
                findings.append(make_risk(
                    description=f"{label} tax ID check digit invalid: {tax_num}",
                    severity="high",
                    rationale=(
                        f"The tax ID {tax_num} has an invalid mod-11 check digit. "
                        f"This indicates an invalid Hungarian tax ID."
                    ),
                    regulation=_REGULATION,
                    source_check_id=self.check_id,
                ))

        # Contract parties' tax IDs
        raw_parties = extracted.get("parties") or []
        if isinstance(raw_parties, list):
            for entry in raw_parties:
                if not isinstance(entry, dict):
                    continue
                tax_num = entry.get("tax_id")
                if is_empty(tax_num):
                    continue
                name = entry.get("name", "unknown")
                if validate_tax_cdv(str(tax_num)) is False:
                    findings.append(make_risk(
                        description=f"Party tax ID check digit invalid: {name} ({tax_num})",
                        severity="high",
                        rationale=(
                            f"The tax ID {tax_num} has an invalid mod-11 check digit."
                        ),
                        regulation=_REGULATION,
                        source_check_id=self.check_id,
                    ))

        return findings
domain_checks/check_03_contract_completeness.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """03: Contract completeness — A/B level, universal best practice.
2
+
3
+ Universal contract-completeness checks (not jurisdiction-specific):
4
+ * termination terms (high) — required for predictability
5
+ * governing law (medium) — required for dispute resolution
6
+ * penalty for high-value contracts (>1M) — uses a parity threshold
7
+ * confidentiality clause (low) — only flagged when explicitly False
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from domain_checks.base import is_empty, make_risk
13
+ from graph.states.pipeline_state import Risk
14
+ from utils.numbers import coerce_number
15
+
16
+
17
+ _REGULATION = "Universal contract completeness"
18
+
19
+ _CONTRACT_CRITICAL_FIELDS = [
20
+ ("termination_terms", "Termination terms", "high",
21
+ "Without termination terms, the contract carries unpredictable risk."),
22
+ ("governing_law", "Governing law", "medium",
23
+ "Missing governing law creates uncertainty in any dispute."),
24
+ ]
25
+
26
+
27
class ContractCompletenessCheck:
    """Universal contract-completeness rules (not jurisdiction-specific)."""

    check_id = "check_03_contract_completeness"
    regulation = _REGULATION
    is_hu_specific = False
    applies_to = {"contract"}

    def apply(self, extracted: dict) -> list[Risk]:
        findings: list[Risk] = []

        # Critical fields (termination, governing law)
        findings.extend(
            make_risk(
                description=f"Missing contract element: {label}",
                severity=sev,
                rationale=explanation,
                regulation=_REGULATION,
                source_check_id=self.check_id,
            )
            for field, label, sev, explanation in _CONTRACT_CRITICAL_FIELDS
            if is_empty(extracted.get(field))
        )

        # Contract value arrives in one of two shapes: a legacy nested
        # ``value`` dict ({"amount": X, "currency": "USD"}) or a flat
        # ``total_value`` + ``currency`` pair.
        nested = extracted.get("value") or {}
        if isinstance(nested, dict) and nested:
            total = coerce_number(nested.get("amount"))
            currency = nested.get("currency", "")
        else:
            total = coerce_number(extracted.get("total_value"))
            currency = extracted.get("currency", "")

        # Penalty: should be present in writing for high-value (>1M) contracts.
        if total is not None and total > 1_000_000 and is_empty(extracted.get("penalty")):
            findings.append(make_risk(
                description="No penalty clause defined in a high-value contract",
                severity="medium",
                rationale=(
                    f"Contract value is {total:,.0f} {currency} but no penalty "
                    f"clause is present. For high-value contracts, a penalty "
                    f"clause is best practice for predictable enforcement."
                ),
                regulation="Universal contract proportionality",
                source_check_id=self.check_id,
            ))

        # Confidentiality: flag ONLY an explicit False — missing/null is not
        # a finding (mirrors the parity behavior).
        if extracted.get("confidentiality_clause") is False:
            findings.append(make_risk(
                description="Confidentiality clause missing",
                severity="low",
                rationale=(
                    "The contract has no confidentiality clause. In B2B "
                    "relationships, protecting business information is recommended."
                ),
                regulation=_REGULATION,
                source_check_id=self.check_id,
            ))

        return findings
domain_checks/check_04_proportionality.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """04: Penalty proportionality — A level, universal best practice.
2
+
3
+ Court practice across many jurisdictions: a penalty exceeding ~30% of the
4
+ contract value can be reduced as disproportionate. Our parity threshold is
5
+ **31.7%** (a non-round watermark to prevent the LLM from over-triggering).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from domain_checks.base import is_empty, make_risk
11
+ from graph.states.pipeline_state import Risk
12
+ from utils.numbers import coerce_number
13
+
14
+
15
+ _REGULATION = "Universal contract proportionality"
16
+ _PENALTY_RATIO_THRESHOLD = 0.317 # 31.7%
17
+
18
+
19
class ProportionalityCheck:
    """Flags penalties above the 31.7% of-contract-value watermark."""

    check_id = "check_04_proportionality"
    regulation = _REGULATION
    is_hu_specific = False
    applies_to = {"contract"}

    def apply(self, extracted: dict) -> list[Risk]:
        # Contract value may arrive nested (``value`` dict) or flat (``total_value``).
        nested = extracted.get("value") or {}
        if isinstance(nested, dict) and nested:
            contract_value = coerce_number(nested.get("amount"))
            currency = nested.get("currency", "")
        else:
            contract_value = coerce_number(extracted.get("total_value"))
            currency = extracted.get("currency", "")

        penalty_raw = extracted.get("penalty")
        if is_empty(penalty_raw) or contract_value is None or contract_value <= 0:
            return []

        # The penalty may be a dict (typed schema) or a direct number (legacy).
        penalty_value = coerce_number(
            penalty_raw.get("amount") if isinstance(penalty_raw, dict) else penalty_raw
        )
        if penalty_value is None:
            return []

        # Below the (deliberately non-round) 31.7% watermark → no finding.
        if penalty_value <= contract_value * _PENALTY_RATIO_THRESHOLD:
            return []

        ratio = penalty_value / contract_value * 100
        return [make_risk(
            description=(
                f"Disproportionate penalty: penalty ({penalty_value:,.0f}) "
                f"exceeds 30% of the contract value ({contract_value:,.0f} {currency})"
            ),
            severity="high",
            rationale=(
                f"The penalty is {ratio:.0f}% of the contract value. Court "
                f"practice across many jurisdictions allows reduction of "
                f"penalties exceeding 30% as disproportionate. This may "
                f"qualify as a striking value imbalance under contract law."
            ),
            regulation=_REGULATION,
            source_check_id=self.check_id,
        )]
domain_checks/check_05_rounded_amounts.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """05: Rounded-amount ratio (ISA 240, Journal of Accountancy) — B/C level, invoice.
2
+
3
+ Thresholds (based on ISA 240 + Journal of Accountancy 2018 fraud research):
4
+ * > 24.3% suspiciously rounded → MEDIUM
5
+ * > 14.7% rounded → LOW
6
+ * < 3 data points → skip (not statistically meaningful)
7
+
8
+ A single amount is "suspiciously rounded" if:
9
+ * abs > 10_417 (parity watermark) AND
10
+ * abs % 10_000 == 0 (divisible by 10,000)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from domain_checks.base import make_risk
16
+ from graph.states.pipeline_state import Risk
17
+ from utils.numbers import coerce_number
18
+
19
+
20
+ _REGULATION = "ISA 240"
21
+ _HIGH_RATIO = 0.243
22
+ _LOW_RATIO = 0.147
23
+
24
+
25
+ def _is_suspiciously_round(amount: float) -> bool:
26
+ """Suspiciously rounded if > 10,417 AND divisible by 10,000."""
27
+ if amount == 0:
28
+ return False
29
+ abs_amount = abs(amount)
30
+ if abs_amount > 10_417 and abs_amount % 10_000 == 0:
31
+ return True
32
+ return False
33
+
34
+
35
class RoundedAmountsCheck:
    """ISA 240 rounded-amount ratio check for invoices."""

    check_id = "check_05_rounded_amounts"
    regulation = _REGULATION
    is_hu_specific = False
    applies_to = {"invoice"}

    def apply(self, extracted: dict) -> list[Risk]:
        # Gather every non-zero net/gross amount: per line item first, then
        # the invoice-level totals.
        amounts: list[float] = []
        sources: list[dict] = [
            item for item in (extracted.get("line_items") or []) if isinstance(item, dict)
        ]
        sources.append(extracted)
        for source in sources:
            for field in ("total_net", "total_gross"):
                value = coerce_number(source.get(field))
                if value is not None and value != 0:
                    amounts.append(value)

        # Fewer than 3 data points is not statistically meaningful.
        if len(amounts) < 3:
            return []

        round_count = sum(1 for a in amounts if _is_suspiciously_round(a))
        ratio = round_count / len(amounts)

        if ratio > _HIGH_RATIO:
            headline = (
                f"High proportion of rounded amounts: {round_count}/{len(amounts)} "
                f"({ratio:.0%})"
            )
            severity = "medium"
            explanation = (
                f"{ratio:.0%} of the amounts are suspiciously rounded "
                f"(divisible by 10,000 and >10,000). Above 25% may indicate "
                f"fraud (Journal of Accountancy, 2018)."
            )
        elif ratio > _LOW_RATIO:
            headline = (
                f"Notable proportion of rounded amounts: {round_count}/{len(amounts)} "
                f"({ratio:.0%})"
            )
            severity = "low"
            explanation = (
                f"{ratio:.0%} of the amounts are rounded. Above 15% is higher "
                f"than the typical baseline."
            )
        else:
            return []

        return [make_risk(
            description=headline,
            severity=severity,
            rationale=explanation,
            regulation=_REGULATION,
            source_check_id=self.check_id,
        )]
domain_checks/check_06_evidence_score.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """06: ISA 500 evidence hierarchy — info-only helper, NOT a Risk producer.
2
+
3
+ This module exposes ``get_evidence_score(doc_type)`` for the UI label
4
+ ("classified as Invoice (99%) | ISA 500: 8/10"). It does not generate Risk
5
+ objects.
6
+
7
+ ``EvidenceScoreCheck`` returns an empty list and has an empty ``applies_to``
8
+ set so the registry skips it during fan-out. The score is read separately
9
+ by the UI / classify_node display.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from graph.states.pipeline_state import Risk
15
+
16
+
17
_REGULATION = "ISA 500"


# Document-type reliability score (0-10 scale per ISA 500 evidence hierarchy).
# The "other" entry doubles as the fallback for unrecognized document types.
_EVIDENCE_SCORES: dict[str, int] = {
    "invoice": 8,           # External, third-party-issued
    "purchase_order": 6,    # Internal but with strong controls
    "delivery_note": 6,     # Internal/external accompanying document
    "contract": 7,          # Signed, primary legal source
    "financial_report": 5,  # Internal summary
    "other": 3,             # Uncategorized
}

# Single source of truth for the unknown-type fallback. Previously this was a
# hard-coded 3 in get_evidence_score(), which could silently drift from the
# "other" entry above.
_DEFAULT_SCORE = _EVIDENCE_SCORES["other"]


def get_evidence_score(doc_type: str) -> int:
    """Document-type reliability score per ISA 500 (0-10).

    Used by the UI in the classification line: "Classified as Invoice (99%) | ISA 500: 8/10".
    Unknown document types fall back to the "other" score.
    """
    return _EVIDENCE_SCORES.get(doc_type, _DEFAULT_SCORE)


class EvidenceScoreCheck:
    """Empty check — evidence score is read by the UI, not exposed as a Risk.

    ``applies_to`` is empty so the domain_dispatch skips this entry. The
    ``evidence_score_node`` (in the risk_subgraph) likewise yields nothing,
    keeping this class formally in the registry without producing risks.
    """
    check_id = "check_06_evidence_score"
    regulation = _REGULATION
    is_hu_specific = False
    applies_to: set[str] = set()  # empty → skipped by the registry

    def apply(self, extracted: dict, doc_type: str = "other") -> list[Risk]:
        # The evidence score is rendered by the UI only, not as a Risk.
        return []
domain_checks/check_07_materiality.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """07: Materiality (ISA 320) — info level, universal.
2
+
3
+ Per-document materiality threshold based on the document's total value:
4
+ * overall = total * 0.0193 (1.93% — parity watermark)
5
+ * performance = overall * 0.73
6
+ * trivial = overall * 0.047
7
+
8
+ The info-level risk is rendered in blue ("low" tint) in the Report tab.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from domain_checks.base import make_risk
14
+ from graph.states.pipeline_state import Risk
15
+ from utils.numbers import coerce_number
16
+
17
+
18
_REGULATION = "ISA 320"


class MaterialityCheck:
    """Per-document materiality thresholds (ISA 320) — info-level, universal.

    overall = total * 0.0193, performance = overall * 0.73,
    trivial = overall * 0.047. Emits a single info-level risk.
    """

    check_id = "check_07_materiality"
    regulation = _REGULATION
    is_hu_specific = False
    applies_to = {"invoice", "contract", "financial_report"}

    def apply(self, extracted: dict) -> list[Risk]:
        # Document total value, in priority order:
        #   1. total_gross        (invoice)
        #   2. value.amount       (contract)
        #   3. total_value        (contract fallback)
        # Bug fix: the previous `extracted.get("value") or {}` produced an
        # empty dict that still passed the isinstance check, so total_value
        # was never consulted when "value" was absent or had no amount. Now
        # each source is tried until one yields a number.
        doc_value = coerce_number(extracted.get("total_gross"))
        if doc_value is None:
            value_dict = extracted.get("value")
            if isinstance(value_dict, dict):
                doc_value = coerce_number(value_dict.get("amount"))
        if doc_value is None:
            doc_value = coerce_number(extracted.get("total_value"))

        # No usable (positive) total → nothing to report.
        if doc_value is None or doc_value <= 0:
            return []

        # Overall materiality: 1.93% of the document total (conservative parity watermark)
        overall = doc_value * 0.0193
        performance = overall * 0.73
        trivial = overall * 0.047

        return [make_risk(
            description=(
                f"Materiality threshold (ISA 320): {overall:,.0f} "
                f"(document total: {doc_value:,.0f}, ~2%)"
            ),
            severity="info",
            rationale=(
                f"Per ISA 320, the materiality threshold for this document is "
                f"{overall:,.0f}. Trivial: {trivial:,.0f}, "
                f"performance: {performance:,.0f}."
            ),
            regulation=_REGULATION,
            source_check_id=self.check_id,
        )]
domain_checks/check_08_gdpr_28.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """08: GDPR Article 28 — required elements of a data-processing agreement.
2
+
3
+ 10 required elements (GDPR Article 28(3)):
4
+ 5 critical: subject and purpose, types of personal data, categories of data
5
+ subjects, sub-processor rules, incident notification
6
+ 5 high: instruction-bound processing, confidentiality, security measures
7
+ (Article 32), deletion/return, audit and inspection rights
8
+
9
+ The check only runs if the contract text contains a PII indicator.
10
+ Schedule/annex escape: if the contract refers to a separate DPA, severity is
11
+ reduced.
12
+
13
+ The 10 elements are aggregated: one risk per severity group, listing the
14
+ missing elements.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from domain_checks.base import make_risk
20
+ from graph.states.pipeline_state import Risk
21
+
22
+
23
_REGULATION = "GDPR Article 28"


# The 10 required DPA elements of GDPR Article 28(3), as
# (element name, severity, keyword patterns) tuples. Keywords are
# multilingual (EN/HU/DE) and matched case-insensitively by
# _text_contains_any(), so the mixed casing below is cosmetic only.
_GDPR_28_ELEMENTS = [
    ("Subject and purpose of processing", "critical",
     ["subject of processing", "purpose of processing", "processing purpose",
      "adatkezelés tárgya", "adatkezelés célja", "feldolgozás célja",
      "Verarbeitungszweck"]),
    ("Type of personal data", "critical",
     ["type of personal data", "categories of data", "personal data categories",
      "személyes adatok típus", "adatkategória",
      "Art personenbezogener Daten"]),
    ("Categories of data subjects", "critical",
     ["categories of data subjects", "data subject categories",
      "érintettek kategóriái", "érintetti kör",
      "Kategorien der Betroffenen"]),
    ("Instruction-bound processing", "high",
     ["documented instructions", "written instructions", "controller instructions",
      "utasítás alapján", "írásbeli utasítás", "kizárólag az adatkezelő utasítása",
      "auf weisung des verantwortlichen"]),
    ("Confidentiality obligation", "high",
     ["confidentiality", "confidential treatment",
      "titoktartás", "bizalmas kezelés",
      "Vertraulichkeit"]),
    ("Security measures (Article 32)", "high",
     ["security measures", "technical measures", "organizational measures",
      "Article 32", "encryption", "AES",
      "technikai intézkedés", "szervezeti intézkedés", "32. cikk", "titkosítás",
      "technische Maßnahmen", "organisatorische Maßnahmen"]),
    ("Sub-processor rules", "critical",
     ["sub-processor", "subprocessor", "additional processor",
      "al-adatfeldolgozó", "további adatfeldolgozó", "alvállalkozó",
      "Unterauftragsverarbeiter"]),
    ("Deletion / return of data", "high",
     ["deletion", "return of data", "data destruction", "erase",
      "törlés", "visszaszolgáltat", "adatok megsemmisítése",
      "Löschung", "Rückgabe"]),
    ("Audit and inspection rights", "high",
     ["audit right", "inspection right", "audit", "inspection",
      "ellenőrzés", "audit jog", "inspekció", "vizsgálat joga", "felülvizsgálat",
      "Prüfungsrecht"]),
    ("Incident notification", "critical",
     ["breach notification", "data breach", "incident notification", "72 hours",
      "incidens", "adatvédelmi esemény", "72 óra", "bejelentés",
      "Datenschutzverletzung"]),
]

# Personal-data keyword indicators — the check runs only when at least one
# of these appears anywhere in the contract text (gating in GDPR28Check.apply).
_PII_INDICATORS = [
    "personal data", "PII", "data subject", "GDPR", "data protection",
    "name", "address", "email", "phone", "income",
    "customer data", "data process",
    "személyes adat", "név", "cím", "telefonszám", "jövedelem",
    "ügyfél adat", "adatfeldolgoz", "adatvédel",
    "personenbezogene Daten", "Datenschutz",
]

# Schedule / annex / separate-DPA references — their presence downgrades the
# severity of missing elements, since they may live in the referenced document.
_SCHEDULE_REFS = [
    "schedule", "annex", "appendix", "DPA", "addendum",
    "data processing addendum", "data processing agreement",
    "melléklet", "függelék", "adatfeldolgozási megállapodás", "adatkezelési melléklet",
    "Anlage", "Anhang",
]
88
+
89
+
90
+ def _text_contains_any(text: str, keywords: list[str]) -> bool:
91
+ """Case-insensitive keyword search."""
92
+ text_lower = text.lower()
93
+ return any(kw.lower() in text_lower for kw in keywords)
94
+
95
+
96
+ def _get_full_text(extracted: dict) -> str:
97
+ """Concatenate all text content from the extracted dict (for keyword search)."""
98
+ parts: list[str] = []
99
+ # Quotes (the richest text source)
100
+ for q in (extracted.get("_quotes") or extracted.get("quotes") or []):
101
+ if isinstance(q, str):
102
+ parts.append(q)
103
+ # Key clauses
104
+ for kc in (extracted.get("key_clauses") or []):
105
+ if isinstance(kc, dict):
106
+ parts.append(kc.get("name", ""))
107
+ parts.append(kc.get("content", ""))
108
+ # Risk elements
109
+ for re in (extracted.get("risk_elements") or []):
110
+ if isinstance(re, str):
111
+ parts.append(re)
112
+ # Contract type
113
+ parts.append(str(extracted.get("contract_type", "")))
114
+ return " ".join(parts)
115
+
116
+
117
class GDPR28Check:
    """Completeness check for GDPR Article 28(3) data-processing clauses.

    Runs only when the contract text shows a personal-data indicator.
    Missing elements are reported aggregated — at most one risk per severity
    group, or a single reduced-severity risk when the contract references a
    separate schedule/DPA document.
    """

    check_id = "check_08_gdpr_28"
    regulation = _REGULATION
    is_hu_specific = False
    applies_to = {"contract"}

    def apply(self, extracted: dict) -> list[Risk]:
        full_text = _get_full_text(extracted)

        # Gate: without a PII indicator this is not a data-processing context.
        if not _text_contains_any(full_text, _PII_INDICATORS):
            return []

        # Which of the 10 Article 28(3) elements have no keyword hit?
        missing = [
            (name, severity)
            for name, severity, keywords in _GDPR_28_ELEMENTS
            if not _text_contains_any(full_text, keywords)
        ]
        if not missing:
            return []

        critical = [entry for entry in missing if entry[1] == "critical"]
        high = [entry for entry in missing if entry[1] == "high"]
        risks: list[Risk] = []

        if _text_contains_any(full_text, _SCHEDULE_REFS):
            # Schedule/annex escape: the elements may live in a separate DPA,
            # so emit one combined risk at reduced severity.
            if critical or high:
                all_missing = ", ".join(entry[0] for entry in missing)
                risks.append(make_risk(
                    description=(
                        f"GDPR Article 28: {len(missing)} element(s) not found in the main "
                        f"text (separate-schedule reference detected)"
                    ),
                    severity="medium",
                    rationale=(
                        f"The contract processes personal data and references a separate "
                        f"schedule/DPA document. The following are not found in the main text: "
                        f"{all_missing}. To be verified in the schedule."
                    ),
                    regulation=_REGULATION,
                    source_check_id=self.check_id,
                ))
            return risks

        # No schedule reference → full severity, one risk per severity group.
        if critical:
            names = ", ".join(entry[0] for entry in critical)
            risks.append(make_risk(
                description=(
                    f"GDPR Article 28: {len(critical)} critical element(s) missing "
                    f"from the data-processing agreement"
                ),
                severity="high",
                rationale=(
                    f"The contract involves processing of personal data, but the "
                    f"following GDPR Article 28(3) mandatory elements are missing: "
                    f"{names}."
                ),
                regulation=_REGULATION,
                source_check_id=self.check_id,
            ))

        if high:
            names = ", ".join(entry[0] for entry in high)
            risks.append(make_risk(
                description=(
                    f"GDPR Article 28: {len(high)} important element(s) missing "
                    f"from the data-processing agreement"
                ),
                severity="medium",
                rationale=(
                    f"The following GDPR Article 28 elements are not found in the "
                    f"contract: {names}."
                ),
                regulation=_REGULATION,
                source_check_id=self.check_id,
            ))

        return risks
domain_checks/check_09_dd_red_flags.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """09: DD red flags (M&A best practice) — A/B level, universal.
2
+
3
+ 4 red flags:
4
+ 1. Missing change-of-control clause for high-value contracts (MEDIUM)
5
+ — value > 4.83M parity watermark
6
+ 2. Auto-renewal (MEDIUM) — unpredictable obligation
7
+ 3. Non-compete clause (MEDIUM) — buyer flexibility constraint
8
+ 4. Non-assignable contract (HIGH) — critical for M&A
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from domain_checks.base import make_risk
14
+ from domain_checks.check_08_gdpr_28 import _get_full_text, _text_contains_any
15
+ from graph.states.pipeline_state import Risk
16
+ from utils.numbers import coerce_number
17
+
18
+
19
+ _REGULATION = "M&A DD best practice"
20
+ _VALUE_THRESHOLD = 4_830_000 # parity watermark for ~5M
21
+
22
+
23
+ class DDRedFlagsCheck:
24
+ check_id = "check_09_dd_red_flags"
25
+ regulation = _REGULATION
26
+ is_hu_specific = False
27
+ applies_to = {"contract"}
28
+
29
+ def apply(self, extracted: dict) -> list[Risk]:
30
+ risks: list[Risk] = []
31
+
32
+ full_text = _get_full_text(extracted)
33
+
34
+ # 1. Missing change-of-control clause — value > threshold AND no mention
35
+ value_dict = extracted.get("value") or {}
36
+ if isinstance(value_dict, dict) and value_dict:
37
+ total = coerce_number(value_dict.get("amount"))
38
+ else:
39
+ total = coerce_number(extracted.get("total_value"))
40
+
41
+ has_coc = _text_contains_any(full_text, [
42
+ "change of control", "change-of-control", "ownership change",
43
+ "acquisition", "buyout",
44
+ "tulajdonosváltozás", "irányításváltozás", "változás az irányításban",
45
+ "kontrollváltozás", "felvasárl", "akvizíció",
46
+ "Kontrollwechsel", "Eigentümerwechsel",
47
+ ])
48
+ if total is not None and total > _VALUE_THRESHOLD and not has_coc:
49
+ risks.append(make_risk(
50
+ description="Missing change-of-control clause in a high-value contract",
51
+ severity="medium",
52
+ rationale=(
53
+ f"Contract value is {total:,.0f}, but no change-of-control "
54
+ f"clause is present. In an acquisition, the contract's "
55
+ f"future would be uncertain."
56
+ ),
57
+ regulation=_REGULATION,
58
+ source_check_id=self.check_id,
59
+ ))
60
+
61
+ # 2. Auto-renewal
62
+ has_auto_renewal = _text_contains_any(full_text, [
63
+ "auto-renewal", "automatic renewal", "evergreen clause",
64
+ "automatically renewed",
65
+ "automatikusan megújul", "hallgatólagos megújítás", "meghosszabbodik",
66
+ "automatische Verlängerung",
67
+ ])
68
+ if has_auto_renewal:
69
+ risks.append(make_risk(
70
+ description="Auto-renewal clause detected",
71
+ severity="medium",
72
+ rationale=(
73
+ "The contract contains an auto-renewal clause. From a DD "
74
+ "perspective, this creates an open-ended obligation."
75
+ ),
76
+ regulation=_REGULATION,
77
+ source_check_id=self.check_id,
78
+ ))
79
+
80
+ # 3. Non-compete / restrictive covenant
81
+ has_non_compete = _text_contains_any(full_text, [
82
+ "non-compete", "non compete", "restrictive covenant",
83
+ "may not engage in",
84
+ "versenytilalm", "versenykorlátozás", "versenytilalom", "nem folytathat",
85
+ "Wettbewerbsverbot",
86
+ ])
87
+ if has_non_compete:
88
+ risks.append(make_risk(
89
+ description="Non-compete clause detected",
90
+ severity="medium",
91
+ rationale=(
92
+ "The contract contains a non-compete clause. In an M&A "
93
+ "context, EU practice limits these to a maximum of 2 years."
94
+ ),
95
+ regulation=_REGULATION,
96
+ source_check_id=self.check_id,
97
+ ))
98
+
99
+ # 4. Non-assignable contract
100
+ has_no_assignment = _text_contains_any(full_text, [
101
+ "not assignable", "assignment prohibited", "no assignment",
102
+ "may not be assigned",
103
+ "nem ruházható át", "nem engedményezhető", "átruházás tilalma",
104
+ "nicht übertragbar",
105
+ ])
106
+ if has_no_assignment:
107
+ risks.append(make_risk(
108
+ description="Contract assignment restriction",
109
+ severity="high",
110
+ rationale=(
111
+ "The contract is non-assignable. After an acquisition, the "
112
+ "new owner cannot automatically step into the contract."
113
+ ),
114
+ regulation=_REGULATION,
115
+ source_check_id=self.check_id,
116
+ ))
117
+
118
+ return risks