Nándorfi Vince committed on
Commit
7ff7119
·
0 Parent(s):

Initial paperhawk push to HF Space (LFS for binaries)

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +36 -0
  2. .env.example +58 -0
  3. .gitattributes +7 -0
  4. .gitignore +57 -0
  5. ARCHITECTURE.md +214 -0
  6. CLAUDE.md +191 -0
  7. Dockerfile +48 -0
  8. LICENSE +21 -0
  9. Makefile +66 -0
  10. NOTICE.md +34 -0
  11. README.md +168 -0
  12. app/__init__.py +0 -0
  13. app/async_runtime.py +126 -0
  14. app/main.py +931 -0
  15. app/streaming.py +97 -0
  16. app/tabs/__init__.py +0 -0
  17. config.py +129 -0
  18. data/sanctions_snapshot.json +114 -0
  19. docker-compose.yml +52 -0
  20. docs/HF_SPACE_DEFAULT_GETTING_STARTED.md +193 -0
  21. docs/SUBMISSION.md +170 -0
  22. docs/hf-space-deployment.md +124 -0
  23. docs/qwen-vllm-deployment.md +68 -0
  24. docs/slides/01_cover.png +3 -0
  25. docs/slides/PaperHawk_Slides.pdf +3 -0
  26. docs/slides/PaperHawk_Slides.pptx +3 -0
  27. docs/slides/README.md +104 -0
  28. docs/slides/png/slide_01.png +3 -0
  29. docs/slides/png/slide_02.png +3 -0
  30. docs/slides/png/slide_03.png +3 -0
  31. docs/slides/png/slide_04.png +3 -0
  32. docs/slides/png/slide_05.png +3 -0
  33. docs/slides/png/slide_06.png +3 -0
  34. docs/slides/png/slide_07.png +3 -0
  35. docs/slides/png/slide_08.png +3 -0
  36. docs/slides/png/slide_09.png +3 -0
  37. docs/slides/png/slide_10.png +3 -0
  38. docs/slides/slides.html +897 -0
  39. docs/social-posts/post-1-build-window-opens.md +165 -0
  40. domain_checks/__init__.py +140 -0
  41. domain_checks/base.py +60 -0
  42. domain_checks/check_01_invoice_mandatory.py +123 -0
  43. domain_checks/check_02_tax_cdv.py +108 -0
  44. domain_checks/check_03_contract_completeness.py +85 -0
  45. domain_checks/check_04_proportionality.py +68 -0
  46. domain_checks/check_05_rounded_amounts.py +96 -0
  47. domain_checks/check_06_evidence_score.py +53 -0
  48. domain_checks/check_07_materiality.py +60 -0
  49. domain_checks/check_08_gdpr_28.py +202 -0
  50. domain_checks/check_09_dd_red_flags.py +118 -0
.dockerignore ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git
2
+ .git
3
+ .gitignore
4
+
5
+ # Env
6
+ .env
7
+ .env.*
8
+ !.env.example
9
+
10
+ # Python
11
+ __pycache__
12
+ *.pyc
13
+ *.pyo
14
+ .pytest_cache
15
+ .ruff_cache
16
+ .venv
17
+ venv
18
+
19
+ # IDE
20
+ .vscode
21
+ .idea
22
+ .DS_Store
23
+
24
+ # Perzisztens runtime adat (mount-oljuk, ne image-be sütjük)
25
+ chroma_db/
26
+ data/checkpoints.sqlite*
27
+
28
+ # Tervek és dokumentáció (image-be felesleges)
29
+ tervek/
30
+ dokumentacio/
31
+
32
+ # Test results
33
+ test_results/
34
+
35
+ # Node
36
+ node_modules/
.env.example ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # LLM Provider
3
+ # =============================================================================
4
+ # Profile: vllm (default, AMD MI300X) | ollama (local fallback) | dummy (CI/eval)
5
+ LLM_PROFILE=vllm
6
+
7
+ # vLLM (AMD Developer Cloud MI300X) — DEFAULT
8
+ # Point this at the public URL of your AMD MI300X vLLM endpoint.
9
+ # Local dev: http://localhost:8000/v1
10
+ VLLM_BASE_URL=http://localhost:8000/v1
11
+ VLLM_MODEL=Qwen/Qwen2.5-14B-Instruct
12
+ VLLM_API_KEY=
13
+ # VLLM_API_KEY left blank = client sends "EMPTY" (vLLM no-auth mode)
14
+ # In production set a real key and start vLLM with --api-key <key>
15
+ VLLM_TEMPERATURE=0.0
16
+ VLLM_MAX_TOKENS=4096
17
+
18
+ # Ollama (optional local fallback, only when LLM_PROFILE=ollama)
19
+ OLLAMA_BASE_URL=http://localhost:11434
20
+ OLLAMA_MODEL=qwen2.5:7b-instruct
21
+
22
+ # =============================================================================
23
+ # Embedding (sentence-transformers / Hugging Face, runs locally on CPU)
24
+ # =============================================================================
25
+ # Default: BAAI/bge-m3 (2.27 GB, 1024 dim, multilingual incl. EN/HU/DE/FR/...)
26
+ # Lighter alternative if memory-constrained: BAAI/bge-small-en-v1.5 (133 MB, 384 dim, en-only)
27
+ EMBEDDING_MODEL=BAAI/bge-m3
28
+
29
+ # =============================================================================
30
+ # Storage
31
+ # =============================================================================
32
+ CHROMA_PATH=./chroma_db
33
+ CHROMA_COLLECTION=documents
34
+ CHECKPOINT_DB_PATH=./data/checkpoints.sqlite
35
+
36
+ # =============================================================================
37
+ # Pipeline tuning
38
+ # =============================================================================
39
+ CHUNK_MAX_CHARS=15000
40
+ CHUNK_OVERLAP_CHARS=500
41
+ SINGLE_CALL_THRESHOLD=30000
42
+
43
+ # Agentic loop guards
44
+ CHAT_MAX_ITERATIONS=10
45
+ VALIDATOR_MAX_RETRIES=2
46
+ DD_SUPERVISOR_MAX_ITERATIONS=4
47
+
48
+ # =============================================================================
49
+ # LangSmith observability (optional)
50
+ # =============================================================================
51
+ # LANGCHAIN_TRACING_V2=true
52
+ # LANGCHAIN_API_KEY=lsv2_pt_XXXXXXXXXXXXXXXXXXXXXXX
53
+ # LANGCHAIN_PROJECT=document-intelligence-amd
54
+
55
+ # =============================================================================
56
+ # Streamlit
57
+ # =============================================================================
58
+ STREAMLIT_PORT=8501
.gitattributes ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ *.png filter=lfs diff=lfs merge=lfs -text
2
+ *.pdf filter=lfs diff=lfs merge=lfs -text
3
+ *.pptx filter=lfs diff=lfs merge=lfs -text
4
+ *.docx filter=lfs diff=lfs merge=lfs -text
5
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
6
+ *.jpg filter=lfs diff=lfs merge=lfs -text
7
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+
8
+ # Virtuális környezet (nem hordozható)
9
+ .venv/
10
+ venv/
11
+ env/
12
+ ENV/
13
+
14
+ # Disztribúció
15
+ build/
16
+ dist/
17
+ *.egg-info/
18
+ *.egg
19
+ .eggs/
20
+
21
+ # Tesztelés
22
+ .pytest_cache/
23
+ .coverage
24
+ .coverage.*
25
+ htmlcov/
26
+ .tox/
27
+ .nox/
28
+
29
+ # Környezeti változók
30
+ .env
31
+ .env.local
32
+ .env.*.local
33
+ !.env.example
34
+
35
+ # Perzisztens runtime adat (auto-generálódik)
36
+ chroma_db/
37
+ data/checkpoints.sqlite
38
+ data/checkpoints.sqlite-*
39
+ *.log
40
+
41
+ # HuggingFace / sentence-transformers cache
42
+ .cache/
43
+
44
+ # IDE / OS
45
+ .vscode/
46
+ .idea/
47
+ *.swp
48
+ *.swo
49
+ *~
50
+ .DS_Store
51
+ Thumbs.db
52
+
53
+ # Node (defenzív)
54
+ node_modules/
55
+
56
+ # Test results
57
+ test_results/
ARCHITECTURE.md ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Architecture
2
+
3
+ LangGraph-native Document Intelligence platform. This document goes beyond
4
+ the README — it covers design decisions, the subgraph hierarchy, state
5
+ design, and the anti-hallucination stack.
6
+
7
+ ## 1. High-level architecture
8
+
9
+ ### 4 compiled LangGraph artifacts
10
+
11
+ The system is organized around four graphs sharing a common `AsyncSqliteSaver`
12
+ checkpointer:
13
+
14
+ | # | Graph | Entry point | When |
15
+ |---|-------|-------------|------|
16
+ | 1 | `pipeline_graph` | `app.run_pipeline(files)` | on upload |
17
+ | 2 | `chat_graph` | `app.ask(question)` | chat tab |
18
+ | 3 | `dd_graph` | `app.dd_report(thread_id)` | DD tab button |
19
+ | 4 | `package_insights_graph` | `app.package_insights(thread_id, pkg_type)` | demo button |
20
+
21
+ Chat tools read from the persisted pipeline state — they do not re-read
22
+ files. They access the in-memory `ChatToolContext`, which holds the
23
+ HybridStore and a documents snapshot.
24
+
25
+ ### Pipeline graph topology
26
+
27
+ ```
28
+ START
29
+ → start_timer
30
+ → dispatch_ingest (Send API: per-doc fan-out)
31
+ → ingest_per_doc (PDF/DOCX/PNG/TXT loader subgraph)
32
+ → ingest_join (fan-in)
33
+ → dispatch_classify (Send API)
34
+ → classify_per_doc (regex/keyword classifier in dummy mode;
35
+ vision-aware in vLLM mode)
36
+ → classify_join
37
+ → dispatch_extract (Send API)
38
+ → extract_per_doc (regex extractor in dummy mode +
39
+ flatten_universal; structured LLM in vLLM mode)
40
+ → extract_join
41
+ → quote_validator (anti-hallucination layer #7)
42
+ → dispatch_rag_index (Send API)
43
+ → rag_index_per_doc (chunker + batched embed + Chroma+BM25 upsert)
44
+ → rag_join
45
+ → compare_node (three-way matching, sync)
46
+ → risk_subgraph (basic + 14 domain × Send + plausibility +
47
+ LLM ensemble + duplicate)
48
+ → finish_timer
49
+ → report_node (10-section JSON structure)
50
+ → END
51
+ ```
52
+
53
+ The per-doc Send fan-out yields a 5–8× speedup in a CPU-bound environment.
54
+
55
+ ### Risk subgraph topology
56
+
57
+ ```
58
+ risk_subgraph (input: PipelineState):
59
+ → basic_risk_dispatch (Send: per-doc basic risk)
60
+ → basic_risk / noop_basic
61
+ → domain_dispatch_node (Send: per-doc × per-applicable-check, ~30 parallel)
62
+ → apply_domain_check
63
+ → [if llm provided] llm_risk_dispatch (Send: per-doc LLM risk + 3-filter chain)
64
+ → llm_risk_per_doc / noop_llm
65
+ → plausibility_dispatch (Send: per-doc plausibility)
66
+ → plausibility / noop_plaus
67
+ → evidence_score_node (per-doc info)
68
+ → duplicate_detector_node (package-level, sync, ISA 240)
69
+ END
70
+ ```
71
+
72
+ The full anti-hallucination 5+1 layer chain runs inside `llm_risk_per_doc`:
73
+ `llm_risk → filter_llm_risks → drop_business_normal → drop_repeats`.
74
+
75
+ ### DD multi-agent supervisor graph
76
+
77
+ ```
78
+ dd_graph:
79
+ START
80
+ → contract_filter_node (keep only contract-type docs)
81
+ → per_contract_summary_node (Python-deterministic per-contract DDContractSummary)
82
+ → supervisor_node (LLM router or heuristic; Command(goto=...))
83
+ ├─ → audit_specialist (pricing anomalies, overcharging)
84
+ ├─ → legal_specialist (red flags, change-of-control, non-compete)
85
+ ├─ → compliance_specialist (GDPR, AML, data protection)
86
+ └─ → financial_specialist (monthly obligations, expirations)
87
+ ↺ (loops back to supervisor up to dd_supervisor_max_iterations)
88
+ → dd_synthesizer (one LLM call: executive_summary +
89
+ top_red_flags + per-contract risk_level rating)
90
+ END
91
+ ```
92
+
93
+ ### Package insights graph
94
+
95
+ A simple 1-LLM-call graph: ingests the full document package and produces
96
+ cross-doc findings using a perspective-driven prompt
97
+ (`audit | dd | compliance | general`).
98
+
99
+ ## 2. State design
100
+
101
+ ### `PipelineState` (TypedDict)
102
+
103
+ Read-mostly fields with **reducer-driven Send fan-in**:
104
+
105
+ - `files: list[tuple[str, bytes]]` — raw upload
106
+ - `documents: Annotated[list[ProcessedDocument], merge_doc_results]` —
107
+ per-doc field-level merge keyed by `file_name`
108
+ - `risks: Annotated[list[Risk], merge_risks]` — dedup by description
109
+ - `comparison: ComparisonReport | None`
110
+ - `report: dict`
111
+ - `package_insights: PackageInsights | None`
112
+ - `dd_report: DDPortfolioReport | None`
113
+ - `started_at`, `finished_at`, `processing_seconds`
114
+ - `progress_events: Annotated[list[str], add]` — Streamlit progress feed
115
+
116
+ ### `Risk` (Pydantic)
117
+
118
+ The single risk type used everywhere:
119
+
120
+ - `description: str`
121
+ - `severity: str` (`"high" | "medium" | "low" | "info"`)
122
+ - `rationale: str`
123
+ - `kind: str` (`"validation" | "domain_rule" | "plausibility" | "llm_analysis" | "cross_check"`)
124
+ - `regulation: str | None` (e.g. `"HU VAT Act §169"`, `"ISA 240"`, `"GDPR Article 28"`)
125
+ - `affected_document: str | None`
126
+ - `source_check_id: str | None`
127
+
128
+ ## 3. Anti-hallucination stack (5+1 layers)
129
+
130
+ 1. **`temperature=0`** — every LLM call is deterministic-ish.
131
+ 2. **`_quotes` schema field** — verbatim source citations.
132
+ 3. **`_confidence` schema field** — per-field reliability (high|medium|low).
133
+ 4. **`validate_plausibility()`** — Python deterministic plausibility checks
134
+ (negative VAT, non-standard rates, future dates, etc.).
135
+ 5. **3-filter LLM risk pipeline** —
136
+ `filter_llm_risks` (formal: ≥5 words, ≥2 domain terms, ≥1 concrete fact)
137
+ → `drop_business_normal_risks` (semantic: cross-check vs extracted_data,
138
+ 6 known false-positive patterns)
139
+ → `drop_repeats_of_basic` (textual dedup vs basic risks, 70% threshold).
140
+ 6. **Quote validator** — final cross-check that every `_quotes` entry
141
+ actually appears in the source `full_text` (whitespace + diacritic +
142
+ case normalized). If invalid, downgrades confidence.
143
+
144
+ ## 4. Domain checks (14 deterministic rules)
145
+
146
+ | # | check_id | Regulation | HU-specific? | Applies to |
147
+ |---|----------|-----------|--------------|------------|
148
+ | 01 | `check_01_invoice_mandatory` | HU VAT Act §169 | yes | invoice |
149
+ | 02 | `check_02_tax_cdv` | HU Tax Procedure Act §22 mod-11 | yes | invoice + contract + ... |
150
+ | 03 | `check_03_contract_completeness` | Universal contract completeness | no | contract |
151
+ | 04 | `check_04_proportionality` | Universal contract proportionality (>31.7%) | no | contract |
152
+ | 05 | `check_05_rounded_amounts` | ISA 240 (Journal of Accountancy 2018) | no | invoice |
153
+ | 06 | `check_06_evidence_score` | ISA 500 | no | (separate entry, info-only) |
154
+ | 07 | `check_07_materiality` | ISA 320 | no | invoice + contract + financial_report |
155
+ | 08 | `check_08_gdpr_28` | GDPR Article 28 | no (EU) | contract |
156
+ | 09 | `check_09_dd_red_flags` | M&A DD best practice | no | contract |
157
+ | 10 | `check_10_incoterms` | Incoterms 2020 | no | contract |
158
+ | 11 | `check_11_ifrs_har` | IFRS / national GAAP comparison | no | financial_report |
159
+ | 12 | `check_12_duplicate_invoice` | ISA 240 (duplicate invoice) | no | (separate entry, package-level) |
160
+ | 13 | `check_13_aml_sanctions` | AML / Sanctions screening | no | invoice + contract + ... |
161
+ | 14 | `check_14_contract_dates` | Contract date best practice | no | contract |
162
+
163
+ The dispatch in `domain_dispatch_node` skips `check_06` and `check_12` (they
164
+ have separate entry points) and filters `is_hu_specific=True` out for non-HU
165
+ documents.
166
+
167
+ ## 5. Provider system
168
+
169
+ Three providers via `configurable_alternatives`:
170
+
171
+ - **`vllm`** — `ChatOpenAI` with `base_url=VLLM_BASE_URL` pointing at the
172
+ AMD MI300X vLLM endpoint. Production default.
173
+ - **`ollama`** — `ChatOllama` with a local Ollama daemon (Qwen 2.5 7B
174
+ Instruct). Development fallback.
175
+ - **`dummy`** — `DummyChatModel` (deterministic stub, no network).
176
+ CI / eval / load.
177
+
178
+ Provider selection is **runtime-switchable** without restart:
179
+
180
+ ```python
181
+ graph.invoke(state, config={"configurable": {"llm_profile": "dummy"}})
182
+ ```
183
+
184
+ ## 6. Embedding
185
+
186
+ `BAAI/bge-m3` (2.27 GB, 1024 dim, multilingual) by default.
187
+ Sentence-transformers loads it on first call via `@lru_cache`.
188
+ Pre-downloaded at Docker build time so runtime has no network call.
189
+
190
+ ## 7. Hybrid retrieval (Chroma + BM25)
191
+
192
+ `store/hybrid_store.py` runs vector search and BM25 in parallel and merges
193
+ with Reciprocal Rank Fusion (RRF). The chunker uses natural break points
194
+ (paragraph + sentence boundaries), tuned to ~15K-char chunks with 500-char
195
+ overlap.
196
+
197
+ ## 8. Async-first runtime
198
+
199
+ LangGraph 0.6 is async-first. The Streamlit app runs the entire async layer
200
+ on a long-lived background event loop (`app/async_runtime.py`'s `AsyncRuntime`
201
+ singleton). This keeps the ChromaDB connection, the vLLM / Ollama HTTP
202
+ session, and the `AsyncSqliteSaver` SQLite pool persistent across user
203
+ interactions — they do not rebuild per request.
204
+
205
+ ## 9. Multilingual support
206
+
207
+ The codebase is English-first but multilingual-tolerant:
208
+
209
+ - The classifier matches HU/EN/DE keyword patterns.
210
+ - Risk filters tolerate HU/DE business terms.
211
+ - The OCR layer keeps `eng + hun + deu` as Tesseract languages.
212
+ - Demo data may include mixed-language documents.
213
+
214
+ The output (UI, exec summary, DOCX report) is **always English**.
CLAUDE.md ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CLAUDE.md — paperhawk
2
+
3
+ Project-level instructions for Claude Code working in this repository. Any
4
+ session that starts in this folder reads this file automatically.
5
+
6
+ **Last updated:** 2026-05-03
7
+
8
+ ---
9
+
10
+ ## 1. Project overview
11
+
12
+ A LangGraph-native, multi-agent Document Intelligence platform built for the
13
+ **AMD Developer Hackathon × lablab.ai** (May 2026). MIT-licensed, English-only
14
+ codebase, designed to run on **AMD Instinct MI300X** GPUs via the vLLM runtime
15
+ serving **Qwen 2.5 Instruct** open-source models.
16
+
17
+ The system processes business document packages (invoices, contracts, delivery
18
+ notes, purchase orders, financial reports) end-to-end:
19
+
20
+ 1. **Ingest** — PDF / DOCX / image with vision-first scanned fallback
21
+ 2. **Classify** — 6-way doc-type classifier (LLM with structured output)
22
+ 3. **Extract** — typed Pydantic schema extraction with anti-hallucination
23
+ 4. **Cross-reference** — three-way matching (invoice + delivery + PO)
24
+ 5. **Risk analysis** — basic + 14 domain rules + LLM ensemble + 3 filters
25
+ 6. **Report** — DOCX export, JSON API, executive summary
26
+
27
+ The chat layer is a 5-tool agentic ReAct loop with explicit `[Source: filename]`
28
+ citations and an anti-hallucination validator.
29
+
30
+ ---
31
+
32
+ ## 2. Workflow rules
33
+
34
+ ### Language
35
+
36
+ - **English everywhere** — code, comments, docstrings, prompts, UI, error
37
+ messages, log lines.
38
+ - **Multilingual fallback** — for legacy interop and the multilingual demo:
39
+ some loaders, classifiers, and regex filters accept HU/DE input. EN is
40
+ always the primary path.
41
+ - Two HU reference documents are kept under `docs/` with `_HU.md` suffix
42
+ (`Teljes-rendszer-attekintes-langgraph_HU.md`, `MUKODESI_LEIRAS_HU.md`).
43
+ These are read-only references; do not edit.
44
+
45
+ ### License + IP
46
+
47
+ - **MIT licensed** — see `LICENSE`.
48
+ - `NOTICE.md` is a non-binding author request (no legal force).
49
+ - Never paste proprietary code from outside this repo.
50
+
51
+ ### Provider
52
+
53
+ - The default chat provider is `vllm` (Qwen 2.5 14B Instruct on AMD MI300X
54
+ through the OpenAI-compatible vLLM endpoint).
55
+ - `ollama` is a local dev fallback (Qwen 2.5 7B Instruct on a laptop GPU/CPU).
56
+ - `dummy` is the deterministic CI / eval / smoke provider (no network, no LLM).
57
+ - Never re-introduce a Claude / Anthropic provider here — that path is
58
+ out of scope for the AMD edition.
59
+
60
+ ### Git
61
+
62
+ - The AI **NEVER** runs git operations on `main` (no commit, no push, no
63
+ cherry-pick, no merge). The user runs all `main`-branch git operations.
64
+ - The AI MAY commit on non-`main` feature branches when explicitly asked.
65
+ - The AI **NEVER** pushes — push is the user's task only.
66
+
67
+ ### Build hygiene
68
+
69
+ - Do not commit `.env`, `chroma_db/`, `data/checkpoints.sqlite`, `__pycache__/`.
70
+ - Hungarian / English commit messages are both fine; English preferred for the
71
+ public history of an MIT repo.
72
+
73
+ ### Anti-hallucination is sacred
74
+
75
+ - The 5+1 layers (`temperature=0`, `_quotes`, `_confidence`, plausibility
76
+ filters, LLM-risk 3 filters, quote validator) are not optional. Every
77
+ LLM-generated piece of data is cross-checked.
78
+ - Source citations in the chat use the canonical `[Source: filename]` format
79
+ (validator enforces this).
80
+
81
+ ---
82
+
83
+ ## 3. Repo layout
84
+
85
+ ```
86
+ paperhawk/
87
+ ├── app/ # Streamlit UI (5 tabs) + async runtime
88
+ ├── config.py # Pydantic Settings (env-bound)
89
+ ├── domain_checks/ # 14 deterministic rules + base + registry
90
+ ├── eval/ # Eval harness (questions + run_eval)
91
+ ├── graph/ # 4 compiled graphs (pipeline / chat / dd /
92
+ │ # package_insights) + 6 states + checkpointer
93
+ ├── ingest/ # PDF / DOCX / image / OCR / tables / txt
94
+ ├── infra/vllm/ # AMD MI300X deployment (Dockerfile + serve.sh + README)
95
+ ├── load/ # Load benchmarks
96
+ ├── nodes/ # Per-stage node functions:
97
+ │ ├── chat/ # chat agent + 5 tools
98
+ │ ├── dd/ # DD specialists + supervisor + synthesizer
99
+ │ ├── extract/ # extract + dummy + quote validator
100
+ │ ├── ingest/ # ingest helpers
101
+ │ ├── pipeline/ # classify / compare / duplicate / report / docx
102
+ │ └── risk/ # basic / domain dispatch / LLM risk + 3 filters
103
+ ├── providers/ # vLLM / Ollama / Dummy LLM providers + embeddings
104
+ ├── schemas/ # 6 JSON schemas + pydantic_models + flatten_universal
105
+ ├── store/ # ChromaDB + BM25 hybrid + chunking
106
+ ├── subgraphs/ # 6 reusable subgraphs (Send API parallelism)
107
+ ├── tests/ # unit + integration + e2e_api + e2e_screenshot
108
+ ├── tools/ # 5 chat tools + ChatToolContext
109
+ ├── utils/ # dates + numbers + docx_export
110
+ └── validation/ # anti-halluc layers (5+1)
111
+ ```
112
+
113
+ ---
114
+
115
+ ## 4. Hot files
116
+
117
+ When fixing bugs or adding features, these are the most-edited files:
118
+
119
+ - `graph/states/pipeline_state.py` — `Risk`, `Classification`, `ExtractedData`,
120
+ `merge_risks`, `merge_doc_results` reducers.
121
+ - `domain_checks/__init__.py` — the 14-check registry.
122
+ - `domain_checks/check_*_*.py` — individual deterministic rules.
123
+ - `nodes/risk/_prompts.py` — `RISK_SYSTEM_PROMPT` (anti-halluc 9+6+4 examples).
124
+ - `nodes/chat/_prompts.py` — `AGENTIC_SYSTEM_PROMPT` (17 rules).
125
+ - `validation/llm_risk_filters.py` — 3-filter chain.
126
+ - `app/main.py` — Streamlit UI (5 tabs).
127
+
128
+ ---
129
+
130
+ ## 5. Testing
131
+
132
+ ```bash
133
+ # Fast: unit + integration (dummy LLM)
134
+ LLM_PROFILE=dummy pytest tests/unit tests/integration -x --tb=short
135
+
136
+ # Slow: end-to-end with real LLM
137
+ LLM_PROFILE=vllm pytest tests/e2e_api -m e2e -x --tb=short
138
+
139
+ # UI Playwright (real LLM, slow)
140
+ LLM_PROFILE=vllm pytest tests/e2e_screenshot -x --tb=short
141
+ ```
142
+
143
+ `LLM_PROFILE=dummy` works without any external service. `LLM_PROFILE=vllm`
144
+ requires `VLLM_BASE_URL` to point at a running vLLM endpoint.
145
+
146
+ ---
147
+
148
+ ## 6. Deploy targets
149
+
150
+ - **Hugging Face Space** — Streamlit Space under
151
+ `huggingface.co/spaces/lablab-ai-amd-developer-hackathon/<your-space>`.
152
+ See `docs/hf-space-deployment.md`.
153
+ - **AMD Developer Cloud MI300X** — vLLM serving Qwen 2.5 14B (or 32B).
154
+ See `docs/qwen-vllm-deployment.md` and `infra/vllm/README.md`.
155
+
156
+ ---
157
+
158
+ ## 7. Pitch positioning
159
+
160
+ When writing project descriptions, the README, video, or social posts:
161
+
162
+ - **Beyond simple RAG** — multi-agent platform with 14 deterministic checks
163
+ + an LLM ensemble. The 5-tool chat is *agentic*, not retrieval-only.
164
+ - **Track 1** (AI Agents & Agentic Workflows) is the target track.
165
+ - **Cross-track**: Build in Public is in scope (AMD GPU prize).
166
+ - **HF Special Prize** is in scope (Reachy Mini robot — like-vote driven).
167
+
168
+ ---
169
+
170
+ ## 8. The Glossary (HU → EN field names)
171
+
172
+ The full per-field rename map is in
173
+ `pwc-ai-verseny/document-intelligence-agentic-langgraph-amd/ATIRASI_TERV.md`
174
+ sections **32 (field names) and 33 (severity literals)**. Keep that file
175
+ open when editing extraction schemas, domain checks, or anything that
176
+ touches the `Risk` Pydantic.
177
+
178
+ ---
179
+
180
+ ## 9. Common pitfalls
181
+
182
+ - **Severity literals**: always `"high" | "medium" | "low" | "info"` —
183
+ never `"magas" | "kozepes" | "alacsony"`. Many `_normalize_severity()`
184
+ helpers map HU → EN if legacy data sneaks in, but new code emits EN.
185
+ - **Risk fields**: `description`, `severity`, `rationale`, `kind`,
186
+ `regulation`, `affected_document`, `source_check_id`. NOT
187
+ `leiras / sulyossag / indoklas / tipus / jogszabaly / erinto_dokumentum / forras_check_id`.
188
+ - **Doc types**: `"invoice" | "delivery_note" | "purchase_order" | "contract" | "financial_report" | "other"`.
189
+ - **`_quotes` alias** (not `_idezetek`) — both in JSON schemas and Pydantic models.
190
+ - **Multilingual fallback**: read-only in classifiers and regex filters;
191
+ never emit HU in new code.
Dockerfile ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # syntax=docker/dockerfile:1.6
2
+ FROM python:3.12-slim AS base
3
+
4
+ ENV PYTHONUNBUFFERED=1 \
5
+ PYTHONDONTWRITEBYTECODE=1 \
6
+ PIP_NO_CACHE_DIR=1 \
7
+ PIP_DISABLE_PIP_VERSION_CHECK=1
8
+
9
+ # OS-level dependencies:
10
+ # - tesseract-ocr (eng + hun + deu): scanned PDF OCR fallback (multilingual demo support)
11
+ # - poppler-utils: pdfplumber table extraction
12
+ # - libmupdf-dev: PyMuPDF native lib
13
+ # - curl: healthcheck
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
+ tesseract-ocr \
16
+ tesseract-ocr-eng \
17
+ tesseract-ocr-hun \
18
+ tesseract-ocr-deu \
19
+ poppler-utils \
20
+ libmupdf-dev \
21
+ curl \
22
+ && rm -rf /var/lib/apt/lists/*
23
+
24
+ WORKDIR /app
25
+
26
+ # Python deps — CPU-only torch first (smaller image), then the rest
27
+ COPY requirements.txt .
28
+ RUN pip install --upgrade pip \
29
+ && pip install --index-url https://download.pytorch.org/whl/cpu torch \
30
+ && pip install -r requirements.txt
31
+
32
+ # Sentence-transformers model pre-download (no runtime network call).
33
+ # BAAI/bge-m3 = 2.27 GB, 1024 dim, multilingual (EN/HU/DE/FR/...).
34
+ RUN python -c "from sentence_transformers import SentenceTransformer; \
35
+ SentenceTransformer('BAAI/bge-m3')"
36
+
37
+ # Source code
38
+ COPY . .
39
+
40
+ # Streamlit healthcheck — port 7860 for HF Space deployment (HF expects this)
41
+ EXPOSE 7860
42
+ HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
43
+ CMD curl -f http://localhost:7860/_stcore/health || exit 1
44
+
45
+ CMD ["streamlit", "run", "app/main.py", \
46
+ "--server.address=0.0.0.0", \
47
+ "--server.port=7860", \
48
+ "--server.headless=true"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Nándorfi Vince, Vitai Tamás, Murcsik Gábor
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
Makefile ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: install run run-local stop test test-fast eval load samples lint clean help
2
+
3
+ PYTHON := python3.12
4
+ VENV := .venv
5
+ ACTIVATE := . $(VENV)/bin/activate
6
+
7
+ help: ## Megjeleníti a parancsokat
8
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-15s\033[0m %s\n", $$1, $$2}'
9
+
10
+ install: ## Lokális venv + függőségek
11
+ $(PYTHON) -m venv $(VENV)
12
+ $(ACTIVATE) && pip install --upgrade pip
13
+ $(ACTIVATE) && pip install --index-url https://download.pytorch.org/whl/cpu torch
14
+ $(ACTIVATE) && pip install -r requirements.txt
15
+
16
+ run: ## Docker compose: app indítás (vLLM default)
17
+ docker compose up -d --build langgraph-app
18
+ @echo "App: http://localhost:8501"
19
+
20
+ run-local: ## Docker compose: app + Ollama (lokális LLM)
21
+ docker compose --profile ollama up -d --build
22
+ @echo "App: http://localhost:8501 | Ollama: http://localhost:11434"
23
+ @echo "Első indítás: docker compose exec ollama ollama pull qwen2.5:7b-instruct"
24
+
25
+ stop: ## Docker compose leállítás
26
+ docker compose down
27
+
28
+ dev: ## Streamlit lokálisan (.venv-et feltételez)
29
+ $(ACTIVATE) && streamlit run app/main.py
30
+
31
+ test: ## Pytest teljes (lassúak nélkül)
32
+ $(ACTIVATE) && pytest tests/ -m "not slow" -v
33
+
34
+ test-fast: ## Smoke + unit tesztek dummy LLM-mel (< 30s)
35
+ $(ACTIVATE) && pytest tests/unit/ tests/integration/ -m "not slow" -q
36
+
37
+ test-e2e: ## E2E forgatókönyvek (10 db, dummy LLM)
38
+ $(ACTIVATE) && pytest tests/e2e/ -v
39
+
40
+ eval: ## 14 chat kérdés + 10 forgatókönyv eval
41
+ $(ACTIVATE) && python eval/run_eval.py --llm dummy
42
+
43
+ eval-claude: ## Eval valódi Claude LLM-mel (lassú, API-költség)
44
+ $(ACTIVATE) && python eval/run_eval.py --llm claude
45
+
46
+ load: ## Load test: 100 chat query async-gather (dummy)
47
+ $(ACTIVATE) && python load/benchmark.py --n 100
48
+
49
+ load-parallel: ## Pipeline parallel test: 20 doksi egyszerre
50
+ $(ACTIVATE) && python load/parallel_pipeline_bench.py --n 20
51
+
52
+ samples: ## 75 minta fájl (PDF+DOCX+PNG) generálása
53
+ $(ACTIVATE) && python test_data/generate_samples.py
54
+
55
+ lint: ## Ruff lint + formatter
56
+ $(ACTIVATE) && ruff check .
57
+ $(ACTIVATE) && ruff format --check .
58
+
59
+ format: ## Ruff auto-format
60
+ $(ACTIVATE) && ruff format .
61
+
62
+ clean: ## Cache + perzisztens runtime adat törlés
63
+ find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
64
+ find . -type d -name .pytest_cache -exec rm -rf {} + 2>/dev/null || true
65
+ find . -type d -name .ruff_cache -exec rm -rf {} + 2>/dev/null || true
66
+ rm -rf chroma_db/ data/checkpoints.sqlite*
NOTICE.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NOTICE
2
+
3
+ This project is released under the **MIT License** (see `LICENSE`).
4
+
5
+ ## Author intent (non-binding request)
6
+
7
+ The codebase originated from a research project conducted in Hungarian
8
+ under a proprietary license. We have re-licensed it under MIT for the
9
+ **AMD Developer Hackathon × lablab.ai** (May 2026).
10
+
11
+ The authors kindly request that:
12
+
13
+ 1. **AI/LLM training** — if you use this codebase or its derivatives in
14
+ training data for AI models, please credit the original authors
15
+ (Nándorfi Vince, Vitai Tamás, Murcsik Gábor) and link to the
16
+ original repository.
17
+
18
+ 2. **Re-translation / re-implementation** — if you produce derivative
19
+ works in other languages, a reference to the original authors is
20
+ appreciated.
21
+
22
+ 3. **Substantial reuse** — if you build a commercial product on top of
23
+ this codebase, a courtesy attribution is appreciated.
24
+
25
+ These are **kind requests, not legal restrictions** — the MIT license
26
+ governs all rights and permissions.
27
+
28
+ ## Built by
29
+
30
+ Team **CsimpiCsirkek** for the AMD Developer Hackathon × lablab.ai (2026):
31
+
32
+ - Nándorfi Vince
33
+ - Vitai Tamás
34
+ - Murcsik Gábor
README.md ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: PaperHawk
3
+ emoji: 🦅
4
+ colorFrom: red
5
+ colorTo: orange
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ short_description: Real-DI-Audit/14 rules/6 anti-halluc/LangGraph/Qwen/MI300X
10
+ ---
11
+
12
+ <p align="center">
13
+ <img src="paperhawk.jpeg" alt="PaperHawk" width="900">
14
+ </p>
15
+
16
+ <h1 align="center">PaperHawk</h1>
17
+
18
+ <p align="center">
19
+ <strong>Agentic document intelligence on AMD MI300X</strong><br>
20
+ Multi-document due diligence with deterministic domain checks and agentic LLM workflows.
21
+ </p>
22
+
23
+ <p align="center">
24
+ <a href="LICENSE"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT"></a>
25
+ <img src="https://img.shields.io/badge/python-3.12+-blue.svg" alt="Python">
26
+ <img src="https://img.shields.io/badge/LangGraph-0.6-green.svg" alt="LangGraph">
27
+ <img src="https://img.shields.io/badge/AMD-MI300X-red.svg" alt="AMD MI300X">
28
+ </p>
29
+
30
+ <p align="center">
31
+ Built for the <a href="https://lablab.ai/event/amd-developer-hackathon"><strong>AMD Developer Hackathon × lablab.ai</strong></a> (May 2026).
32
+ </p>
33
+
34
+ ---
35
+
36
+ ## What is this?
37
+
38
+ A working AI system that ingests multiple business documents (invoices,
39
+ contracts, delivery notes, purchase orders, financial reports) and:
40
+
41
+ - **Extracts structured data** with anti-hallucination layers (5+1 stack)
42
+ - **Detects risks** via 14 deterministic domain rules + LLM ensemble
43
+ - **Cross-references documents** (three-way matching for audits, M&A DD)
44
+ - **Answers questions** via 5-tool agentic chat with source citations
45
+ - **Generates audit-ready reports** (DOCX export, JSON API)
46
+
47
+ This is **not "just another RAG"** — it is a multi-agent orchestration of
48
+ specialist nodes (audit / legal / compliance / financial) over a deterministic
49
+ + LLM ensemble, with explicit anti-hallucination layers.
50
+
51
+ ## Stack
52
+
53
+ | Layer | Technology |
54
+ |-------|------------|
55
+ | Orchestration | **LangGraph 0.6** (4 graphs, 6 subgraphs, async-first, AsyncSqliteSaver) |
56
+ | LLM | **Qwen 2.5 14B Instruct** via vLLM on **AMD Instinct MI300X** |
57
+ | Embedding | **BAAI/bge-m3** (multilingual, 1024 dim, sentence-transformers) |
58
+ | Vector store | **ChromaDB + BM25** hybrid (Reciprocal Rank Fusion) |
59
+ | UI | **Streamlit** (5 tabs) — deployable as a **Hugging Face Space** |
60
+ | Testing | pytest + Playwright |
61
+
62
+ ## Architecture
63
+
64
+ ```
65
+ ┌─────────────────────────────────┐
66
+ │ Streamlit UI (5 tabs) │
67
+ └────────────┬────────────────────┘
68
+
69
+ ┌────────────────────────┼────────────────────────┐
70
+ │ │ │
71
+ ┌───────▼──────┐ ┌────────▼────────┐ ┌──────▼──────┐
72
+ │ pipeline │ │ chat_graph │ │ dd_graph │
73
+ │ _graph │ │ (5 tools, 17 │ │ (multi- │
74
+ │ (6 subgraphs)│ │ rule prompt) │ │ agent │
75
+ └───────┬──────┘ └─────────────────┘ │ super- │
76
+ │ │ visor) │
77
+ │ ┌─────────────────────────┐ └─────────────┘
78
+ ├──▶ ingest_subgraph │
79
+ ├──▶ classify (per-doc) │
80
+ ├──▶ extract_subgraph │
81
+ ├──▶ rag_index_subgraph │
82
+ ├──▶ compare_node (3-way) │
83
+ └──▶ risk_subgraph │
84
+ ├─ basic risk │
85
+ ├─ 14 domain checks │
86
+ ├─ LLM risk + 3 filters │
87
+ ├─ plausibility │
88
+ └─ duplicate (ISA 240) │
89
+ ```
90
+
91
+ See [ARCHITECTURE.md](ARCHITECTURE.md) for the full architecture.
92
+
93
+ ## Quick start
94
+
95
+ ### 1. Local dev (Ollama or dummy mode)
96
+
97
+ ```bash
98
+ git clone https://github.com/<YOUR_GH_USER>/document-intelligence-agentic-langgraph-amd
99
+ cd document-intelligence-agentic-langgraph-amd
100
+ python -m venv .venv && source .venv/bin/activate
101
+ pip install -r requirements.txt
102
+ cp .env.example .env
103
+ # Edit .env: set LLM_PROFILE=dummy (no LLM) or LLM_PROFILE=ollama (Qwen 7B local)
104
+
105
+ streamlit run app/main.py
106
+ ```
107
+
108
+ ### 2. Production (Qwen on AMD MI300X via vLLM)
109
+
110
+ ```bash
111
+ # On the AMD Developer Cloud MI300X instance:
112
+ docker run --rm --device=/dev/kfd --device=/dev/dri --group-add video \
113
+ --ipc=host --shm-size 16g \
114
+ -p 8000:8000 \
115
+ -e VLLM_MODEL=Qwen/Qwen2.5-14B-Instruct \
116
+ rocm/vllm:latest \
117
+ sh -c 'vllm serve $VLLM_MODEL --host 0.0.0.0 --port 8000 \
118
+ --tensor-parallel-size 1 --max-model-len 32768'
119
+
120
+ # On your machine (.env):
121
+ LLM_PROFILE=vllm
122
+ VLLM_BASE_URL=http://<mi300x-public-ip>:8000/v1
123
+ VLLM_MODEL=Qwen/Qwen2.5-14B-Instruct
124
+
125
+ streamlit run app/main.py
126
+ ```
127
+
128
+ See [docs/qwen-vllm-deployment.md](docs/qwen-vllm-deployment.md) for the full
129
+ walkthrough including cost monitoring and a Plan B (Ollama fallback).
130
+
131
+ ### 3. Hugging Face Space deploy
132
+
133
+ See [docs/hf-space-deployment.md](docs/hf-space-deployment.md).
134
+
135
+ ## Demo packages
136
+
137
+ Three pre-built demo packages bundled in `test_data/`:
138
+
139
+ - **Audit Demo** — 3 invoices from the same supplier; the March one is 50%
140
+ pricier (over-billing pattern detected by the package-level analyzer).
141
+ - **DD Demo** — NDA + service agreement + amendment in an acquisition
142
+ scenario (change-of-control + auto-renewal red flags).
143
+ - **Compliance Demo** — 2 contracts; one is missing the GDPR Article 28 clause.
144
+
145
+ Click the corresponding button on the **Upload** tab.
146
+
147
+ ## Documentation
148
+
149
+ - [ARCHITECTURE.md](ARCHITECTURE.md) — architecture overview (English)
150
+ - [docs/qwen-vllm-deployment.md](docs/qwen-vllm-deployment.md) — Qwen on AMD MI300X (English)
151
+ - [docs/hf-space-deployment.md](docs/hf-space-deployment.md) — Hugging Face Space deploy (English)
152
+ - [docs/LANGGRAPH_ONBOARDING.md](docs/LANGGRAPH_ONBOARDING.md) — onboarding for contributors (English)
153
+ - [CLAUDE.md](CLAUDE.md) — project-level Claude Code instructions
154
+ - [NOTICE.md](NOTICE.md) — author intent (non-binding)
155
+ - `docs/Teljes-rendszer-attekintes-langgraph_HU.md` — legacy Hungarian system overview (reference)
156
+ - `docs/MUKODESI_LEIRAS_HU.md` — legacy Hungarian operations manual (reference)
157
+
158
+ ## Built by
159
+
160
+ **Team CsimpiCsirkek** for the AMD Developer Hackathon × lablab.ai (2026):
161
+
162
+ - Nándorfi Vince
163
+ - Vitai Tamás
164
+ - Murcsik Gábor
165
+
166
+ ## License
167
+
168
+ **MIT** — see [LICENSE](LICENSE).
app/__init__.py ADDED
File without changes
app/async_runtime.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AsyncRuntime — long-lived background event loop for the Streamlit thread.
2
+
3
+ PROBLEM:
4
+ * Streamlit runs a synchronous event loop (uvloop) that CANNOT be patched
5
+ with ``nest_asyncio``.
6
+ * LangGraph (and every async resource: ChromaDB connections, the LLM HTTP
7
+ session, AsyncSqliteSaver checkpointers) assumes a LONG-LIVED async context.
8
+ * Opening a new loop per invoke means async-bound resources never amortize:
9
+ every chat message rebuilds the SQLite pool, the Chroma client, and the
10
+ HTTP session.
11
+
12
+ SOLUTION:
13
+ * A DEDICATED background thread that runs a single ``asyncio.new_event_loop()``
14
+ with ``run_forever`` for the entire app lifetime.
15
+ * The Streamlit thread (sync) hands coroutines to the background loop via
16
+ ``asyncio.run_coroutine_threadsafe(coro, loop)``; the returned Future
17
+ blocks the Streamlit thread until the result is ready.
18
+ * Singleton — started once, same instance reused.
19
+
20
+ This is the classic "embedded async runtime" pattern (see LangChain,
21
+ JupyterLab, ipykernel implementations). Robust and scales well.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import asyncio
27
+ import atexit
28
+ import threading
29
+ from collections.abc import AsyncIterator
30
+ from typing import Any, TypeVar
31
+
32
+ T = TypeVar("T")
33
+
34
+
35
class AsyncRuntime:
    """Singleton background event loop. Thread-safe submit + stream API.

    A dedicated daemon thread owns a single long-lived asyncio loop; the
    synchronous (Streamlit) thread hands coroutines over with
    ``asyncio.run_coroutine_threadsafe`` and blocks on the returned Future.
    Async-bound resources created on that loop (Chroma client, SqliteSaver,
    HTTP sessions) therefore stay alive across calls instead of being rebuilt
    on every invocation.
    """

    _instance: AsyncRuntime | None = None
    # One class-level lock guards both singleton creation and loop startup.
    _lock = threading.Lock()

    def __init__(self) -> None:
        # Lazy start: the loop and thread start on the first submit().
        self._loop: asyncio.AbstractEventLoop | None = None
        self._thread: threading.Thread | None = None
        self._started = threading.Event()

    @classmethod
    def get(cls) -> AsyncRuntime:
        """Singleton accessor — created on first call, same instance after."""
        if cls._instance is None:
            with cls._lock:
                # Double-checked locking: re-test after acquiring the lock.
                if cls._instance is None:
                    cls._instance = AsyncRuntime()
        return cls._instance

    def _ensure_started(self) -> None:
        """Start the background loop thread exactly once (idempotent).

        Raises:
            RuntimeError: if the loop thread does not come up within 5 s.
        """
        if self._started.is_set():
            return
        with self._lock:
            if self._started.is_set():
                return

            ready = threading.Event()

            def _run() -> None:
                # The loop must be created on the thread that will run it.
                self._loop = asyncio.new_event_loop()
                asyncio.set_event_loop(self._loop)
                ready.set()
                try:
                    self._loop.run_forever()
                finally:
                    self._loop.close()

            self._thread = threading.Thread(
                target=_run,
                name="async-runtime",
                daemon=True,  # auto-stops when the app exits
            )
            self._thread.start()
            # FIX: previously the wait() result was ignored, so a thread that
            # failed to start still marked the runtime as started and every
            # later submit() died on an opaque assert. Fail loudly instead.
            if not ready.wait(timeout=5.0):
                raise RuntimeError(
                    "AsyncRuntime background event loop failed to start within 5s"
                )
            self._started.set()

            # Cleanup at app shutdown.
            atexit.register(self._shutdown)

    def submit(self, coro) -> Any:
        """Submit a coroutine to the background loop, block on the result.

        This is the Streamlit thread's main API: synchronous-looking, but the
        coroutine runs on a long-lived loop so async resources (Chroma,
        SqliteSaver, embeddings) stay PERSISTENT across calls.

        Exceptions raised by the coroutine propagate to the caller via the
        Future.
        """
        self._ensure_started()
        assert self._loop is not None
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        return future.result()

    def submit_iter(self, async_gen: AsyncIterator[T]):
        """Async generator → sync iterator wrapper for Streamlit st.write_stream.

        Drives the async generator on the background loop by submitting one
        ``__anext__()`` call at a time; the Streamlit thread blocks on each
        item, yielding tokens as they arrive.
        """
        self._ensure_started()
        assert self._loop is not None

        while True:
            try:
                future = asyncio.run_coroutine_threadsafe(
                    async_gen.__anext__(), self._loop
                )
                # StopAsyncIteration propagates through future.result() and
                # ends the sync iteration.
                yield future.result()
            except StopAsyncIteration:
                break

    def _shutdown(self) -> None:
        """atexit handler — gracefully stop the background loop."""
        if self._loop is None or not self._started.is_set():
            return
        try:
            self._loop.call_soon_threadsafe(self._loop.stop)
        except Exception:
            # The loop may already be closed during interpreter teardown.
            pass
app/main.py ADDED
@@ -0,0 +1,931 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Streamlit UI — Agentic Document Intelligence (LangGraph).
2
+
3
+ 5 tabs: Upload, Results, Chat, DD Assistant, Report.
4
+
5
+ LangGraph is async-first; the Streamlit (uvloop) compatibility is handled by
6
+ the ``app.async_runtime.AsyncRuntime`` singleton with a long-lived background
7
+ event loop. The caller invokes via the synchronous ``run_async()`` wrapper.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ # Streamlit runs app/main.py directly so the project root is added explicitly
13
+ # to sys.path; that lets ``from app.streaming`` and ``from config`` resolve.
14
+ import sys
15
+ from pathlib import Path
16
+
17
+ _PROJECT_ROOT = Path(__file__).resolve().parent.parent
18
+ if str(_PROJECT_ROOT) not in sys.path:
19
+ sys.path.insert(0, str(_PROJECT_ROOT))
20
+
21
+ import json # noqa: E402
22
+ import traceback # noqa: E402
23
+ import uuid # noqa: E402
24
+ from collections import defaultdict # noqa: E402
25
+ from datetime import datetime # noqa: E402
26
+
27
+ import streamlit as st # noqa: E402
28
+ from langchain_core.messages import HumanMessage # noqa: E402
29
+
30
+ from app.streaming import run_async, run_with_progress # noqa: E402
31
+ from config import settings # noqa: E402
32
+ from graph.chat_graph import build_chat_graph # noqa: E402
33
+ from graph.dd_graph import build_dd_graph # noqa: E402
34
+ from graph.package_insights_graph import build_package_insights_graph # noqa: E402
35
+ from graph.pipeline_graph import build_pipeline_graph # noqa: E402
36
+ from providers import get_chat_model, get_dummy_handle # noqa: E402
37
+ from store import HybridStore # noqa: E402
38
+ from tools import ChatToolContext # noqa: E402
39
+ from utils.docx_export import build_docx_sync # noqa: E402
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Page config
44
+ # ---------------------------------------------------------------------------
45
+
46
+ st.set_page_config(
47
+ page_title="Agentic Document Intelligence — LangGraph",
48
+ layout="wide",
49
+ )
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Session state init
54
+ # ---------------------------------------------------------------------------
55
+
56
+
57
def _init_session_state() -> None:
    """Seed ``st.session_state`` with its default keys (idempotent).

    Safe to call on every rerun: a factory only fires for keys that are
    missing, so existing values survive. Order matters — ``tool_context``
    wraps the ``store`` seeded just before it.
    """
    factories = [
        ("thread_id", lambda: f"st_{uuid.uuid4().hex[:12]}"),
        ("store", HybridStore),
        ("tool_context", lambda: ChatToolContext(store=st.session_state.store)),
        ("pipeline_state", lambda: None),
        ("dd_contracts_summary", list),
        ("chat_history", list),
        ("docx_bytes", lambda: None),
    ]
    for key, make in factories:
        if key not in st.session_state:
            st.session_state[key] = make()
72
+
73
+
74
+ _init_session_state()
75
+
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # Sidebar — 3 buttons (Reset, Clear chat history, Clear vector store)
79
+ # ---------------------------------------------------------------------------
80
+
81
# Sidebar: provider info, processing status, and three destructive actions
# (full reset / clear chat / clear vector store).
with st.sidebar:
    st.header("Settings")
    # Which LLM backend is active (e.g. dummy / ollama / vllm).
    st.info(f"LLM Provider: **{settings.llm_profile}**")

    if st.session_state.pipeline_state:
        n_docs = len(st.session_state.pipeline_state.get("documents") or [])
        st.success(f"Documents processed: {n_docs}")
        # NOTE(review): nesting reconstructed — metric appears to be shown
        # only after a pipeline run; confirm against the original indentation.
        st.metric("Indexed chunks", st.session_state.store.chunk_count)

    st.divider()

    if st.button(
        "Full reset",
        help="Clear everything: uploaded documents, vector store, chat history, results.",
    ):
        # Dropping every session key forces _init_session_state to rebuild
        # the store and tool context on the next script run.
        for key in list(st.session_state.keys()):
            del st.session_state[key]
        st.rerun()

    if st.button(
        "Clear chat history",
        help="Only clears the chat conversation. Documents and results are kept.",
    ):
        st.session_state.chat_history = []
        st.rerun()

    if st.button(
        "Clear vector store",
        help="Clears the search index (ChromaDB). Chat will not be able to answer "
        "until you upload documents again. Results are preserved.",
    ):
        try:
            run_async(st.session_state.store.clear())
        except Exception:
            # Fallback: new instance if clear() fails
            st.session_state.store = HybridStore()
        # Rebind the tool context to the (possibly new) store and drop the
        # chat history, since its answers referenced the cleared index.
        st.session_state.tool_context = ChatToolContext(store=st.session_state.store)
        st.session_state.chat_history = []
        st.rerun()
120
+
121
+
122
+ # ---------------------------------------------------------------------------
123
+ # Title
124
+ # ---------------------------------------------------------------------------
125
+
126
+ st.title("Agentic Document Intelligence Platform")
127
+ st.caption("Multi-document cross-analysis for audit and legal use")
128
+
129
+
130
+ # ---------------------------------------------------------------------------
131
+ # 5 Tabs
132
+ # ---------------------------------------------------------------------------
133
+
134
+ tab_upload, tab_results, tab_chat, tab_dd, tab_report = st.tabs(
135
+ ["Upload", "Results", "Chat", "DD Assistant", "Report"]
136
+ )
137
+
138
+
139
+ # =============================================================================
140
+ # Demo package handler
141
+ # =============================================================================
142
+
143
# Root directory of the bundled demo document packages.
DEMO_ROOT = _PROJECT_ROOT / "test_data" / "demo_packages"

# One entry per demo button on the Upload tab. ``key`` doubles as the on-disk
# directory name under DEMO_ROOT; ``package_type`` selects the analysis
# profile used by the package-insights graph; ``label``/``description`` are
# UI strings.
DEMO_PACKAGES = [
    {
        "key": "audit_demo",
        "label": "Audit Demo",
        "package_type": "audit",
        "description": "3 invoices from the same supplier; the March one is 50% pricier.",
    },
    {
        "key": "dd_demo",
        "label": "Due Diligence Demo",
        "package_type": "dd",
        "description": "NDA + service agreement + amendment in an acquisition scenario.",
    },
    {
        "key": "compliance_demo",
        "label": "Compliance Demo",
        "package_type": "compliance",
        "description": "2 contracts; one is missing the GDPR Article 28 clause.",
    },
]
165
+
166
+
167
def _process_demo_package(pkg: dict) -> None:
    """Process a demo package end-to-end: pipeline + package_insights + (optional) DD.

    Args:
        pkg: One entry of ``DEMO_PACKAGES`` (keys: ``key``, ``label``,
            ``package_type``, ``description``).

    Side effects: updates ``st.session_state`` (``pipeline_state``,
    ``tool_context``, ``dd_contracts_summary``) and triggers ``st.rerun()``
    on success.
    """
    pkg_dir = DEMO_ROOT / pkg["key"]
    if not pkg_dir.exists():
        # Backward-compat: fall back to old HU directory name
        legacy = _PROJECT_ROOT / "test_data" / "demo_csomagok" / pkg["key"]
        if legacy.exists():
            pkg_dir = legacy
        else:
            st.error(f"Demo package directory not found: {pkg_dir}")
            return

    pdf_files = sorted(pkg_dir.glob("*.pdf"))
    if not pdf_files:
        st.error(f"No PDFs in the {pkg['label']} package: {pkg_dir}")
        return

    demo_files = [(p.name, p.read_bytes()) for p in pdf_files]
    if settings.is_dummy:
        # The dummy LLM fabricates answers from the known file names.
        get_dummy_handle().set_docs_hint([fn for fn, _ in demo_files])

    try:
        # 1) Pipeline with progress bar
        pipeline = build_pipeline_graph(st.session_state.store, llm=get_chat_model())
        progress_bar = st.progress(0.0, text=f"{pkg['label']}: starting pipeline...")
        # Rough step budget: ~4 pipeline steps per document + fixed overhead.
        total_steps = max(len(demo_files) * 4 + 6, 12)

        def _on_pipeline_progress(step: int, total: int, label: str) -> None:
            progress_bar.progress(
                min(step / total, 1.0),
                text=f"[{step}/{total}] {label}",
            )

        state = run_with_progress(
            pipeline,
            {"files": demo_files},
            on_progress=_on_pipeline_progress,
            total_steps=total_steps,
        )
        progress_bar.progress(1.0, text="Pipeline done — running package-level analysis...")

        # 2) Package insights — opt-in, runs only on demo buttons
        pkg_graph = build_package_insights_graph(llm=get_chat_model())
        pkg_state = run_async(pkg_graph.ainvoke({
            "documents": state.get("documents") or [],
            "package_type": pkg["package_type"],
        }))
        insights = pkg_state.get("final_insights")
        if insights is not None:
            state["package_insights"] = insights

        # 3) DD report — only if the package contains contracts
        contracts = [
            d for d in (state.get("documents") or [])
            if d.classification and d.classification.doc_type == "contract"
        ]
        if contracts:
            progress_bar.progress(1.0, text="DD analysis...")
            dd_graph = build_dd_graph(llm=get_chat_model())
            dd_state = run_async(dd_graph.ainvoke({"documents": contracts}))
            state["dd_report"] = dd_state.get("dd_report")
            st.session_state.dd_contracts_summary = dd_state.get("contracts") or []

        progress_bar.progress(1.0, text="Processing complete!")

        st.session_state.pipeline_state = state
        # Register each processed document with the chat tool context.
        for proc_doc in state.get("documents") or []:
            st.session_state.tool_context.add_document(proc_doc)

        n_docs = len(state.get("documents") or [])
        n_risks = len(state.get("risks") or [])
        elapsed = state.get("processing_seconds", 0)
        st.success(
            f"{pkg['label']} loaded: {n_docs} documents in {elapsed:.1f} sec, "
            f"{n_risks} risks identified. Open the Results / DD Assistant tab."
        )
    except Exception as exc:
        st.error(f"Error processing the demo package: {exc}")
        with st.expander("Developer details (full traceback)"):
            st.code(traceback.format_exc(), language="python")
        return
    # BUGFIX: st.rerun() raises Streamlit's internal RerunException, which is
    # an Exception subclass; calling it inside the try block above let the
    # broad `except Exception` swallow it and display a bogus error instead
    # of rerunning. Trigger the rerun only after the handler can no longer
    # intercept it.
    st.rerun()
248
+
249
+
250
+ # =============================================================================
251
+ # TAB 1: Upload
252
+ # =============================================================================
253
+
254
# Upload tab: manual multi-file upload + one-click demo packages.
with tab_upload:
    st.subheader("Upload documents")

    if st.session_state.pipeline_state:
        n_docs = len(st.session_state.pipeline_state.get("documents") or [])
        st.info(
            f"Currently {n_docs} documents are processed. "
            "Open the Results tab, or upload more files."
        )

    uploaded = st.file_uploader(
        "Drop documents here (PDF, DOCX, image, or text)",
        type=["pdf", "docx", "png", "jpg", "jpeg", "txt"],
        accept_multiple_files=True,
    )

    if uploaded and st.button("Start processing", type="primary"):
        files = [(f.name, f.read()) for f in uploaded]

        if settings.is_dummy:
            # The dummy LLM fabricates answers from the known file names.
            get_dummy_handle().set_docs_hint([fn for fn, _ in files])

        try:
            graph = build_pipeline_graph(st.session_state.store, llm=get_chat_model())
            progress_bar = st.progress(0.0, text="Starting...")
            # Rough step budget: ~4 pipeline steps per document + fixed overhead.
            total_steps = max(len(files) * 4 + 6, 12)

            def _on_progress(step: int, total: int, label: str) -> None:
                progress_bar.progress(
                    min(step / total, 1.0),
                    text=f"[{step}/{total}] {label}",
                )

            state = run_with_progress(
                graph,
                {"files": files},
                on_progress=_on_progress,
                total_steps=total_steps,
            )
            progress_bar.progress(1.0, text="Processing complete!")

            st.session_state.pipeline_state = state
            st.session_state.dd_contracts_summary = []  # reset DD on manual flow
            for pd in state.get("documents") or []:
                st.session_state.tool_context.add_document(pd)

            n_docs = len(state.get("documents") or [])
            n_risks = len(state.get("risks") or [])
            elapsed = state.get("processing_seconds", 0)
            st.success(
                f"Processed {n_docs} documents in {elapsed:.1f} sec; "
                f"{n_risks} risks identified."
            )
        except Exception as exc:
            st.error(f"Processing error: {exc}")
            with st.expander("Developer details (full traceback)"):
                st.code(traceback.format_exc(), language="python")
        else:
            # BUGFIX: st.rerun() raises Streamlit's internal RerunException
            # (an Exception subclass); calling it inside the try block let
            # the broad `except Exception` swallow it and display a bogus
            # error instead of rerunning. The `else` clause runs only on
            # success, outside the handler's reach.
            st.rerun()

    st.divider()
    st.subheader("Quick demo")
    st.caption(
        "Pre-built scenarios for the pitch. One click loads and processes the "
        "matching documents (pipeline + package-level analysis + DD if there are contracts)."
    )

    # One column per demo package, each with its own Run button.
    cols = st.columns(len(DEMO_PACKAGES))
    for col, pkg in zip(cols, DEMO_PACKAGES, strict=False):
        with col:
            st.markdown(f"**{pkg['label']}**")
            st.caption(pkg["description"])
            if st.button("Run", key=f"demo_{pkg['key']}"):
                _process_demo_package(pkg)
327
+
328
+
329
+ # =============================================================================
330
+ # TAB 2: Results
331
+ # =============================================================================
332
+
333
+ with tab_results:
334
+ state = st.session_state.pipeline_state
335
+ if state is None:
336
+ st.info("Upload documents on the Upload tab to see results.")
337
+ else:
338
+ report = state.get("report") or {}
339
+ perf = report.get("performance") or {}
340
+
341
+ # 4 metrics
342
+ c1, c2, c3, c4 = st.columns(4)
343
+ with c1:
344
+ st.metric("Processing time", f"{perf.get('processing_seconds', 0):.1f} sec")
345
+ with c2:
346
+ st.metric("Documents", perf.get("documents", 0))
347
+ with c3:
348
+ st.metric("Manual estimate", f"{perf.get('manual_estimate_minutes', 0)} min")
349
+ with c4:
350
+ st.metric("Speedup", f"{perf.get('speedup', 0):.1f}x")
351
+
352
+ st.divider()
353
+ st.subheader("Classification")
354
+ from domain_checks import get_evidence_score
355
+ for pd_doc in state.get("documents") or []:
356
+ if pd_doc.ingested is None:
357
+ continue
358
+ cls = pd_doc.classification
359
+ col1, col2, col3 = st.columns([3, 2, 1])
360
+ with col1:
361
+ st.write(f"**{pd_doc.ingested.file_name}**")
362
+ with col2:
363
+ doc_type_display = cls.doc_type_display if cls else "Other"
364
+ st.write(f"{doc_type_display}")
365
+ with col3:
366
+ conf = cls.confidence if cls else 0.0
367
+ doc_type = cls.doc_type if cls else "other"
368
+ ev_score = get_evidence_score(doc_type)
369
+ label = "confident" if conf > 0.8 else "uncertain"
370
+ st.write(f"{label} ({conf:.0%}) | ISA 500: {ev_score}/10")
371
+
372
+ st.divider()
373
+ st.subheader("Extracted data")
374
+ for pd in state.get("documents") or []:
375
+ file_name = pd.ingested.file_name if pd.ingested else "?"
376
+ doc_type_display = (
377
+ pd.classification.doc_type_display if pd.classification else "Other"
378
+ )
379
+ with st.expander(f"{file_name} — {doc_type_display}"):
380
+ if pd.extracted is None:
381
+ st.warning("No extracted data.")
382
+ continue
383
+
384
+ # Confidence indicators
385
+ confidence = pd.extracted.confidence or {}
386
+ if confidence:
387
+ low_fields = [k for k, v in confidence.items() if v == "low"]
388
+ medium_fields = [k for k, v in confidence.items() if v == "medium"]
389
+ if low_fields:
390
+ st.warning(
391
+ f"Low-confidence fields (verify in source): {', '.join(low_fields)}"
392
+ )
393
+ if medium_fields:
394
+ st.info(f"Fields needing interpretation: {', '.join(medium_fields)}")
395
+
396
+ # Quotes
397
+ quotes = pd.extracted.quotes or []
398
+ if quotes:
399
+ with st.expander("Source quotes (anti-hallucination)"):
400
+ for q in quotes:
401
+ st.caption(f'"{q}"')
402
+
403
+ display_data = {
404
+ k: v for k, v in pd.extracted.raw.items()
405
+ if k not in ("_source", "_quotes", "_confidence")
406
+ }
407
+ st.json(display_data)
408
+
409
+ # Cross-document checks
410
+ comp = state.get("comparison")
411
+ if comp:
412
+ st.divider()
413
+ st.subheader("Cross-document checks (three-way matching)")
414
+
415
+ ok = sum(1 for m in (comp.matches or []) if m.get("severity") == "ok")
416
+ warn = sum(1 for m in (comp.matches or []) if m.get("severity") == "warning")
417
+ crit = sum(1 for m in (comp.matches or []) if m.get("severity") == "critical")
418
+ miss = sum(1 for m in (comp.matches or []) if m.get("severity") == "missing")
419
+
420
+ mc1, mc2, mc3, mc4 = st.columns(4)
421
+ mc1.metric("OK", ok)
422
+ mc2.metric("Warning", warn)
423
+ mc3.metric("Critical", crit)
424
+ mc4.metric("Missing", miss)
425
+
426
+ for m in (comp.matches or []):
427
+ sev = m.get("severity", "ok")
428
+ msg = m.get("message", "") or m.get("field", "")
429
+ if sev == "critical":
430
+ st.error(f"CRITICAL: {msg}")
431
+ elif sev == "warning":
432
+ st.warning(f"WARNING: {msg}")
433
+ elif sev == "missing":
434
+ st.info(f"MISSING: {msg}")
435
+
436
+ if comp.summary:
437
+ st.caption(comp.summary)
438
+
439
+ # Risks — split rule-based vs AI observations
440
+ risks = state.get("risks") or []
441
+ basic = [r for r in risks if r.kind != "llm_analysis" and r.severity != "info"]
442
+ info_r = [r for r in risks if r.severity == "info"]
443
+ ai_r = [r for r in risks if r.kind == "llm_analysis"]
444
+
445
+ if basic or info_r or ai_r:
446
+ st.divider()
447
+
448
+ if basic:
449
+ st.subheader("Risks (rule-based)")
450
+ st.caption("Deterministic checks — math, logic, plausibility, regulations.")
451
+ by_sev = defaultdict(list)
452
+ for r in basic:
453
+ by_sev[r.severity].append(r)
454
+ for sev_label, sev_key in (("HIGH", "high"), ("MEDIUM", "medium"),
455
+ ("LOW", "low")):
456
+ items = by_sev.get(sev_key, [])
457
+ if not items:
458
+ continue
459
+ for r in items:
460
+ label = f"**{sev_label}: {r.description}**"
461
+ if r.rationale:
462
+ label += f"\n\n*Rationale:* {r.rationale}"
463
+ if r.regulation:
464
+ label += f"\n\n*Regulation:* {r.regulation}"
465
+ if sev_key == "high":
466
+ st.error(label)
467
+ elif sev_key == "medium":
468
+ st.warning(label)
469
+ else:
470
+ st.info(label)
471
+
472
+ if ai_r:
473
+ st.subheader("AI observations")
474
+ st.caption(
475
+ "LLM-based analysis — contextual patterns, unusual relationships. "
476
+ "Verify against the source before making decisions."
477
+ )
478
+ for r in ai_r:
479
+ label = r.description
480
+ if r.rationale:
481
+ label += f"\n\n*Rationale:* {r.rationale}"
482
+ if r.severity == "high":
483
+ st.error(f"**HIGH:** {label}")
484
+ elif r.severity == "medium":
485
+ st.warning(f"**MEDIUM:** {label}")
486
+ else:
487
+ st.info(f"**LOW:** {label}")
488
+
489
+ if info_r and not basic and not ai_r:
490
+ st.subheader("Information")
491
+ for r in info_r:
492
+ st.info(r.description)
493
+
494
+ if not risks:
495
+ st.divider()
496
+ st.success("No risk indicators found.")
497
+
498
+ # Package-level analysis — only on demo packages (opt-in)
499
+ insights = state.get("package_insights")
500
+ if insights is not None:
501
+ st.divider()
502
+ st.subheader("Package-level analysis")
503
+ st.caption(
504
+ "Beyond the automatic pipeline, the AI also reviews the full document "
505
+ "package together from a cross-doc perspective. It looks for patterns "
506
+ "visible only when the documents are reviewed together."
507
+ )
508
+
509
+ if insights.executive_summary:
510
+ st.markdown("**Executive summary**")
511
+ st.write(insights.executive_summary)
512
+
513
+ if insights.findings:
514
+ st.markdown("**Package-level risks**")
515
+ for f in insights.findings:
516
+ sev = (f.get("severity") or f.get("sulyossag") or "low").lower()
517
+ description = f.get("description") or f.get("leiras", "")
518
+ rationale = f.get("rationale") or f.get("indoklas", "")
519
+ affected = f.get("affected_documents") or f.get("erinto_dokumentumok") or []
520
+
521
+ label = description
522
+ if rationale:
523
+ label += f"\n\n*Rationale:* {rationale}"
524
+ if affected:
525
+ label += f"\n\n*Affected documents:* {', '.join(affected)}"
526
+
527
+ if sev in ("high", "magas"):
528
+ st.error(f"**HIGH:** {label}")
529
+ elif sev in ("medium", "kozepes", "közepes"):
530
+ st.warning(f"**MEDIUM:** {label}")
531
+ else:
532
+ st.info(f"**LOW:** {label}")
533
+
534
+ if insights.key_observations:
535
+ st.markdown("**Key observations**")
536
+ for obs in insights.key_observations:
537
+ st.write(f"- {obs}")
538
+
539
+
540
+ # =============================================================================
541
+ # TAB 3: Chat
542
+ # =============================================================================
543
+
544
with tab_chat:
    st.subheader("Ask about your documents")
    if st.session_state.pipeline_state is None:
        # Chat tools operate on the processed pipeline state — nothing to query yet.
        st.info("Upload and process documents to use the chat.")
    else:
        st.caption(
            "Agentic mode — the AI uses tools to answer "
            "(search, extraction, comparison, validation)."
        )

        # History — replay the persisted transcript on every Streamlit rerun,
        # including the optional per-message source citations.
        for msg in st.session_state.chat_history:
            with st.chat_message(msg["role"]):
                st.markdown(msg["content"])
                if msg.get("sources"):
                    with st.expander("Sources"):
                        for src in msg["sources"]:
                            st.write(f"- {src}")

        # Walrus: st.chat_input returns None until the user submits a prompt.
        if prompt := st.chat_input("Ask anything about the uploaded documents..."):
            st.session_state.chat_history.append({"role": "user", "content": prompt})
            with st.chat_message("user"):
                st.markdown(prompt)

            # The chat graph is rebuilt per turn; tool_context carries the
            # processed-document handles the tools operate on.
            llm = get_chat_model()
            chat_graph = build_chat_graph(llm, st.session_state.tool_context)

            with st.chat_message("assistant"):
                with st.spinner("Analyzing..."):
                    try:
                        # run_async bridges to the long-lived background event
                        # loop (see app/streaming.py).
                        result_state = run_async(chat_graph.ainvoke({
                            "messages": [HumanMessage(content=prompt)],
                        }))
                        answer = result_state.get("final_answer", "(empty answer)")
                        sources = result_state.get("sources_cited") or []
                    except Exception as exc:
                        # Surface the failure as the assistant's reply instead
                        # of crashing the Streamlit script run.
                        answer = f"Chat error: {exc}"
                        sources = []
                st.markdown(answer)
                if sources:
                    with st.expander("Sources"):
                        for src in sources:
                            st.write(f"- {src}")

            # Persist the assistant turn so the history loop above re-renders it.
            st.session_state.chat_history.append({
                "role": "assistant",
                "content": answer,
                "sources": sources,
            })
593
+
594
+
595
+ # =============================================================================
596
+ # TAB 4: DD Assistant
597
+ # =============================================================================
598
+
599
with tab_dd:
    st.subheader("Due Diligence assistant")
    st.caption(
        "Contract portfolio analysis from an acquisition / DD perspective: "
        "near-term expirations, change-of-control clauses, GDPR risks, monthly "
        "obligations and critical red flags. Multi-agent supervisor "
        "(audit + legal + compliance + financial)."
    )

    state = st.session_state.pipeline_state
    if state is None:
        st.info("Upload and process contracts to start a DD analysis.")
    else:
        # DD only makes sense on contract-type documents; filter by the
        # classifier's verdict.
        contracts = [
            d for d in (state.get("documents") or [])
            if d.classification and d.classification.doc_type == "contract"
        ]
        if not contracts:
            st.warning(
                f"Of the {len(state.get('documents') or [])} processed documents "
                "none are contracts. The DD assistant operates on contract-type "
                "documents only. Try the demo package."
            )
        else:
            st.success(f"{len(contracts)} contracts in the portfolio.")

            if st.button("Start DD analysis", type="primary"):
                try:
                    dd_graph = build_dd_graph(llm=get_chat_model())
                    with st.spinner("Multi-agent supervisor running..."):
                        # Bridge to the background event loop (app/streaming.py).
                        dd_state = run_async(dd_graph.ainvoke({"documents": contracts}))
                    # Persist results in session state, then rerun so the
                    # report section below picks them up.
                    state["dd_report"] = dd_state.get("dd_report")
                    st.session_state.dd_contracts_summary = dd_state.get("contracts") or []
                    st.session_state.pipeline_state = state
                    st.rerun()
                except Exception as exc:
                    st.error(f"DD analysis error: {exc}")
                    with st.expander("Developer details (full traceback)"):
                        st.code(traceback.format_exc(), language="python")

            report = state.get("dd_report")
            contracts_summary = st.session_state.dd_contracts_summary

            if report is not None:
                st.divider()
                st.subheader("Executive summary")
                st.write(report.executive_summary)

                mc1, mc2, mc3, mc4 = st.columns(4)
                mc1.metric("Contracts", report.contract_count)
                mc2.metric("High-risk", len(report.high_risk_contracts))
                mc3.metric("Expiring soon (12 mo)", len(report.expiring_soon))
                mc4.metric("Top red flags", len(report.top_red_flags))

                if report.total_monthly_obligations:
                    st.subheader("Monthly obligations (estimated)")
                    # Cap at 4 metric columns; strict=False drops any
                    # currencies beyond the visible columns.
                    obl_cols = st.columns(min(len(report.total_monthly_obligations), 4))
                    for col, (cur, amt) in zip(
                        obl_cols, report.total_monthly_obligations.items(), strict=False
                    ):
                        col.metric(cur, f"{amt:,.0f}")

                if report.top_red_flags:
                    st.subheader("Top red flags")
                    for i, flag in enumerate(report.top_red_flags, start=1):
                        st.error(f"{i}. {flag}")

                if report.expiring_soon:
                    st.subheader("Expiring soon (within 12 months)")
                    for fname in report.expiring_soon:
                        st.warning(f"- {fname}")

            if contracts_summary:
                st.subheader("Contract details")
                for c in contracts_summary:
                    with st.expander(
                        f"{c.file_name} — {c.risk_level.upper()} risk"
                    ):
                        st.write(f"**Type:** {c.contract_type}")
                        if c.parties:
                            st.write(f"**Parties:** {', '.join(c.parties)}")
                        if c.effective_date or c.expiry_date:
                            st.write(
                                f"**Validity:** {c.effective_date or '?'} — "
                                f"{c.expiry_date or '?'}"
                            )
                        if c.total_value:
                            st.write(
                                f"**Value:** {c.total_value:,.0f} {c.currency}"
                            )
                        if c.monthly_fee:
                            st.write(
                                f"**Monthly fee:** {c.monthly_fee:,.0f} {c.monthly_fee_currency}"
                            )
                        if c.risk_elements:
                            st.write("**Risk elements:**")
                            for k in c.risk_elements:
                                st.write(f"- {k}")
                        if c.red_flags:
                            st.write("**Red flags:**")
                            for p in c.red_flags:
                                st.write(f"- {p}")
701
+
702
+
703
+ # =============================================================================
704
+ # TAB 5: Report
705
+ # =============================================================================
706
+
707
with tab_report:
    state = st.session_state.pipeline_state
    # The trailing "if state" already guards against None, so no extra
    # "(state or {})" fallback is needed on the lookup itself.
    report = (state.get("report") or {}) if state else {}

    if not state or not report:
        st.info("Upload and process documents to generate a report.")
    else:
        st.subheader("Report")
        if report.get("generated_at"):
            st.write(f"**Generated at:** {report['generated_at']}")
        st.write(f"**Document count:** {report.get('document_count', 0)}")

        # Executive summary (LLM)
        if report.get("executive_summary"):
            st.subheader("Executive summary")
            st.write(report["executive_summary"])

        # Cross-document section — three-way-match severity counters.
        comp = report.get("comparison")
        if comp:
            st.subheader("Cross-document checks")
            matches = comp.get("matches") or []
            ok = sum(1 for m in matches if m.get("severity") == "ok")
            warn = sum(1 for m in matches if m.get("severity") == "warning")
            crit = sum(1 for m in matches if m.get("severity") == "critical")
            mc1, mc2, mc3 = st.columns(3)
            mc1.metric("OK", ok)
            mc2.metric("Warning", warn)
            mc3.metric("Critical", crit)

        # Risks split — rule-based vs AI observations. The report buckets
        # risks by severity; flatten before splitting by kind.
        risk_buckets = report.get("risks") or {}
        all_risks = (
            (risk_buckets.get("high") or [])
            + (risk_buckets.get("medium") or [])
            + (risk_buckets.get("low") or [])
            + (risk_buckets.get("info") or [])
        )

        if all_risks:
            basic_r = [r for r in all_risks if r.get("kind") != "llm_analysis"]
            ai_r = [r for r in all_risks if r.get("kind") == "llm_analysis"]

            if basic_r:
                st.subheader("Risks (rule-based)")
                for r in basic_r:
                    sev = r.get("severity", "low")
                    description = r.get("description", "")
                    if sev == "high":
                        st.error(f"HIGH: {description}")
                    elif sev == "medium":
                        st.warning(f"MEDIUM: {description}")
                    elif sev == "info":
                        st.info(f"INFO: {description}")
                    else:
                        st.info(f"LOW: {description}")

            if ai_r:
                st.subheader("AI observations")
                st.caption("Verify against the source before making decisions.")
                for r in ai_r:
                    sev = r.get("severity", "low")
                    description = r.get("description", "")
                    rationale = r.get("rationale", "")
                    label = description if not rationale else f"{description} — {rationale}"
                    if sev == "high":
                        st.error(f"HIGH: {label}")
                    elif sev == "medium":
                        st.warning(f"MEDIUM: {label}")
                    else:
                        st.info(f"LOW: {label}")

        # Package-level analysis section. Findings may carry either English
        # or legacy Hungarian keys (severity/sulyossag, description/leiras, ...)
        # depending on the LLM output language — accept both.
        package_section = report.get("package_insights")
        if package_section:
            st.divider()
            st.subheader("Package-level analysis")
            st.caption(
                "Beyond the automatic pipeline, the AI reviewed the full document "
                "package as a whole from a cross-doc perspective."
            )
            if package_section.get("executive_summary"):
                st.markdown("**Executive summary**")
                st.write(package_section["executive_summary"])

            package_findings = package_section.get("findings") or []
            if package_findings:
                st.markdown("**Package-level risks**")
                for f in package_findings:
                    sev = (f.get("severity") or f.get("sulyossag") or "low").lower()
                    description = f.get("description") or f.get("leiras", "")
                    rationale = f.get("rationale") or f.get("indoklas", "")
                    affected = f.get("affected_documents") or f.get("erinto_dokumentumok") or []

                    label = description
                    if rationale:
                        label += f"\n\n*Rationale:* {rationale}"
                    if affected:
                        label += f"\n\n*Affected documents:* {', '.join(affected)}"

                    if sev in ("high", "magas"):
                        st.error(f"**HIGH:** {label}")
                    elif sev in ("medium", "kozepes", "közepes"):
                        st.warning(f"**MEDIUM:** {label}")
                    else:
                        st.info(f"**LOW:** {label}")

            observations = package_section.get("key_observations") or []
            if observations:
                st.markdown("**Key observations**")
                for obs in observations:
                    st.write(f"- {obs}")

        # DD analysis section
        dd_section = report.get("dd_analysis")
        if dd_section:
            st.divider()
            st.subheader("Due Diligence analysis")
            st.caption("Contract portfolio analysis from an acquisition / DD perspective.")

            if dd_section.get("executive_summary"):
                st.markdown("**Executive summary**")
                st.write(dd_section["executive_summary"])

            red_flags = dd_section.get("top_red_flags") or []
            if red_flags:
                st.markdown("**Top red flags**")
                for flag in red_flags:
                    st.error(flag)

            contracts_list = dd_section.get("contracts") or []
            if contracts_list:
                st.markdown("**Per-contract risk level**")
                for c in contracts_list:
                    # Entries may be Pydantic models or plain dicts; normalize.
                    if hasattr(c, "model_dump"):
                        c = c.model_dump()
                    level = c.get("risk_level") or c.get("kockazati_szint", "low")
                    file_name = c.get("file_name", "")
                    contract_type = c.get("contract_type") or c.get("szerzodes_tipusa", "")
                    parties = ", ".join(c.get("parties") or c.get("felek") or [])
                    label = f"{file_name} ({contract_type})"
                    if parties:
                        label += f" — Parties: {parties}"
                    if level in ("high", "magas"):
                        st.error(f"**HIGH:** {label}")
                    elif level in ("medium", "kozepes", "közepes"):
                        st.warning(f"**MEDIUM:** {label}")
                    else:
                        st.info(f"**LOW:** {label}")

            obligations = dd_section.get("total_monthly_obligations") or {}
            if obligations:
                st.markdown("**Monthly obligations (estimated)**")
                # Cap at 4 metric columns; strict=False drops overflow currencies.
                obl_cols = st.columns(min(len(obligations), 4))
                for col, (currency, amount) in zip(
                    obl_cols, obligations.items(), strict=False
                ):
                    col.metric(currency, f"{amount:,.0f}")

        # JSON view (debug)
        st.divider()
        with st.expander("JSON view (raw)"):
            st.json(report)

        # Export
        st.subheader("Export")
        col_json, col_docx = st.columns(2)
        with col_json:
            # default=str stringifies non-JSON-native values (e.g. datetimes)
            # instead of raising.
            report_json = json.dumps(report, ensure_ascii=False, indent=2, default=str)
            st.download_button(
                label="Download report (JSON)",
                data=report_json,
                file_name=f"report_{datetime.now().strftime('%Y%m%d_%H%M')}.json",
                mime="application/json",
                help="Raw data in JSON form — for machine processing or archival.",
            )

        with col_docx:
            if st.button("Generate DOCX report", type="primary"):
                try:
                    # Cache the generated bytes in session state so the
                    # download button survives the post-click rerun.
                    docx_bytes = build_docx_sync(state)
                    st.session_state.docx_bytes = docx_bytes
                    st.success("DOCX ready — click the download button.")
                except Exception as exc:
                    st.error(f"DOCX generation error: {exc}")
                    with st.expander("Developer details"):
                        st.code(traceback.format_exc(), language="python")

            if st.session_state.docx_bytes:
                st.download_button(
                    label="Download DOCX",
                    data=st.session_state.docx_bytes,
                    file_name=f"report_{datetime.now().strftime('%Y%m%d_%H%M')}.docx",
                    mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                    help="Formatted Word document — for printing, presentations, or client handoff.",
                )
903
+
904
+
905
+ # ---------------------------------------------------------------------------
906
+ # Applied standards footer (dynamic — only the actually triggered standards)
907
+ # ---------------------------------------------------------------------------
908
+
909
# Only risks that actually fired contribute standards, so the footer lists
# exactly the methods applied to THIS document set.
if st.session_state.pipeline_state:
    _state = st.session_state.pipeline_state
    _risks = _state.get("risks") or []
    if _risks:
        # Local import keeps the domain_checks package off the critical
        # startup path when no pipeline has run yet.
        from domain_checks import get_applied_standards
        _standards = get_applied_standards(_risks)
        if _standards:
            st.divider()
            st.caption(
                "**Applied standards and methods:** "
                + " | ".join(_standards)
            )
921
+
922
+
923
+ # ---------------------------------------------------------------------------
924
+ # Footer (MIT-licensed; see LICENSE)
925
+ # ---------------------------------------------------------------------------
926
+
927
# Static attribution footer, rendered on every script run.
st.divider()
st.caption(
    "Built by Team CsimpiCsirkek for the AMD Developer Hackathon × lablab.ai (2026). "
    "MIT licensed — see LICENSE. Powered by LangGraph + Qwen on AMD MI300X."
)
app/streaming.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Streamlit + asyncio integration helper.
2
+
3
+ Bridges Streamlit (uvloop) and LangGraph (asyncio) via a long-lived background
4
+ event loop (see app/async_runtime.py).
5
+
6
+ ``run_async()`` and ``stream_async()`` are simple wrappers — every call uses
7
+ the same background loop, so persistent resources (ChromaDB, AsyncSqliteSaver,
8
+ sentence-transformers cache) are NOT rebuilt per call.
9
+
10
+ ``run_with_progress()`` produces per-event progress-bar updates from the
11
+ ``astream(stream_mode="updates")`` event stream.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import AsyncIterator
17
+ from typing import Any, Callable
18
+
19
+ from app.async_runtime import AsyncRuntime
20
+
21
+
22
def run_async(coro):
    """Execute ``coro`` on the shared background event loop and block for its result.

    Thin synchronous facade over :class:`AsyncRuntime` — the same loop serves
    every call, so loop-bound resources persist across invocations.
    """
    runtime = AsyncRuntime.get()
    return runtime.submit(coro)
25
+
26
+
27
def stream_async(async_gen: AsyncIterator[Any]):
    """Adapt an async iterator into a plain sync generator.

    The background loop drives ``async_gen``; items surface on the caller's
    thread, which makes the result directly usable with ``st.write_stream``.
    """
    bridge = AsyncRuntime.get()
    yield from bridge.submit_iter(async_gen)
30
+
31
+
32
# Maps pipeline-graph node names to human-readable progress labels.
# run_with_progress() falls back to the raw node name for unknown nodes.
_PROGRESS_LABEL_MAP = {
    "start_timer": "Starting",
    "ingest_per_doc": "Loading documents",
    "ingest_join": "Loading documents (join)",
    "classify_per_doc": "Classifying",
    "classify_join": "Classifying (join)",
    "extract_per_doc": "Extracting structured data",
    "extract_join": "Extracting (join)",
    "quote_validator": "Quote verification",
    "rag_index_per_doc": "Indexing",
    "rag_join": "Indexing (join)",
    "compare": "Cross-document checks",
    "risk": "Risk analysis",
    "report": "Generating report",
    "finish_timer": "Done",
}
48
+
49
+
50
def run_with_progress(
    graph,
    input_state: dict,
    on_progress: Callable[[int, int, str], None] | None = None,
    total_steps: int | None = None,
) -> dict:
    """Drive LangGraph ``astream`` with progress callbacks; return the final state.

    The background event loop runs the async stream, while ``on_progress``
    fires on the CALLER thread (the Streamlit main thread) after each event,
    so ``st.progress(...)`` widgets may be updated from it safely.

    Args:
        graph: a CompiledStateGraph (or anything supporting astream).
        input_state: the graph entry state.
        on_progress: optional ``(step, total, label)`` callback; Streamlit
            widget calls are safe here (caller thread).
        total_steps: optional progress-bar denominator; when omitted the
            denominator grows with the step count (floor of 12).

    Returns:
        The graph's final state, equivalent to ``ainvoke()``'s result.
    """

    async def _paired_events():
        # Multi-stream-mode astream yields (stream_mode, event) pairs already;
        # re-yield them so submit_iter can bridge the stream synchronously.
        async for mode, payload in graph.astream(
            input_state, stream_mode=["updates", "values"]
        ):
            yield mode, payload

    latest_values: dict = {}
    completed = 0

    # submit_iter converts the async iterator into a sync one on the caller
    # thread, which is what keeps the progress callback Streamlit-safe.
    for mode, payload in AsyncRuntime.get().submit_iter(_paired_events()):
        if mode == "values" and isinstance(payload, dict):
            # "values" events carry full state snapshots; keep the latest.
            latest_values = payload
        elif mode == "updates":
            # One "updates" event may cover several nodes; count each one.
            for node in (payload or {}):
                completed += 1
                if on_progress is None:
                    continue
                denominator = (
                    total_steps if total_steps is not None else max(completed, 12)
                )
                on_progress(completed, denominator, _PROGRESS_LABEL_MAP.get(node, node))

    return latest_values
app/tabs/__init__.py ADDED
File without changes
config.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Central configuration — Pydantic BaseSettings env-bound.
2
+
3
+ Single source of truth: the ``settings = Settings()`` singleton. Every module
4
+ imports this. The ``.env`` file is automatically loaded (python-dotenv) if it
5
+ exists in the project root.
6
+
7
+ Profiles:
8
+ * ``LLM_PROFILE=vllm`` — Qwen 2.5 on AMD MI300X via vLLM (OpenAI-compat). Production default.
9
+ * ``LLM_PROFILE=ollama`` — local Ollama (Qwen 2.5 7B Instruct). Dev / data-privacy.
10
+ * ``LLM_PROFILE=dummy`` — deterministic stub (CI / eval / load).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from pathlib import Path
16
+ from typing import Literal
17
+
18
+ from pydantic import Field, computed_field
19
+ from pydantic_settings import BaseSettings, SettingsConfigDict
20
+
21
+ # Project root absolute path — independent of where we are launched from
22
+ PROJECT_ROOT = Path(__file__).resolve().parent
23
+
24
+
25
class Settings(BaseSettings):
    """Full application runtime configuration.

    Every field reads from .env or env vars, with defaults. If .env does not
    exist, the defaults run.
    """

    model_config = SettingsConfigDict(
        env_file=PROJECT_ROOT / ".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",  # don't raise on unknown env vars (e.g. LANGCHAIN_*)
    )

    # ---------------------------------------------------------------------
    # LLM provider selection
    # ---------------------------------------------------------------------
    llm_profile: Literal["vllm", "ollama", "dummy"] = "vllm"
    """Default LLM profile. Runtime override:
    ``graph.invoke(state, config={"configurable": {"llm_profile": "dummy"}})``."""

    # vLLM (AMD Developer Cloud MI300X) — production default
    vllm_base_url: str = "http://localhost:8000/v1"
    """vLLM endpoint URL. In production: http://<mi300x-public-ip>:8000/v1"""

    vllm_model: str = "Qwen/Qwen2.5-14B-Instruct"
    """Model id served by vLLM. Alternatives: Qwen/Qwen2.5-32B-Instruct, Qwen/Qwen2.5-7B-Instruct."""

    vllm_api_key: str | None = None
    """Optional API key for vLLM. If unset, sent as 'EMPTY' (vLLM no-auth mode).
    In production set a real key and start vLLM with --api-key <key>."""

    vllm_temperature: float = 0.0
    vllm_max_tokens: int = 4096

    # Ollama — local fallback
    ollama_base_url: str = "http://localhost:11434"
    ollama_model: str = "qwen2.5:7b-instruct"
    ollama_temperature: float = 0.0

    # ---------------------------------------------------------------------
    # Embedding model — sentence-transformers, runs locally on CPU
    # ---------------------------------------------------------------------
    embedding_model: str = "BAAI/bge-m3"
    """Default: BAAI/bge-m3 (2.27 GB, 1024 dim, multilingual EN/HU/DE/FR/...).
    Lighter alternative if memory-constrained: BAAI/bge-small-en-v1.5 (133 MB, 384 dim, en-only)."""

    # ---------------------------------------------------------------------
    # Storage
    # ---------------------------------------------------------------------
    # Paths default relative to the repo so launches from any CWD behave the same.
    chroma_path: Path = Field(default=PROJECT_ROOT / "chroma_db")
    chroma_collection: str = "documents"
    checkpoint_db_path: Path = Field(default=PROJECT_ROOT / "data" / "checkpoints.sqlite")

    # ---------------------------------------------------------------------
    # Pipeline tuning
    # ---------------------------------------------------------------------
    chunk_max_chars: int = 15_000
    chunk_overlap_chars: int = 500
    single_call_threshold: int = 30_000
    """If doc.full_text < this many chars, a single LLM call is enough (no chunking)."""

    # Loop guards
    chat_max_iterations: int = 10
    """Chat agent ↔ tools loop max iterations — infinite-loop guard."""

    validator_max_retries: int = 2
    """Chat validator → agent retry count when source citations are missing."""

    dd_supervisor_max_iterations: int = 4
    """DD supervisor max iterations before forced synthesizer fallback."""

    # ---------------------------------------------------------------------
    # Streamlit
    # ---------------------------------------------------------------------
    streamlit_port: int = 8501

    # ---------------------------------------------------------------------
    # LangSmith observability (optional)
    # ---------------------------------------------------------------------
    langchain_tracing_v2: bool = False
    langchain_api_key: str | None = None
    langchain_project: str = "document-intelligence-amd"

    # ---------------------------------------------------------------------
    # Computed fields — derived, read-only; serialized alongside regular fields.
    # ---------------------------------------------------------------------
    @computed_field
    @property
    def project_root(self) -> Path:
        """Absolute repo root, exposed for modules that need on-disk paths."""
        return PROJECT_ROOT

    @computed_field
    @property
    def langsmith_enabled(self) -> bool:
        """True only when tracing is switched on AND an API key is present."""
        return self.langchain_tracing_v2 and bool(self.langchain_api_key)

    @computed_field
    @property
    def is_dummy(self) -> bool:
        """True when the deterministic stub profile (CI / eval / load) is active."""
        return self.llm_profile == "dummy"
126
+
127
+
128
# Singleton — every module imports this.
# Instantiated at import time: .env / environment are read once per process.
settings = Settings()
data/sanctions_snapshot.json ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "source": "EU Consolidated Sanctions List + OFAC SDN (snapshot)",
4
+ "date": "2026-04-10",
5
+ "note": "Statikus demo lista -- nem elo API. Frissitendo periodikusan."
6
+ },
7
+ "entities": [
8
+ {
9
+ "name": "Gazprom",
10
+ "country": "RU",
11
+ "type": "entity"
12
+ },
13
+ {
14
+ "name": "Rosneft",
15
+ "country": "RU",
16
+ "type": "entity"
17
+ },
18
+ {
19
+ "name": "Sberbank",
20
+ "country": "RU",
21
+ "type": "entity"
22
+ },
23
+ {
24
+ "name": "VTB Bank",
25
+ "country": "RU",
26
+ "type": "entity"
27
+ },
28
+ {
29
+ "name": "Rostec",
30
+ "country": "RU",
31
+ "type": "entity"
32
+ },
33
+ {
34
+ "name": "Almaz-Antey",
35
+ "country": "RU",
36
+ "type": "entity"
37
+ },
38
+ {
39
+ "name": "Kalashnikov Concern",
40
+ "country": "RU",
41
+ "type": "entity"
42
+ },
43
+ {
44
+ "name": "Russian Direct Investment Fund",
45
+ "country": "RU",
46
+ "type": "entity"
47
+ },
48
+ {
49
+ "name": "Novatek",
50
+ "country": "RU",
51
+ "type": "entity"
52
+ },
53
+ {
54
+ "name": "Sovcomflot",
55
+ "country": "RU",
56
+ "type": "entity"
57
+ },
58
+ {
59
+ "name": "Belaruskali",
60
+ "country": "BY",
61
+ "type": "entity"
62
+ },
63
+ {
64
+ "name": "Belneftekhim",
65
+ "country": "BY",
66
+ "type": "entity"
67
+ },
68
+ {
69
+ "name": "National Iranian Oil Company",
70
+ "country": "IR",
71
+ "type": "entity"
72
+ },
73
+ {
74
+ "name": "Bank Melli Iran",
75
+ "country": "IR",
76
+ "type": "entity"
77
+ },
78
+ {
79
+ "name": "Bank Saderat Iran",
80
+ "country": "IR",
81
+ "type": "entity"
82
+ },
83
+ {
84
+ "name": "Korea Mining Development Trading Corporation",
85
+ "country": "KP",
86
+ "type": "entity"
87
+ },
88
+ {
89
+ "name": "Commercial Bank of Syria",
90
+ "country": "SY",
91
+ "type": "entity"
92
+ },
93
+ {
94
+ "name": "Volga Industrial Holdings",
95
+ "country": "RU",
96
+ "type": "entity"
97
+ }
98
+ ],
99
+ "high_risk_countries": [
100
+ "RU",
101
+ "BY",
102
+ "IR",
103
+ "KP",
104
+ "SY",
105
+ "CU",
106
+ "VE",
107
+ "PA",
108
+ "VG",
109
+ "KY",
110
+ "BZ",
111
+ "SC",
112
+ "VU"
113
+ ]
114
+ }
docker-compose.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
services:
  # ---------------------------------------------------------------------------
  # Streamlit + LangGraph backend
  # ---------------------------------------------------------------------------
  langgraph-app:
    build: .
    image: paperhawk:latest
    container_name: document-intelligence-amd
    ports:
      - "8501:8501"
    env_file:
      # Long-form syntax (Compose spec, same generation as depends_on.required
      # below): do not fail when .env is absent — the repo ships only
      # .env.example, and every variable below has a shell fallback anyway.
      - path: .env
        required: false
    environment:
      # Default vLLM — overridable via .env or shell export
      - LLM_PROFILE=${LLM_PROFILE:-vllm}
      - VLLM_BASE_URL=${VLLM_BASE_URL:-http://localhost:8000/v1}
      - VLLM_MODEL=${VLLM_MODEL:-Qwen/Qwen2.5-14B-Instruct}
      - OLLAMA_BASE_URL=http://ollama:11434
    volumes:
      # AsyncSqliteSaver checkpointer persists across restarts
      - ./data:/app/data
      # ChromaDB persistent vector store
      - ./chroma_db:/app/chroma_db
    depends_on:
      ollama:
        condition: service_healthy
        # Optional dependency: the app starts even when the ollama profile
        # is not enabled (vLLM profile is the production default).
        required: false
    restart: unless-stopped

  # ---------------------------------------------------------------------------
  # Ollama LLM server (OPTIONAL profile — local dev fallback)
  # ---------------------------------------------------------------------------
  # Start: docker compose --profile ollama up -d
  # Model: docker compose exec ollama ollama pull qwen2.5:7b-instruct
  ollama:
    image: ollama/ollama:latest
    container_name: document-intelligence-amd-ollama
    profiles: ["ollama"]
    ports:
      - "11434:11434"
    volumes:
      - ollama_models:/root/.ollama
    healthcheck:
      test: ["CMD", "ollama", "list"]
      interval: 10s
      timeout: 5s
      retries: 10
      start_period: 30s
    restart: unless-stopped

volumes:
  ollama_models:
docs/HF_SPACE_DEFAULT_GETTING_STARTED.md ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HF Space Default Getting Started — Snapshot 2026-05-05
2
+
3
+ A `lablab-ai-amd-developer-hackathon/paperhawk` Space létrehozása után a HF Spaces egy default "Get Started" útmutatót mutat. Ezt mentjük el itt referenciaként, mert a default Dockerfile-mintája hasznos referencia a paperhawk Dockerfile átírásához (port 8501 → 7860, user-setup pattern).
4
+
5
+ **Forrás**: a Space oldal alján, a default-README után jelent meg.
6
+
7
+ **URL**: https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/paperhawk
8
+
9
+ **Kontextus**: a Space frissen létrehozva, Docker SDK + Blank template + `Real-DI-Audit/14 rules/6 anti-halluc/LangGraph/Qwen/MI300X` short description.
10
+
11
+ ---
12
+
13
+ ## Get started with your Docker Space!
14
+
15
+ Your space has been created, follow these steps to get started (or read the full [documentation](https://huggingface.co/docs/hub/spaces-sdks-docker))
16
+
17
+ ### Start by cloning this repo by using:
18
+
19
+ **HTTPS:**
20
+
21
+ ```bash
22
+ git clone https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/paperhawk
23
+ ```
24
+
25
+ **SSH:**
26
+
27
+ ```bash
28
+ git clone git@hf.co:spaces/lablab-ai-amd-developer-hackathon/paperhawk
29
+ ```
30
+
31
+ ### Make sure you're CLI v2.x.x or above:
32
+
33
+ ```bash
34
+ curl -LsSf https://hf.co/cli/install.sh | sh
35
+ ```
36
+
37
+ ### Download the Space:
38
+
39
+ ```bash
40
+ hf download lablab-ai-amd-developer-hackathon/paperhawk --repo-type=space
41
+ ```
42
+
43
+ ---
44
+
45
+ ## Let's create a simple Python app using FastAPI
46
+
47
+ ### `requirements.txt`
48
+
49
+ ```
50
+ fastapi
51
+ uvicorn[standard]
52
+ ```
53
+
54
+ > **Hint:** You can also create the requirements file directly in your browser.
55
+
56
+ ### `app.py`
57
+
58
+ ```python
59
+ from fastapi import FastAPI
60
+
61
+ app = FastAPI()
62
+
63
+ @app.get("/")
64
+ def greet_json():
65
+ return {"Hello": "World!"}
66
+ ```
67
+
68
+ > **Hint:** You can also create the app file directly in your browser.
69
+
70
+ ---
71
+
72
+ ## Create your Dockerfile
73
+
74
+ ```dockerfile
75
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
76
+ # you will also find guides on how best to write your Dockerfile
77
+
78
+ FROM python:3.9
79
+
80
+ RUN useradd -m -u 1000 user
81
+ USER user
82
+ ENV PATH="/home/user/.local/bin:$PATH"
83
+
84
+ WORKDIR /app
85
+
86
+ COPY --chown=user ./requirements.txt requirements.txt
87
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
88
+
89
+ COPY --chown=user . /app
90
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
91
+ ```
92
+
93
+ > **Hint:** Alternatively, you can create the Dockerfile file directly in your browser.
94
+
95
+ ---
96
+
97
+ ## Then commit and push
98
+
99
+ ```bash
100
+ git add requirements.txt app.py Dockerfile
101
+ git commit -m "Add application file"
102
+ git push
103
+ ```
104
+
105
+ > Finally, your Space should be running on this page after a few moments!
106
+
107
+ ---
108
+
109
+ ## App port
110
+
111
+ > Your Docker Space needs to listen on port `7860`.
112
+
113
+ ## Personalize your Space
114
+
115
+ Make your Space stand out by customizing its emoji, colors, and description by **editing metadata** in its `README.md` file.
116
+
117
+ ## Documentation
118
+
119
+ Read the full documentation for Docker Spaces [here](https://huggingface.co/docs/hub/spaces-sdks-docker).
120
+
121
+ ---
122
+
123
+ ## Mit jelent ez nekünk (paperhawk-specifikus megjegyzések)
124
+
125
+ ### A default Dockerfile vs a paperhawk Dockerfile
126
+
127
+ A paperhawk meglévő Dockerfile-ja **fejlettebb** mint a default-példa:
128
+
129
+ | Aspektus | HF default | Paperhawk |
130
+ |---|---|---|
131
+ | Python version | `python:3.9` | `python:3.12-slim` (modernebb) |
132
+ | User setup | `useradd -m -u 1000 user` + `USER user` (non-root, security best-practice) | NINCS (root user) |
133
+ | OS-deps | nincs | `tesseract-ocr` + `poppler-utils` + `libmupdf-dev` (PDF + OCR) |
134
+ | Pre-download | nincs | `BAAI/bge-m3` 2.27 GB (build-time) |
135
+ | App | `uvicorn` FastAPI | `streamlit` |
136
+ | Port | **`7860`** | **`8501`** → **átírva 7860-ra a HF Space-nek** (2026-05-05) |
137
+
138
+ ### A 2 fő átírás amit a paperhawk Dockerfile-on csinálni kellett
139
+
140
+ 1. **Port-átállítás 8501 → 7860** (kész, 2026-05-05):
141
+ - `EXPOSE 8501` → `EXPOSE 7860`
142
+ - `--server.port=8501` → `--server.port=7860`
143
+ - `HEALTHCHECK ... http://localhost:8501/_stcore/health` → `http://localhost:7860/_stcore/health`
144
+
145
+ 2. **(opcionális) User-setup hozzáadása** security best-practice szempontból:
146
+ - `RUN useradd -m -u 1000 user`
147
+ - `USER user`
148
+ - `ENV PATH="/home/user/.local/bin:$PATH"`
149
+ - `COPY --chown=user ...`
150
+ - **A HF Spaces NEM követeli kötelező módon**, és a paperhawk-stack root-ként is jól fut.
151
+
152
+ ### A README.md front-matter
153
+
154
+ A HF Spaces megköveteli a `README.md` tetején egy YAML front-matter-t. A paperhawk `README.md` tetejére beillesztve (2026-05-05):
155
+
156
+ ```yaml
157
+ ---
158
+ title: PaperHawk
159
+ emoji: 🦅
160
+ colorFrom: red
161
+ colorTo: orange
162
+ sdk: docker
163
+ pinned: false
164
+ license: mit
165
+ short_description: Real-DI-Audit/14 rules/6 anti-halluc/LangGraph/Qwen/MI300X
166
+ ---
167
+ ```
168
+
169
+ A meglévő paperhawk `README.md`-tartalom (project README) ezután következik. A front-matter csak a HF Space-nek szól, GitHub-on is renderelhető (a YAML-t code-block-ként mutatja).
170
+
171
+ ### A clone + push workflow a paperhawk-on
172
+
173
+ A meglévő paperhawk GitHub-repón (`nandorfivince/paperhawk`) hozzáadunk egy új remote-ot:
174
+
175
+ ```bash
176
+ cd ~/development/<host-paperhawk-path>
177
+ git remote add space https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/paperhawk
178
+ git push space main
179
+ ```
180
+
181
+ A push első futáskor authenticálni kér — a HF Hub-token-t kéri, amit a Vincsipe accountból lehet generálni a https://huggingface.co/settings/tokens oldalon (új Token, "Write" scope).
182
+
183
+ ### App port környezeti változó
184
+
185
+ A HF Spaces a `7860`-as portot várja default. A paperhawk `streamlit` parancs ki van egészítve a `--server.port=7860` flag-gel a `Dockerfile`-ben (2026-05-05).
186
+
187
+ ### HF Spaces hardware
188
+
189
+ CPU Basic = free tier, 16 GB RAM, 2 vCPU. Bőven elég a paperhawk-Streamlit-jéhez (~3-5 GB RAM-fogyasztás bge-m3 + ChromaDB + Streamlit). A vLLM az AMD MI300X-en fut **külön**, a Space `VLLM_BASE_URL` Secret-en keresztül hivatkozik rá.
190
+
191
+ ### Sleep mode
192
+
193
+ A free Space 48 órás inaktivitás után alvó-módba kerül. Az első request a felébredés után 30-60 sec. A bíráskodás alatt érdemes **periodikusan** pingelni a Space-t (pl. UptimeRobot 30 perces intervallum), vagy a Build-in-Public posztokon megosztani hogy organic-traffic-al ébren tartsuk.
docs/SUBMISSION.md ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PaperHawk — Hackathon Submission Brief
2
+
3
+ > One-pager for the **AMD Developer Hackathon × lablab.ai** (May 2026) submission form.
4
+ > Every section below is ready to paste directly into the lablab.ai project page.
5
+
6
+ ---
7
+
8
+ ## Project Title
9
+
10
+ **PaperHawk**
11
+
12
+ ---
13
+
14
+ ## Short Description
15
+
16
+ > Multi-agent document intelligence that catches what RAG misses. 14 deterministic domain checks, 5+1 anti-hallucination layers, and a 5-tool agentic chat — running Qwen 2.5 on AMD Instinct MI300X via vLLM. Open source, MIT licensed.
17
+
18
+ *(247 characters)*
19
+
20
+ ---
21
+
22
+ ## Long Description
23
+
24
+ ### The Problem
25
+
26
+ RAG retrieves passages. Audit finds inconsistencies. Today's RAG chatbots can't do the second.
27
+
28
+ When someone opens a folder of 25 invoices, three contracts, two purchase orders, and a financial report, they don't ask a chatbot to summarize the contract. They ask: *"Does the supplier in Invoice #7 match the vendor in PO #3? Is the VAT rate consistent across the package? Is there a hidden change-of-control clause? Is the math on the gross total correct? Are any of these counterparties on the EU/OFAC sanctions list?"*
29
+
30
+ These are not retrieval questions. They are **reasoning, validation, and cross-reference** questions over multiple typed documents. A standard chunk-embed-retrieve-generate pipeline cannot answer them, because the question is not contained in any single chunk. It lives in the relationship between documents.
31
+
32
+ PaperHawk is built specifically for this gap.
33
+
34
+ ### What We Built
35
+
36
+ PaperHawk is a LangGraph 0.6-native system with **4 compiled graphs** (pipeline, chat, DD assistant, package insights) wired together with **Send-API parallelism**, an `AsyncSqliteSaver` checkpointer, and a `configurable_alternatives` provider that swaps cleanly between vLLM (production), Ollama (local dev), and a deterministic dummy (CI). It is not a single-agent retrieval pipeline.
37
+
38
+ Concretely:
39
+
40
+ - **6 reusable subgraphs** for ingest, classification, extraction, risk dispatch, LLM risk ensemble, and chat tool routing
41
+ - **14 deterministic domain checks** wired into a registry — ISA 240/500/320 (audit standards), GDPR Article 28, Incoterms 2020, AML sanctions, tax-ID validation, contract completeness, materiality thresholds, and more. Every check is a Python `Protocol` implementation, not an LLM prompt.
42
+ - **5+1 anti-hallucination layers**: `temperature=0`, a `_quotes` field for verbatim source citation, `_confidence` per extracted field, plausibility validators, a 3-layer LLM-risk filter chain, and a quote validator that drops any LLM output whose claimed source quote isn't found in the document.
43
+ - **5-tool agentic chat** (`list_documents`, `get_extraction`, `search_documents`, `compare_documents`, `validate_document`) with strict `[Source: filename.pdf]` citations validated by a post-processor — answers without provenance never reach the user.
44
+ - **Multi-agent DD assistant**: 4 specialist agents (audit / legal / compliance / financial) coordinated by a supervisor and a synthesizer, in the spirit of the LangGraph supervisor cookbook but production-shaped.
45
+ - **Streamlit 5-tab UI**: Upload, Results, Chat, DD Assistant, Report — drivable in 30 seconds with three pre-bundled demo packages.
46
+
47
+ The codebase ships with **61 tests passing in CI** without any LLM (the deterministic dummy provider), is MIT licensed, and is English-first with a multilingual fallback path for EN/HU/DE inputs.
48
+
49
+ ### Why AMD Instinct MI300X
50
+
51
+ The MI300X gives us **192 GB of HBM3 memory** in a single accelerator — enough headroom to host Qwen 2.5 14B Instruct in BF16 with comfortable KV-cache space for our long agentic conversations. The DD supervisor plus four specialists in one session easily exceeds 32k tokens of context, and the MI300X handles it without paging.
52
+
53
+ vLLM's continuous batching on ROCm lets the Streamlit UI fire concurrent requests during a multi-document upload without queueing artifacts. The FP8 / BF16 paths supported by the MI300X memory bandwidth open a clean upgrade route to Qwen 2.5 32B for finals night.
54
+
55
+ We're using the AMD Developer Cloud — `infra/vllm/Dockerfile` and `infra/vllm/serve.sh` are committed in the repo and start vLLM with `--api-key`, `--max-model-len 32768`, and a configurable model tag. The whole inference stack is containerized; nothing is hand-rolled on the GPU node.
56
+
57
+ ### Why Qwen 2.5 Instruct
58
+
59
+ Three reasons.
60
+
61
+ First, **strong tool calling**. Qwen 2.5 14B handles our 5-tool chat router reliably; tool-routing accuracy in our integration tests is on par with the proprietary reference model we used in early development. The tool-call JSON is well-formed, parameters are typed correctly, and unnecessary tool calls are rare.
62
+
63
+ Second, **structured output that holds**. `with_structured_output` returns valid Pydantic v2 JSON every time in our extraction subgraph, including the nested `_quotes` and `_confidence` fields. This is where many smaller open-source models fail under load — Qwen 2.5 doesn't.
64
+
65
+ Third, **multilingual fluency**. Our pipeline often reads Hungarian, German, and English documents in the same package, and Qwen handles cross-lingual extraction without dropping accuracy. We don't fine-tune; we pull `Qwen/Qwen2.5-14B-Instruct` from Hugging Face directly into the vLLM container — clean, reproducible, and rerunnable by anyone.
66
+
67
+ ### The Pipeline (5-Step End-to-End)
68
+
69
+ 1. **Ingest** — PDF, DOCX, and image inputs go through three loaders. Scanned PDFs hit a vision-first fallback (the LLM reads the rendered page directly); native PDFs use PyMuPDF + pdfplumber for table-aware extraction; DOCX is parsed natively.
70
+ 2. **Classify** — A 6-way doc-type classifier (`invoice`, `delivery_note`, `purchase_order`, `contract`, `financial_report`, `other`) with structured output, calibrated for ISA 500 evidence-quality scoring.
71
+ 3. **Extract** — Per doc-type Pydantic schema, with a universal extraction subgraph as a fallback for unknown types. Every extracted field carries its own `_quotes` and `_confidence` — anti-hallucination is built into the type system, not a post-hoc check.
72
+ 4. **Cross-reference** — Three-way matching (invoice + delivery note + purchase order) for audit packages; multi-agent synthesis for DD packages; package-level analyzers for duplicate-invoice detection (ISA 240) and pricing anomalies.
73
+ 5. **Risk + Report** — Plausibility checks + 14 domain checks (deterministic, parallel via Send fan-out) + LLM risk ensemble + 3-layer filter that drops repeats, business-normal flags, and unsupported claims. Final output: a ranked risk list with severity, regulation source, and source citations; a downloadable DOCX report; structured JSON for API consumers.
74
+
75
+ ### Anti-Hallucination Is Non-Negotiable
76
+
77
+ The system is designed so the LLM cannot lie about a document and have the lie pass through.
78
+
79
+ Every LLM-generated extraction includes a `_quotes` array with the verbatim text the model cites as source. A post-processor scans each quote against the document body. If the quote isn't there, the field is rejected — period. The 3-layer LLM-risk filter rejects any risk claim whose quoted evidence isn't in the package, repeats a finding from the deterministic domain checks, or describes a normal business condition.
80
+
81
+ This isn't a guardrail layer slapped on top — it's the trust contract between the model and the user, and it runs on every output. The `validation/` package is one of the most-edited folders in the repo precisely because we treat it as a first-class concern, not an afterthought.
82
+
83
+ ### Demo Packages
84
+
85
+ Three pre-built scenarios are bundled in `test_data/demo_packages/`. Each is a one-click demo from the Upload tab:
86
+
87
+ - **Audit Demo** — Three invoices from the same supplier; the March one is 50% pricier than January and February. The package-level analyzer flags it as an over-billing pattern, and the chat answers *"Why is the March invoice more expensive?"* with cited line items.
88
+ - **DD Demo** — An NDA, a service agreement, and an amendment in an acquisition scenario. The DD assistant flags a hidden change-of-control trigger and an automatic-renewal red flag, and the synthesizer writes an executive summary in three paragraphs.
89
+ - **Compliance Demo** — Two contracts; one is missing GDPR Article 28 sub-processor language. Domain check #8 detects it, and the report includes the exact regulatory citation.
90
+
91
+ End-to-end demo time on AMD MI300X: **30–90 seconds** per package.
92
+
93
+ ### Track 1 + Build in Public + Hugging Face Special Prize
94
+
95
+ **Track 1 — AI Agents & Agentic Workflows** is our primary submission. The track brief asks for projects that "move beyond simple RAG to build sophisticated AI agentic systems and workloads." PaperHawk fits the brief: 4 compiled graphs, 6 subgraphs, multi-agent DD orchestration, 5-tool agentic chat, and a registry-based deterministic check fabric. None of this is retrieval-only. The chat *is* an agent; the DD assistant is a multi-agent system; the pipeline is a typed-state orchestration.
96
+
97
+ **Ship It + Build in Public** is a natural cross-track fit. The repo is MIT licensed and public on GitHub. We're publishing a technical walkthrough and at least two updates on X / LinkedIn — tagging `@AIatAMD` and `@lablab` — covering two design choices that don't usually appear in hackathon RAG demos: the LangGraph Send-API parallelism for the deterministic check fan-out, and the post-hoc citation validator for the chat tool outputs.
98
+
99
+ **Hugging Face Special Prize**: deployed as a Streamlit Space under the `lablab-ai-amd-developer-hackathon` organization. Public, runnable in the browser, no signup required. The Space carries the same `paperhawk.jpeg` cover and points at our vLLM endpoint; visitors can drive the three demo packages from the front page.
100
+
101
+ One codebase, one MIT license, three prize pools.
102
+
103
+ ### Tech Stack
104
+
105
+ | Layer | Choice |
106
+ |---|---|
107
+ | **Orchestration** | LangGraph 0.6 (4 compiled graphs, 6 subgraphs, AsyncSqliteSaver) |
108
+ | **LLM** | Qwen 2.5 14B Instruct on vLLM (AMD Instinct MI300X, ROCm) |
109
+ | **Embedding** | BAAI/bge-m3 (multilingual, 1024-dim, sentence-transformers) |
110
+ | **Retrieval** | ChromaDB + BM25 hybrid with Reciprocal Rank Fusion |
111
+ | **Schemas** | Pydantic v2 with field aliases for the `_quotes` JSON contract |
112
+ | **UI** | Streamlit 5-tab + async runtime + long-lived background event loop |
113
+ | **Deploy** | Hugging Face Spaces (Streamlit SDK) + AMD Developer Cloud (vLLM container) |
114
+ | **Testing** | pytest 8 (61 PASS in CI without any LLM), Playwright UI smoke tests |
115
+ | **License** | MIT |
116
+
117
+ ### Built By
118
+
119
+ **Team CsimpiCsirkek**:
120
+
121
+ - **Vince Nándorfi** — Lead, LangGraph architecture, AMD adaptation
122
+ - **Tamás Vitai**
123
+ - **Gábor Murcsik**
124
+
125
+ ---
126
+
127
+ ## Technology & Category Tags
128
+
129
+ `agentic-ai` · `multi-agent` · `langgraph` · `qwen` · `amd-mi300x` · `vllm` · `rocm` · `huggingface-spaces` · `document-intelligence` · `streamlit` · `python` · `mit-license`
130
+
131
+ ---
132
+
133
+ ## Tracks Targeted
134
+
135
+ | Track / Prize | Status | Rationale |
136
+ |---|---|---|
137
+ | **Track 1 — AI Agents & Agentic Workflows** | Primary submission | Multi-agent system, 4 compiled graphs, 6 subgraphs, 5-tool agentic chat — well past the "simple RAG" line |
138
+ | **Ship It + Build in Public** | Cross-track | MIT-licensed public GitHub repo + technical walkthrough + ≥2 social posts tagging `@AIatAMD` and `@lablab` |
139
+ | **Hugging Face Special Prize** | Special category | Streamlit Space published under the `lablab-ai-amd-developer-hackathon` HF organization |
140
+
141
+ ---
142
+
143
+ ## Submission Checklist
144
+
145
+ | Item | Status | Notes |
146
+ |---|---|---|
147
+ | Project Title | DONE | `PaperHawk` |
148
+ | Short Description | DONE | 247 characters, A+C blend |
149
+ | Long Description | DONE | 10 sections, builder-energy tone |
150
+ | Cover Image | DONE | `paperhawk.jpeg` (2048 × 819 px) |
151
+ | Technology & Category Tags | DONE | 12 tags |
152
+ | Public GitHub Repository | DONE | `github.com/nandorfivince/paperhawk` |
153
+ | Video Presentation | TODO | Demo walkthrough video |
154
+ | Slide Presentation | TODO | 5–8 slide deck |
155
+ | Demo Application URL | TODO | HF Space public URL |
156
+ | HF Space URL | TODO | Under `lablab-ai-amd-developer-hackathon` org |
157
+
158
+ ---
159
+
160
+ ## Submission URLs (filled at submission time)
161
+
162
+ - **GitHub repo**: https://github.com/nandorfivince/paperhawk
163
+ - **Hugging Face Space**: *(to be added)*
164
+ - **Demo video**: *(to be added)*
165
+ - **Slide deck**: *(to be added)*
166
+ - **Live application URL**: *(same as HF Space URL)*
167
+
168
+ ---
169
+
170
+ *This document is the canonical submission brief. Paste sections directly into the lablab.ai project page when filing the submission.*
docs/hf-space-deployment.md ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Space deployment
2
+
3
+ The Streamlit app deploys to a **Hugging Face Space** under the
4
+ `lablab-ai-amd-developer-hackathon` organization. This is **mandatory** for
5
+ the Hugging Face Special Prize and convenient as the public demo URL.
6
+
7
+ ## 1. Prerequisites
8
+
9
+ - Hugging Face account
10
+ - Membership in the **AMD Developer Hackathon** HF organization
11
+ ([join here](https://huggingface.co/login?next=%2Forganizations%2Flablab-ai-amd-developer-hackathon%2Fshare%2FELARrxoRIHvseSHRhANJYFEZQazsQIYhJf))
12
+ - A running vLLM endpoint on the AMD MI300X (see `qwen-vllm-deployment.md`)
13
+
14
+ ## 2. Create the Space
15
+
16
+ 1. Hugging Face → Spaces → New Space
17
+ 2. Owner: `lablab-ai-amd-developer-hackathon`
18
+ 3. Space name: `paperhawk`
19
+ 4. License: MIT
20
+ 5. SDK: **Streamlit**
21
+ 6. Hardware: **CPU basic** (free) — vLLM runs on MI300X, the Space only hosts the UI
22
+
23
+ ## 3. Push the code
24
+
25
+ ```bash
26
+ git remote add space https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/paperhawk
27
+ git push space main
28
+ ```
29
+
30
+ The Space auto-builds from the repo using `requirements.txt` and runs
31
+ `app.py` (or, in our layout, configures Streamlit to start `app/main.py`).
32
+
33
+ ## 4. Set Space env vars
34
+
35
+ In the Space → Settings → Variables and secrets, add:
36
+
37
+ ```
38
+ LLM_PROFILE=vllm
39
+ VLLM_BASE_URL=http://<mi300x-public-ip>:8000/v1
40
+ VLLM_MODEL=Qwen/Qwen2.5-14B-Instruct
41
+ VLLM_API_KEY=<the api key you set on the vLLM server>
42
+ EMBEDDING_MODEL=BAAI/bge-m3
43
+ ```
44
+
45
+ Mark `VLLM_API_KEY` as a **secret** (not a regular variable).
46
+
47
+ ## 5. Space front-matter
48
+
49
+ Edit the `README.md` to start with the HF Spaces front-matter:
50
+
51
+ ```yaml
52
+ ---
53
+ title: Document Intelligence (AMD Edition)
54
+ emoji: 🔍
55
+ colorFrom: red
56
+ colorTo: yellow
57
+ sdk: streamlit
58
+ sdk_version: 1.40.0
59
+ app_file: app/main.py
60
+ pinned: false
61
+ license: mit
62
+ short_description: Multi-document due diligence with LangGraph + Qwen on AMD MI300X
63
+ tags:
64
+ - langgraph
65
+ - agentic
66
+ - rag
67
+ - qwen
68
+ - amd
69
+ - document-intelligence
70
+ ---
71
+ ```
72
+
73
+ (The current README.md is the project README; this front-matter goes on top
74
+ when the repo is mirrored to the HF Space.)
75
+
76
+ ## 6. Verify the Space
77
+
78
+ After the build finishes (~3-5 minutes):
79
+
80
+ 1. Open `https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/paperhawk`
81
+ 2. Click the **Audit Demo** button → it should run end-to-end and produce
82
+ risks + a report.
83
+ 3. Open the **Chat** tab → ask a question → the answer should include
84
+ `[Source: filename.pdf]` citations.
85
+
86
+ ## 7. Resource tier
87
+
88
+ The free CPU basic tier (16 GB RAM, 2 vCPU) handles:
89
+
90
+ - BGE-m3 embedding (~2.3 GB on first load)
91
+ - ChromaDB (small index)
92
+ - Streamlit UI
93
+
94
+ The vLLM model runs on the MI300X, **not** here. The Space just renders the
95
+ UI and proxies requests to the vLLM endpoint.
96
+
97
+ If the free tier is too tight on memory, upgrade to **CPU upgrade** ($0.03/h).
98
+
99
+ ## 8. Sleep mode mitigation
100
+
101
+ A free Space sleeps after 48 hours of inactivity. The first request after
102
+ sleep takes ~30-60 seconds to wake. Mitigations:
103
+
104
+ - Share the Space link in your Build-in-Public posts → continuous traffic →
105
+ less likely to sleep.
106
+ - Set up a 30-minute external ping (e.g. UptimeRobot) the day before
107
+ judging.
108
+
109
+ ## 9. The HF Special Prize is like-driven
110
+
111
+ Once the Space is live:
112
+
113
+ 1. Share the URL on X / LinkedIn (tag `@lablab` and `@AIatAMD`).
114
+ 2. Ask your followers to like the Space.
115
+ 3. The Space with the most likes at the end of the hackathon wins:
116
+ - 1st: Reachy Mini Wireless robot + 6 months HF PRO + $500 HF credit
117
+ - 2nd: 3 months HF PRO + $300 credit
118
+ - 3rd: 2 months HF PRO + $200 credit
119
+
120
+ ## 10. Submission to lablab
121
+
122
+ When submitting on lablab.ai, paste the Space URL into the **Application
123
+ URL** and **Hugging Face Space link** fields. This is mandatory for the HF
124
+ prize qualification.
docs/qwen-vllm-deployment.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Qwen on AMD MI300X — vLLM deployment
2
+
3
+ This guide covers the production deployment path: running Qwen 2.5 Instruct
4
+ (14B or 32B) via [vLLM](https://github.com/vllm-project/vllm) on an
5
+ **AMD Instinct MI300X** through the AMD Developer Cloud, with the Streamlit
6
+ app calling the vLLM endpoint over the OpenAI-compatible REST API.
7
+
8
+ For the canonical step-by-step (including the docker run command and a
9
+ benchmark table), see [`infra/vllm/README.md`](../infra/vllm/README.md).
10
+
11
+ ## Why this stack?
12
+
13
+ - **Open source LLM** — Qwen 2.5 is Apache-2 licensed; safe for the MIT
14
+ open-source license here, and a partner-prize bonus on the hackathon.
15
+ - **Multilingual** — Qwen 2.5 handles HU/DE/EN well, which matters for our
16
+ multilingual demo data.
17
+ - **AMD-native** — vLLM has a ROCm build (`rocm/vllm:latest`) optimized for
18
+ the MI300X. No CUDA, no NVIDIA dependency.
19
+ - **OpenAI-compatible API** — `langchain-openai`'s `ChatOpenAI` adapter
20
+ works out of the box with a custom `base_url`. Tool-calling, structured
21
+ output, and streaming all behave the same as the public OpenAI endpoint.
22
+ - **No vendor lock-in** — the same code runs against Ollama (locally) and
23
+ against any OpenAI-compatible inference server.
24
+
25
+ ## Cost monitoring
26
+
27
+ AMD Developer Cloud pricing (May 2026 ballpark):
28
+
29
+ - ~$4-8/hour pay-as-you-go for an MI300X instance.
30
+ - Each team member gets `$100` in cloud credits → 20 hours of MI300X uptime
31
+ at $5/h. With 3 team members, ~60 hours total.
32
+
33
+ **Discipline:**
34
+
35
+ 1. Only run during demo / test / build sessions; **stop the instance when
36
+ idle**.
37
+ 2. Keep one teammate's credit untouched as a final-day buffer.
38
+ 3. Run end-to-end smoke tests early — a hot fix on deadline day burns hours
39
+ you can't get back.
40
+
41
+ ## Plan B: Ollama fallback
42
+
43
+ If the AMD credit doesn't arrive in time, or the MI300X has a network issue
44
+ on demo day:
45
+
46
+ ```bash
47
+ LLM_PROFILE=ollama OLLAMA_MODEL=qwen2.5:7b-instruct streamlit run app/main.py
48
+ ```
49
+
50
+ Pull the model first:
51
+
52
+ ```bash
53
+ ollama pull qwen2.5:7b-instruct
54
+ ```
55
+
56
+ Quality drops (7B vs 14B/32B), but the demo flow stays alive on a laptop
57
+ GPU or even CPU.
58
+
59
+ ## Production hardening (post-hackathon)
60
+
61
+ For an actual production deployment beyond the hackathon scope:
62
+
63
+ - TLS termination (Caddy / Nginx in front of vLLM)
64
+ - API-key rotation (`--api-key` flag with a periodic rotation script)
65
+ - Prometheus + Grafana on vLLM `/metrics`
66
+ - `--quantization fp8` to fit a larger model on smaller hardware
67
+ - `--enable-prefix-caching` for repeated long system prompts
68
+ - Multi-GPU / multi-region scaling via SkyPilot or vLLM Production Stack
docs/slides/01_cover.png ADDED

Git LFS Details

  • SHA256: 9a7cc84b3ee3d544e006e461bc135a0708e44a57e789400ac4f3ffa9a788c8c3
  • Pointer size: 131 Bytes
  • Size of remote file: 179 kB
docs/slides/PaperHawk_Slides.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:302aa5982d5cace9bd4e154d97d5feabe3ded8c42fffdaa61857d8aaec89d492
3
+ size 1328878
docs/slides/PaperHawk_Slides.pptx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba00be30282e781f49d84117bcebbaa584a02ef1725eee714944ff0468e09dc1
3
+ size 771365
docs/slides/README.md ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PaperHawk — Slide Deck
2
+
3
+ The 10-slide deck for the AMD Developer Hackathon × lablab.ai submission.
4
+
5
+ - **Source**: `slides.html` (single self-contained HTML, ~900 lines, no JS, no external assets except the repo's `paperhawk.jpeg`)
6
+ - **Format**: 16:9 landscape (1280 × 720 px per slide)
7
+ - **Palette**: AMD red `#ED1C24` + AMD orange `#FB6624` + PaperHawk black `#1A1A1A` + Qwen purple `#7C3AED` accent
8
+ - **Typography**: Inter (Google Fonts), JetBrains Mono for code/labels
9
+ - **License**: MIT (same as the repo)
10
+
11
+ ## Render to PDF (Playwright)
12
+
13
+ ```bash
14
+ # One-time setup
15
+ pip install playwright
16
+ playwright install chromium
17
+
18
+ # Render slides.html → PaperHawk_Slides.pdf
19
+ python - <<'PY'
20
+ import asyncio
21
+ from pathlib import Path
22
+ from playwright.async_api import async_playwright
23
+
24
+ async def main():
25
+ src = Path("docs/slides/slides.html").resolve().as_uri()
26
+ out = Path("docs/slides/PaperHawk_Slides.pdf")
27
+ async with async_playwright() as p:
28
+ browser = await p.chromium.launch()
29
+ page = await browser.new_page(viewport={"width": 1280, "height": 720})
30
+ await page.goto(src, wait_until="networkidle")
31
+ await page.pdf(
32
+ path=str(out),
33
+ width="1280px",
34
+ height="720px",
35
+ print_background=True,
36
+ margin={"top": "0", "right": "0", "bottom": "0", "left": "0"},
37
+ )
38
+ await browser.close()
39
+
40
+ asyncio.run(main())
41
+ print("Wrote", "docs/slides/PaperHawk_Slides.pdf")
42
+ PY
43
+ ```
44
+
45
+ ## Render the cover slide as PNG (HF Space hero)
46
+
47
+ ```bash
48
+ python - <<'PY'
49
+ import asyncio
50
+ from pathlib import Path
51
+ from playwright.async_api import async_playwright
52
+
53
+ async def main():
54
+ src = Path("docs/slides/slides.html").resolve().as_uri()
55
+ out = Path("docs/slides/01_cover.png")
56
+ async with async_playwright() as p:
57
+ browser = await p.chromium.launch()
58
+ page = await browser.new_page(viewport={"width": 1280, "height": 720})
59
+ await page.goto(src, wait_until="networkidle")
60
+ # Screenshot the first .slide element only.
61
+ cover = page.locator(".slide").first
62
+ await cover.screenshot(path=str(out), omit_background=False)
63
+ await browser.close()
64
+
65
+ asyncio.run(main())
66
+ print("Wrote", "docs/slides/01_cover.png")
67
+ PY
68
+ ```
69
+
70
+ ## Preview locally
71
+
72
+ ```bash
73
+ # Open in your browser (renders identical to the PDF):
74
+ xdg-open docs/slides/slides.html
75
+ ```
76
+
77
+ ## Iteration workflow
78
+
79
+ 1. Edit `slides.html` (CSS at the top, slides as `<section class="slide">` blocks)
80
+ 2. Reload the browser tab to preview
81
+ 3. When happy, re-run the Playwright PDF script
82
+ 4. Commit both `slides.html` and the generated PDF
83
+
84
+ ## Slide map
85
+
86
+ | # | Title | Visual |
87
+ |---|---|---|
88
+ | 1 | Cover | `paperhawk.jpeg` hero + team + tagline |
89
+ | 2 | The Problem | RAG-vs-audit split contrast |
90
+ | 3 | What We Built | 5 big-number stat cards |
91
+ | 4 | The Pipeline | 5-step ribbon (red→orange gradient) |
92
+ | 5 | The 14 Domain Checks | 3-tier table (audit / compliance / standards) |
93
+ | 6 | Anti-Halluc + DD | 5+1 layer stack | DD supervisor pattern |
94
+ | 7 | The Stack | Vertical stack-row layout (AMD + Qwen highlighted) |
95
+ | 8 | Demo Packages | 3 demo cards + timing banner |
96
+ | 9 | Built for Builders | 3 builders cards + repo/HF/MIT meta |
97
+ | 10 | Team + Closing | 3 team cards + closing tagline |
98
+
99
+ ## Notes
100
+
101
+ - All copy is English, builder-energy tone, no PwC/Hungarian narrative residue
102
+ - The `paperhawk.jpeg` reference is `../../paperhawk.jpeg` (relative to `docs/slides/`)
103
+ - The gradient strip on every slide top is `linear-gradient(90deg, AMD-red → AMD-orange → Qwen-purple)` — a visual signature
104
+ - "Team CsimpiCsirkek" appears in the cover meta + final footer; "Built to ship" closing tagline carries the winner-team subtext without being on-the-nose
docs/slides/png/slide_01.png ADDED

Git LFS Details

  • SHA256: 9a7cc84b3ee3d544e006e461bc135a0708e44a57e789400ac4f3ffa9a788c8c3
  • Pointer size: 131 Bytes
  • Size of remote file: 179 kB
docs/slides/png/slide_02.png ADDED

Git LFS Details

  • SHA256: 2570275891e9075c6ae1d5dd748be65b97e12e6bea4349574b14f922e1c22c84
  • Pointer size: 130 Bytes
  • Size of remote file: 62.9 kB
docs/slides/png/slide_03.png ADDED

Git LFS Details

  • SHA256: 1a20ba1a513657703561384b15f8718f2461179756441202fd91e373b49cf30e
  • Pointer size: 130 Bytes
  • Size of remote file: 58.3 kB
docs/slides/png/slide_04.png ADDED

Git LFS Details

  • SHA256: feabfef2a7c353d12a4994a853cd11c96a0a1c8fc2f36e1e691b2f7207ab935f
  • Pointer size: 130 Bytes
  • Size of remote file: 66 kB
docs/slides/png/slide_05.png ADDED

Git LFS Details

  • SHA256: dfe874a6755c065f5efbae5db7a2b14d3eb245272fa877beafd2e6bfa82d4d4f
  • Pointer size: 130 Bytes
  • Size of remote file: 81.9 kB
docs/slides/png/slide_06.png ADDED

Git LFS Details

  • SHA256: 67de912da1ae5df5544a34954e15d3361a10ad1b7db16d500b1127976cfded9a
  • Pointer size: 130 Bytes
  • Size of remote file: 68.5 kB
docs/slides/png/slide_07.png ADDED

Git LFS Details

  • SHA256: ab86eba5eddca00bb85712f21151c855fcc1a8cb2ed0448f08129279b074211d
  • Pointer size: 130 Bytes
  • Size of remote file: 72.2 kB
docs/slides/png/slide_08.png ADDED

Git LFS Details

  • SHA256: 26f0474dd0e0dec91640b71c38f384018b55843ae6852de530c3787bb6076add
  • Pointer size: 130 Bytes
  • Size of remote file: 61.7 kB
docs/slides/png/slide_09.png ADDED

Git LFS Details

  • SHA256: 9508c9f923635f732e80738bc25e42658aeacf571709b960394fe77ee70132cc
  • Pointer size: 130 Bytes
  • Size of remote file: 61.8 kB
docs/slides/png/slide_10.png ADDED

Git LFS Details

  • SHA256: 70f8ec06f30fb4cbf070bc615690c7677bf29f2eabdc3384eb9eae8dd1efe6fb
  • Pointer size: 130 Bytes
  • Size of remote file: 69.6 kB
docs/slides/slides.html ADDED
@@ -0,0 +1,897 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>PaperHawk — AMD Developer Hackathon Slide Deck</title>
6
+ <link rel="preconnect" href="https://fonts.googleapis.com">
7
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
8
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
9
+ <style>
10
+ :root {
11
+ --amd-red: #ED1C24;
12
+ --amd-orange: #FB6624;
13
+ --paperhawk-black: #1A1A1A;
14
+ --paperhawk-gold: #D4A857;
15
+ --qwen-purple: #7C3AED;
16
+ --bg-light: #FFFFFF;
17
+ --bg-cream: #FFF6E5;
18
+ --bg-purple-light: #F5EFFF;
19
+ --text-dark: #1A1A1A;
20
+ --text-muted: #6B6B6B;
21
+ --text-light: #FFFFFF;
22
+ --border-soft: rgba(0, 0, 0, 0.08);
23
+ }
24
+
25
+ * { box-sizing: border-box; margin: 0; padding: 0; }
26
+
27
+ html, body {
28
+ font-family: 'Inter', -apple-system, sans-serif;
29
+ color: var(--text-dark);
30
+ background: #E5E5E5;
31
+ line-height: 1.5;
32
+ -webkit-font-smoothing: antialiased;
33
+ }
34
+
35
+ .slide {
36
+ width: 1280px;
37
+ height: 720px;
38
+ padding: 64px 80px 64px 80px;
39
+ background: white;
40
+ position: relative;
41
+ overflow: hidden;
42
+ page-break-after: always;
43
+ margin: 30px auto;
44
+ box-shadow: 0 4px 30px rgba(0,0,0,0.12);
45
+ }
46
+
47
+ /* Top gradient bar — AMD red → orange → Qwen purple */
48
+ .slide::before {
49
+ content: "";
50
+ position: absolute;
51
+ top: 0; left: 0; right: 0;
52
+ height: 6px;
53
+ background: linear-gradient(90deg, var(--amd-red) 0%, var(--amd-orange) 50%, var(--qwen-purple) 100%);
54
+ z-index: 5;
55
+ }
56
+
57
+ .slide-label {
58
+ font-size: 11px;
59
+ letter-spacing: 0.18em;
60
+ color: var(--amd-red);
61
+ text-transform: uppercase;
62
+ font-weight: 700;
63
+ margin-bottom: 18px;
64
+ }
65
+
66
+ .slide-title {
67
+ font-size: 56px;
68
+ font-weight: 800;
69
+ line-height: 1.08;
70
+ color: var(--text-dark);
71
+ margin-bottom: 18px;
72
+ letter-spacing: -0.01em;
73
+ }
74
+ .slide-title .accent { color: var(--amd-red); }
75
+ .slide-title .qwen-accent { color: var(--qwen-purple); }
76
+
77
+ .slide-subtitle {
78
+ font-size: 22px;
79
+ font-weight: 500;
80
+ color: var(--text-muted);
81
+ max-width: 980px;
82
+ line-height: 1.4;
83
+ }
84
+
85
+ .slide-footer {
86
+ position: absolute;
87
+ bottom: 28px;
88
+ left: 80px; right: 80px;
89
+ display: flex;
90
+ justify-content: space-between;
91
+ font-size: 11px;
92
+ letter-spacing: 0.12em;
93
+ color: var(--text-muted);
94
+ text-transform: uppercase;
95
+ font-family: 'JetBrains Mono', monospace;
96
+ }
97
+
98
+ /* === Slide 1: Cover === */
99
+ .slide-cover {
100
+ background: var(--paperhawk-black);
101
+ color: var(--text-light);
102
+ padding: 64px 80px;
103
+ }
104
+ .slide-cover .slide-label { color: var(--amd-orange); }
105
+ .slide-cover .hawk-image {
106
+ width: 100%;
107
+ max-height: 280px;
108
+ object-fit: cover;
109
+ object-position: center;
110
+ border-radius: 6px;
111
+ margin-bottom: 32px;
112
+ filter: brightness(0.92) contrast(1.06);
113
+ }
114
+ .slide-cover .slide-title {
115
+ color: white;
116
+ font-size: 80px;
117
+ margin-bottom: 12px;
118
+ }
119
+ .slide-cover .slide-title .accent { color: var(--amd-orange); }
120
+ .slide-cover .slide-subtitle {
121
+ color: rgba(255,255,255,0.85);
122
+ font-size: 20px;
123
+ max-width: 800px;
124
+ }
125
+ .slide-cover .meta {
126
+ position: absolute;
127
+ bottom: 56px;
128
+ left: 80px; right: 80px;
129
+ display: flex;
130
+ justify-content: space-between;
131
+ align-items: flex-end;
132
+ font-size: 13px;
133
+ color: rgba(255,255,255,0.55);
134
+ font-family: 'JetBrains Mono', monospace;
135
+ }
136
+ .slide-cover .meta-team {
137
+ display: flex; gap: 28px;
138
+ font-family: 'Inter', sans-serif;
139
+ }
140
+ .slide-cover .meta-team strong {
141
+ color: white;
142
+ font-weight: 600;
143
+ }
144
+
145
+ /* === Slide 2: The Problem (split contrast) === */
146
+ .problem-grid {
147
+ display: grid;
148
+ grid-template-columns: 1fr 1fr;
149
+ gap: 28px;
150
+ margin-top: 44px;
151
+ }
152
+ .problem-card {
153
+ padding: 28px;
154
+ border-radius: 8px;
155
+ }
156
+ .problem-card.left {
157
+ background: var(--bg-cream);
158
+ border-left: 5px solid var(--amd-red);
159
+ }
160
+ .problem-card.right {
161
+ background: var(--paperhawk-black);
162
+ color: white;
163
+ }
164
+ .problem-card h3 {
165
+ font-size: 18px;
166
+ margin-bottom: 14px;
167
+ letter-spacing: 0.02em;
168
+ }
169
+ .problem-card.left h3 { color: var(--amd-red); }
170
+ .problem-card.right h3 { color: var(--amd-orange); }
171
+ .problem-card p {
172
+ font-size: 15px;
173
+ line-height: 1.6;
174
+ margin-bottom: 12px;
175
+ }
176
+ .problem-card.right p:last-child {
177
+ color: rgba(255,255,255,0.62);
178
+ font-size: 14px;
179
+ }
180
+ .problem-card.left p:last-child {
181
+ color: var(--text-muted);
182
+ font-size: 14px;
183
+ }
184
+
185
+ /* === Stat grid (slide 3) === */
186
+ .stat-grid {
187
+ display: grid;
188
+ grid-template-columns: repeat(5, 1fr);
189
+ gap: 14px;
190
+ margin-top: 44px;
191
+ }
192
+ .stat-card {
193
+ background: var(--bg-cream);
194
+ border-left: 4px solid var(--amd-red);
195
+ padding: 24px 20px;
196
+ border-radius: 4px;
197
+ }
198
+ .stat-card .stat-value {
199
+ font-size: 44px;
200
+ font-weight: 800;
201
+ color: var(--amd-red);
202
+ line-height: 1;
203
+ margin-bottom: 10px;
204
+ letter-spacing: -0.02em;
205
+ }
206
+ .stat-card .stat-label {
207
+ font-size: 13px;
208
+ font-weight: 500;
209
+ color: var(--text-dark);
210
+ line-height: 1.3;
211
+ }
212
+
213
+ /* === Pipeline ribbon (slide 4) === */
214
+ .pipeline-ribbon {
215
+ display: flex;
216
+ gap: 3px;
217
+ margin-top: 40px;
218
+ }
219
+ .pipeline-step {
220
+ flex: 1;
221
+ padding: 24px 20px;
222
+ color: white;
223
+ position: relative;
224
+ }
225
+ .pipeline-step:nth-child(1) { background: #ED1C24; }
226
+ .pipeline-step:nth-child(2) { background: #ED4426; }
227
+ .pipeline-step:nth-child(3) { background: #ED6628; }
228
+ .pipeline-step:nth-child(4) { background: #ED882A; }
229
+ .pipeline-step:nth-child(5) { background: #FB6624; }
230
+ .pipeline-step .step-num {
231
+ width: 28px; height: 28px;
232
+ background: white;
233
+ color: var(--amd-red);
234
+ border-radius: 50%;
235
+ font-weight: 800;
236
+ font-size: 14px;
237
+ display: flex; align-items: center; justify-content: center;
238
+ margin-bottom: 14px;
239
+ }
240
+ .pipeline-step .step-name {
241
+ font-size: 19px; font-weight: 700;
242
+ margin-bottom: 8px;
243
+ letter-spacing: -0.01em;
244
+ }
245
+ .pipeline-step .step-desc {
246
+ font-size: 12px;
247
+ opacity: 0.92;
248
+ line-height: 1.45;
249
+ }
250
+
251
+ /* === Domain checks (slide 5) === */
252
+ .domain-categories {
253
+ display: grid;
254
+ grid-template-columns: repeat(3, 1fr);
255
+ gap: 16px;
256
+ margin-top: 36px;
257
+ }
258
+ .domain-cat {
259
+ padding: 22px;
260
+ border-radius: 6px;
261
+ border-top: 4px solid;
262
+ }
263
+ .domain-cat.tier-a { border-top-color: var(--amd-red); background: var(--bg-cream); }
264
+ .domain-cat.tier-b { border-top-color: var(--amd-orange); background: #FFF1E0; }
265
+ .domain-cat.tier-c { border-top-color: var(--qwen-purple); background: var(--bg-purple-light); }
266
+ .domain-cat h3 {
267
+ font-size: 13px;
268
+ letter-spacing: 0.12em;
269
+ text-transform: uppercase;
270
+ margin-bottom: 14px;
271
+ font-weight: 700;
272
+ }
273
+ .domain-cat.tier-a h3 { color: var(--amd-red); }
274
+ .domain-cat.tier-b h3 { color: var(--amd-orange); }
275
+ .domain-cat.tier-c h3 { color: var(--qwen-purple); }
276
+ .domain-cat ul {
277
+ list-style: none;
278
+ font-size: 12.5px;
279
+ }
280
+ .domain-cat li {
281
+ padding: 7px 0;
282
+ border-bottom: 1px solid var(--border-soft);
283
+ line-height: 1.4;
284
+ }
285
+ .domain-cat li:last-child { border-bottom: none; }
286
+ .domain-cat strong { font-weight: 600; color: var(--text-dark); }
287
+
288
+ /* === Anti-halluc layers (slide 6) === */
289
+ .layers {
290
+ display: flex; flex-direction: column;
291
+ gap: 7px;
292
+ margin-top: 16px;
293
+ }
294
+ .layer {
295
+ padding: 12px 18px;
296
+ background: var(--bg-cream);
297
+ border-left: 4px solid var(--amd-red);
298
+ border-radius: 4px;
299
+ display: flex; align-items: center; gap: 14px;
300
+ }
301
+ .layer.plus-one {
302
+ background: var(--bg-purple-light);
303
+ border-left-color: var(--qwen-purple);
304
+ }
305
+ .layer-num {
306
+ width: 24px; height: 24px;
307
+ background: var(--amd-red);
308
+ color: white;
309
+ font-weight: 700;
310
+ font-size: 12px;
311
+ border-radius: 50%;
312
+ display: flex; align-items: center; justify-content: center;
313
+ flex-shrink: 0;
314
+ }
315
+ .layer.plus-one .layer-num {
316
+ background: var(--qwen-purple);
317
+ width: auto;
318
+ padding: 0 8px;
319
+ border-radius: 12px;
320
+ font-size: 11px;
321
+ }
322
+ .layer-text {
323
+ font-size: 13.5px;
324
+ font-weight: 500;
325
+ }
326
+ .layer-text code {
327
+ background: white;
328
+ padding: 2px 6px;
329
+ border-radius: 3px;
330
+ font-family: 'JetBrains Mono', monospace;
331
+ font-size: 11.5px;
332
+ color: var(--qwen-purple);
333
+ font-weight: 500;
334
+ }
335
+
336
+ .dd-flow {
337
+ display: flex; flex-direction: column;
338
+ gap: 8px;
339
+ margin-top: 16px;
340
+ }
341
+ .dd-specialists {
342
+ display: grid;
343
+ grid-template-columns: 1fr 1fr;
344
+ gap: 6px;
345
+ }
346
+ .dd-specialist {
347
+ padding: 10px 14px;
348
+ background: var(--bg-cream);
349
+ border-left: 3px solid var(--amd-red);
350
+ border-radius: 4px;
351
+ font-size: 12.5px;
352
+ font-weight: 600;
353
+ }
354
+ .dd-arrow {
355
+ text-align: center;
356
+ font-size: 26px;
357
+ color: var(--amd-red);
358
+ line-height: 1;
359
+ margin: 2px 0;
360
+ }
361
+ .dd-supervisor, .dd-synthesizer {
362
+ padding: 14px 20px;
363
+ color: white;
364
+ text-align: center;
365
+ border-radius: 4px;
366
+ font-weight: 600;
367
+ font-size: 14px;
368
+ }
369
+ .dd-supervisor { background: var(--paperhawk-black); }
370
+ .dd-synthesizer { background: var(--qwen-purple); }
371
+
372
+ .col-header {
373
+ font-size: 12px;
374
+ letter-spacing: 0.12em;
375
+ text-transform: uppercase;
376
+ font-weight: 700;
377
+ margin-bottom: 12px;
378
+ }
379
+
380
+ /* === Stack vertical (slide 7) === */
381
+ .stack-vertical {
382
+ display: flex; flex-direction: column;
383
+ gap: 8px;
384
+ margin-top: 32px;
385
+ }
386
+ .stack-row {
387
+ padding: 16px 24px;
388
+ background: var(--bg-cream);
389
+ border-left: 4px solid var(--amd-red);
390
+ border-radius: 4px;
391
+ display: flex; align-items: center; justify-content: space-between;
392
+ }
393
+ .stack-row.qwen-row {
394
+ border-left-color: var(--qwen-purple);
395
+ background: var(--bg-purple-light);
396
+ }
397
+ .stack-row.amd-row {
398
+ border-left-color: var(--amd-red);
399
+ background: #FFE4E5;
400
+ }
401
+ .stack-row .stack-label {
402
+ font-weight: 700;
403
+ font-size: 16px;
404
+ }
405
+ .stack-row .stack-detail {
406
+ font-size: 12.5px;
407
+ color: var(--text-muted);
408
+ font-family: 'JetBrains Mono', monospace;
409
+ }
410
+
411
+ /* === Demo cards (slide 8) === */
412
+ .demo-grid {
413
+ display: grid;
414
+ grid-template-columns: repeat(3, 1fr);
415
+ gap: 18px;
416
+ margin-top: 32px;
417
+ }
418
+ .demo-card {
419
+ padding: 24px;
420
+ background: var(--bg-cream);
421
+ border-radius: 6px;
422
+ border-top: 4px solid var(--amd-red);
423
+ }
424
+ .demo-card h3 {
425
+ font-size: 18px;
426
+ margin-bottom: 12px;
427
+ color: var(--amd-red);
428
+ letter-spacing: -0.01em;
429
+ }
430
+ .demo-card p {
431
+ font-size: 13.5px;
432
+ line-height: 1.5;
433
+ color: var(--text-dark);
434
+ }
435
+ .demo-card .demo-result {
436
+ margin-top: 14px;
437
+ padding: 10px 12px;
438
+ background: white;
439
+ border-left: 3px solid var(--qwen-purple);
440
+ font-size: 12px;
441
+ font-family: 'JetBrains Mono', monospace;
442
+ line-height: 1.4;
443
+ }
444
+
445
+ .demo-time-banner {
446
+ margin-top: 36px;
447
+ padding: 22px;
448
+ background: var(--paperhawk-black);
449
+ color: white;
450
+ text-align: center;
451
+ border-radius: 6px;
452
+ font-size: 15px;
453
+ }
454
+ .demo-time-banner strong {
455
+ color: var(--amd-orange);
456
+ font-size: 20px;
457
+ font-weight: 700;
458
+ }
459
+
460
+ /* === Built for builders (slide 9) === */
461
+ .builders-grid {
462
+ display: grid;
463
+ grid-template-columns: repeat(3, 1fr);
464
+ gap: 20px;
465
+ margin-top: 44px;
466
+ }
467
+ .builders-card {
468
+ padding: 28px 24px;
469
+ background: var(--bg-cream);
470
+ border-radius: 6px;
471
+ border-top: 4px solid var(--amd-red);
472
+ }
473
+ .builders-card .marker {
474
+ font-family: 'JetBrains Mono', monospace;
475
+ font-size: 12px;
476
+ color: var(--amd-red);
477
+ letter-spacing: 0.1em;
478
+ margin-bottom: 8px;
479
+ font-weight: 600;
480
+ }
481
+ .builders-card h3 {
482
+ font-size: 19px;
483
+ font-weight: 700;
484
+ margin-bottom: 12px;
485
+ letter-spacing: -0.01em;
486
+ }
487
+ .builders-card p {
488
+ font-size: 13.5px;
489
+ line-height: 1.5;
490
+ color: var(--text-muted);
491
+ }
492
+ .builders-card p code {
493
+ background: white;
494
+ padding: 2px 6px;
495
+ border-radius: 3px;
496
+ font-family: 'JetBrains Mono', monospace;
497
+ font-size: 12px;
498
+ color: var(--qwen-purple);
499
+ }
500
+
501
+ .builders-meta {
502
+ margin-top: 32px;
503
+ padding: 18px 24px;
504
+ background: var(--paperhawk-black);
505
+ color: white;
506
+ border-radius: 6px;
507
+ display: flex;
508
+ justify-content: space-around;
509
+ align-items: center;
510
+ font-size: 12.5px;
511
+ font-family: 'JetBrains Mono', monospace;
512
+ }
513
+ .builders-meta strong { color: var(--amd-orange); }
514
+
515
+ /* === Team cards (slide 10) === */
516
+ .team-grid {
517
+ display: grid;
518
+ grid-template-columns: repeat(3, 1fr);
519
+ gap: 18px;
520
+ margin-top: 32px;
521
+ }
522
+ .team-card {
523
+ padding: 24px;
524
+ background: var(--bg-cream);
525
+ border-radius: 6px;
526
+ border-top: 4px solid var(--amd-red);
527
+ }
528
+ .team-card .team-role {
529
+ font-size: 11px;
530
+ letter-spacing: 0.12em;
531
+ text-transform: uppercase;
532
+ color: var(--amd-red);
533
+ margin-bottom: 10px;
534
+ font-weight: 700;
535
+ }
536
+ .team-card .team-name {
537
+ font-size: 22px;
538
+ font-weight: 700;
539
+ margin-bottom: 10px;
540
+ letter-spacing: -0.01em;
541
+ }
542
+ .team-card .team-desc {
543
+ font-size: 13px;
544
+ line-height: 1.5;
545
+ color: var(--text-muted);
546
+ }
547
+
548
+ .closing-tagline {
549
+ margin-top: 28px;
550
+ padding: 24px;
551
+ background: var(--paperhawk-black);
552
+ color: white;
553
+ text-align: center;
554
+ border-radius: 6px;
555
+ }
556
+ .closing-tagline p {
557
+ font-size: 26px;
558
+ font-weight: 700;
559
+ letter-spacing: -0.01em;
560
+ }
561
+ .closing-tagline p .accent { color: var(--amd-orange); }
562
+ .closing-tagline p .qwen-accent { color: var(--qwen-purple); }
563
+
564
+ /* === Print mode (Playwright PDF render) === */
565
+ @page {
566
+ size: 1280px 720px;
567
+ margin: 0;
568
+ }
569
+ @media print {
570
+ html, body { background: white; }
571
+ .slide {
572
+ margin: 0;
573
+ box-shadow: none;
574
+ page-break-after: always;
575
+ }
576
+ .slide:last-child { page-break-after: auto; }
577
+ }
578
+ </style>
579
+ </head>
580
+ <body>
581
+
582
+ <!-- ========================================================== -->
583
+ <!-- Slide 1: Cover -->
584
+ <!-- ========================================================== -->
585
+ <section class="slide slide-cover">
586
+ <div class="slide-label">AMD Developer Hackathon × lablab.ai · May 2026</div>
587
+ <img src="../../paperhawk.jpeg" alt="PaperHawk hero" class="hawk-image">
588
+ <h1 class="slide-title">Paper<span class="accent">Hawk</span></h1>
589
+ <p class="slide-subtitle">Multi-agent document intelligence on AMD Instinct MI300X.<br>Built by engineers who ship.</p>
590
+ <div class="meta">
591
+ <div class="meta-team">
592
+ <span><strong>Vince Nándorfi</strong></span>
593
+ <span><strong>Tamás Vitai</strong></span>
594
+ <span><strong>Gábor Murcsik</strong></span>
595
+ </div>
596
+ <div>Team CsimpiCsirkek · MIT</div>
597
+ </div>
598
+ </section>
599
+
600
+ <!-- ========================================================== -->
601
+ <!-- Slide 2: The Problem -->
602
+ <!-- ========================================================== -->
603
+ <section class="slide">
604
+ <div class="slide-label">The Problem</div>
605
+ <h1 class="slide-title">RAG <span class="accent">retrieves</span>.<br>Audit <span class="accent">finds</span>.</h1>
606
+ <p class="slide-subtitle">Today's RAG chatbots can do the first. They cannot do the second.</p>
607
+ <div class="problem-grid">
608
+ <div class="problem-card left">
609
+ <h3>What RAG does well</h3>
610
+ <p>Chunk a document. Embed the chunks. Retrieve top-K passages. Generate an answer with the retrieved context.</p>
611
+ <p>Great for FAQ chatbots. Great for Q&amp;A on a single document.</p>
612
+ </div>
613
+ <div class="problem-card right">
614
+ <h3>What auditors actually need</h3>
615
+ <p>"Does the supplier in Invoice #7 match the vendor in PO #3? Is the VAT rate consistent across the package? Any change-of-control clauses? Sanctions hits?"</p>
616
+ <p>These questions live in the <em>relationship</em> between documents — not in any single chunk.</p>
617
+ </div>
618
+ </div>
619
+ <div class="slide-footer"><span>02 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
620
+ </section>
621
+
622
+ <!-- ========================================================== -->
623
+ <!-- Slide 3: What We Built -->
624
+ <!-- ========================================================== -->
625
+ <section class="slide">
626
+ <div class="slide-label">What We Built</div>
627
+ <h1 class="slide-title">A <span class="accent">multi-agent</span> system.<br>Not a retrieval pipeline.</h1>
628
+ <p class="slide-subtitle">LangGraph 0.6-native. Production-shaped. Open source under MIT.</p>
629
+ <div class="stat-grid">
630
+ <div class="stat-card"><div class="stat-value">4</div><div class="stat-label">Compiled<br>graphs</div></div>
631
+ <div class="stat-card"><div class="stat-value">6</div><div class="stat-label">Reusable<br>subgraphs</div></div>
632
+ <div class="stat-card"><div class="stat-value">14</div><div class="stat-label">Deterministic<br>domain checks</div></div>
633
+ <div class="stat-card"><div class="stat-value">5+1</div><div class="stat-label">Anti-halluc<br>layers</div></div>
634
+ <div class="stat-card"><div class="stat-value">5</div><div class="stat-label">Agentic<br>chat tools</div></div>
635
+ </div>
636
+ <p style="margin-top: 36px; font-size: 14px; color: var(--text-muted); line-height: 1.65;">
637
+ Send-API parallelism · <code style="background:var(--bg-cream); padding:2px 6px; border-radius:3px; font-family:'JetBrains Mono', monospace; font-size:12px; color:var(--qwen-purple);">AsyncSqliteSaver</code> checkpointer · <code style="background:var(--bg-cream); padding:2px 6px; border-radius:3px; font-family:'JetBrains Mono', monospace; font-size:12px; color:var(--qwen-purple);">configurable_alternatives</code> provider stack (vLLM / Ollama / dummy) · multi-agent DD assistant with 4 specialists + supervisor + synthesizer · Streamlit 5-tab UI · 61 tests passing in CI without an LLM.
638
+ </p>
639
+ <div class="slide-footer"><span>03 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
640
+ </section>
641
+
642
+ <!-- ========================================================== -->
643
+ <!-- Slide 4: The Pipeline (5-step) -->
644
+ <!-- ========================================================== -->
645
+ <section class="slide">
646
+ <div class="slide-label">The Pipeline</div>
647
+ <h1 class="slide-title">Five steps. <span class="accent">End-to-end.</span></h1>
648
+ <p class="slide-subtitle">Every step is a typed Pydantic-state node. Every LLM call has structured output.</p>
649
+ <div class="pipeline-ribbon">
650
+ <div class="pipeline-step">
651
+ <div class="step-num">1</div>
652
+ <div class="step-name">Ingest</div>
653
+ <div class="step-desc">PDF · DOCX · image. Vision-first OCR fallback for scanned pages.</div>
654
+ </div>
655
+ <div class="pipeline-step">
656
+ <div class="step-num">2</div>
657
+ <div class="step-name">Classify</div>
658
+ <div class="step-desc">6-way doc-type classifier. ISA 500 evidence-quality score.</div>
659
+ </div>
660
+ <div class="pipeline-step">
661
+ <div class="step-num">3</div>
662
+ <div class="step-name">Extract</div>
663
+ <div class="step-desc">Pydantic schema per doc-type. _quotes + _confidence per field.</div>
664
+ </div>
665
+ <div class="pipeline-step">
666
+ <div class="step-num">4</div>
667
+ <div class="step-name">Cross-ref</div>
668
+ <div class="step-desc">3-way matching. Package-level analyzer. DD multi-agent.</div>
669
+ </div>
670
+ <div class="pipeline-step">
671
+ <div class="step-num">5</div>
672
+ <div class="step-name">Risk + Report</div>
673
+ <div class="step-desc">14 checks (parallel Send) · LLM ensemble · 3-layer filter · DOCX export.</div>
674
+ </div>
675
+ </div>
676
+ <p style="margin-top: 56px; font-size: 14px; color: var(--text-muted); text-align: center; font-style: italic;">
677
+ On AMD MI300X with Qwen 2.5 14B: <strong style="color:var(--amd-red); font-style: normal;">30–90 seconds</strong> end-to-end per package.
678
+ </p>
679
+ <div class="slide-footer"><span>04 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
680
+ </section>
681
+
682
+ <!-- ========================================================== -->
683
+ <!-- Slide 5: The 14 Domain Checks -->
684
+ <!-- ========================================================== -->
685
+ <section class="slide">
686
+ <div class="slide-label">Beyond LLMs · Deterministic Reasoning</div>
687
+ <h1 class="slide-title">Fourteen rules. <span class="accent">In Python.</span></h1>
688
+ <p class="slide-subtitle">Every check is a typed Protocol, not a prompt. Run in parallel via the LangGraph Send API.</p>
689
+ <div class="domain-categories">
690
+ <div class="domain-cat tier-a">
691
+ <h3>Tier A — Audit · 6 checks</h3>
692
+ <ul>
693
+ <li><strong>ISA 500</strong> Evidence hierarchy</li>
694
+ <li><strong>ISA 320</strong> Materiality threshold</li>
695
+ <li><strong>ISA 240</strong> Duplicate invoice detector</li>
696
+ <li><strong>ISA 240</strong> Rounded-amount anomaly</li>
697
+ <li><strong>Tax-ID CDV</strong> mod-11 checksum</li>
698
+ <li><strong>Mandatory fields</strong> Invoice completeness</li>
699
+ </ul>
700
+ </div>
701
+ <div class="domain-cat tier-b">
702
+ <h3>Tier B — Compliance · 4 checks</h3>
703
+ <ul>
704
+ <li><strong>GDPR Art. 28</strong> Sub-processor clause</li>
705
+ <li><strong>AML / Sanctions</strong> EU + OFAC fuzzy match</li>
706
+ <li><strong>M&amp;A red flag</strong> Change-of-control · auto-renewal</li>
707
+ <li><strong>Disproportionality</strong> Penalty-vs-value ratio</li>
708
+ </ul>
709
+ </div>
710
+ <div class="domain-cat tier-c">
711
+ <h3>Tier C — Standards · 4 checks</h3>
712
+ <ul>
713
+ <li><strong>Incoterms 2020</strong> 11-rule recognizer</li>
714
+ <li><strong>IFRS / GAAP</strong> Goodwill + lease anomaly</li>
715
+ <li><strong>Math validation</strong> Net + VAT + gross</li>
716
+ <li><strong>Contract completeness</strong> 6-key-clause check</li>
717
+ </ul>
718
+ </div>
719
+ </div>
720
+ <p style="margin-top: 28px; font-size: 13px; color: var(--text-muted); text-align: center; font-style: italic;">
721
+ Jurisdiction-aware: locale-specific rules trigger only on locale-tagged inputs. Universal rules run everywhere.
722
+ </p>
723
+ <div class="slide-footer"><span>05 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
724
+ </section>
725
+
726
+ <!-- ========================================================== -->
727
+ <!-- Slide 6: Anti-Hallucination + Multi-Agent DD -->
728
+ <!-- ========================================================== -->
729
+ <section class="slide">
730
+ <div class="slide-label">Trust by Design</div>
731
+ <h1 class="slide-title">Anti-halluc <span class="accent">5+1</span>. DD <span class="qwen-accent">multi-agent</span>.</h1>
732
+ <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 32px; margin-top: 28px;">
733
+ <div>
734
+ <div class="col-header" style="color: var(--amd-red);">5+1 layers, every output</div>
735
+ <div class="layers">
736
+ <div class="layer"><div class="layer-num">1</div><div class="layer-text"><code>temperature=0</code> on every LLM call</div></div>
737
+ <div class="layer"><div class="layer-num">2</div><div class="layer-text"><code>_quotes</code> verbatim source citation</div></div>
738
+ <div class="layer"><div class="layer-num">3</div><div class="layer-text"><code>_confidence</code> per extracted field</div></div>
739
+ <div class="layer"><div class="layer-num">4</div><div class="layer-text">Plausibility validators (math · dates · ranges)</div></div>
740
+ <div class="layer"><div class="layer-num">5</div><div class="layer-text">3-layer LLM-risk filter chain</div></div>
741
+ <div class="layer plus-one"><div class="layer-num">+1</div><div class="layer-text">Quote validator: drops claims whose quotes aren't in the doc</div></div>
742
+ </div>
743
+ </div>
744
+ <div>
745
+ <div class="col-header" style="color: var(--qwen-purple);">DD supervisor pattern</div>
746
+ <div class="dd-flow">
747
+ <div class="dd-specialists">
748
+ <div class="dd-specialist">Audit specialist</div>
749
+ <div class="dd-specialist">Legal specialist</div>
750
+ <div class="dd-specialist">Compliance specialist</div>
751
+ <div class="dd-specialist">Financial specialist</div>
752
+ </div>
753
+ <div class="dd-arrow">↓</div>
754
+ <div class="dd-supervisor">Supervisor — routing &amp; coordination</div>
755
+ <div class="dd-arrow">↓</div>
756
+ <div class="dd-synthesizer">Synthesizer → Executive Summary</div>
757
+ </div>
758
+ <p style="font-size: 13px; color: var(--text-muted); margin-top: 18px; line-height: 1.5;">
759
+ Four specialists read the same package independently. The supervisor coordinates routing. The synthesizer writes a 3-paragraph executive brief with cited red flags.
760
+ </p>
761
+ </div>
762
+ </div>
763
+ <div class="slide-footer"><span>06 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
764
+ </section>
765
+
766
+ <!-- ========================================================== -->
767
+ <!-- Slide 7: AMD MI300X + Qwen + vLLM -->
768
+ <!-- ========================================================== -->
769
+ <section class="slide">
770
+ <div class="slide-label">The Stack</div>
771
+ <h1 class="slide-title">Qwen on <span class="accent">AMD MI300X</span> via vLLM.</h1>
772
+ <p class="slide-subtitle">192 GB HBM3. ROCm-native. Open-source models, end-to-end.</p>
773
+ <div class="stack-vertical">
774
+ <div class="stack-row">
775
+ <div class="stack-label">Streamlit · 5-tab UI</div>
776
+ <div class="stack-detail">Upload · Results · Chat · DD · Report</div>
777
+ </div>
778
+ <div class="stack-row">
779
+ <div class="stack-label">LangGraph 0.6 orchestration</div>
780
+ <div class="stack-detail">4 graphs · 6 subgraphs · Send API · AsyncSqliteSaver</div>
781
+ </div>
782
+ <div class="stack-row qwen-row">
783
+ <div class="stack-label">Qwen 2.5 14B Instruct (open source)</div>
784
+ <div class="stack-detail">tool-calling · structured-output · multilingual</div>
785
+ </div>
786
+ <div class="stack-row">
787
+ <div class="stack-label">vLLM continuous batching</div>
788
+ <div class="stack-detail">--api-key · --max-model-len 32768 · OpenAI-compatible</div>
789
+ </div>
790
+ <div class="stack-row amd-row">
791
+ <div class="stack-label">AMD Instinct MI300X · ROCm</div>
792
+ <div class="stack-detail">192 GB HBM3 · BF16 / FP8 · AMD Developer Cloud</div>
793
+ </div>
794
+ <div class="stack-row">
795
+ <div class="stack-label">Hugging Face Spaces deploy</div>
796
+ <div class="stack-detail">lablab-ai-amd-developer-hackathon · Streamlit SDK</div>
797
+ </div>
798
+ </div>
799
+ <div class="slide-footer"><span>07 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
800
+ </section>
801
+
802
+ <!-- ========================================================== -->
803
+ <!-- Slide 8: Demo Packages -->
804
+ <!-- ========================================================== -->
805
+ <section class="slide">
806
+ <div class="slide-label">See It In Action</div>
807
+ <h1 class="slide-title">Three <span class="accent">one-click</span> demos.</h1>
808
+ <p class="slide-subtitle">Bundled in the repo. Runnable from the Streamlit Upload tab in 30 seconds.</p>
809
+ <div class="demo-grid">
810
+ <div class="demo-card">
811
+ <h3>Audit Demo</h3>
812
+ <p>Three invoices from the same supplier. The March one is 50% pricier than January and February.</p>
813
+ <div class="demo-result">→ ISA 240 over-billing pattern flagged with cited line items.</div>
814
+ </div>
815
+ <div class="demo-card">
816
+ <h3>DD Demo</h3>
817
+ <p>NDA + service agreement + amendment in an acquisition scenario.</p>
818
+ <div class="demo-result">→ Hidden change-of-control + auto-renewal red flags.</div>
819
+ </div>
820
+ <div class="demo-card">
821
+ <h3>Compliance Demo</h3>
822
+ <p>Two contracts; one is missing GDPR Article 28 sub-processor language.</p>
823
+ <div class="demo-result">→ Domain check #8 detects the gap with regulatory citation.</div>
824
+ </div>
825
+ </div>
826
+ <div class="demo-time-banner">
827
+ On AMD MI300X with Qwen 2.5 14B Instruct: <strong>30–90 seconds</strong> per package · end-to-end · with citations.
828
+ </div>
829
+ <div class="slide-footer"><span>08 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
830
+ </section>
831
+
832
+ <!-- ========================================================== -->
833
+ <!-- Slide 9: Built for Builders -->
834
+ <!-- ========================================================== -->
835
+ <section class="slide">
836
+ <div class="slide-label">Open · Reproducible · Public</div>
837
+ <h1 class="slide-title">Built <span class="accent">for builders</span>.</h1>
838
+ <p class="slide-subtitle">MIT licensed. Reproducible from a clean clone. No closed weights, no proprietary extensions.</p>
839
+ <div class="builders-grid">
840
+ <div class="builders-card">
841
+ <div class="marker">/ 01</div>
842
+ <h3>Open source · MIT</h3>
843
+ <p>Public GitHub repo. No "training data not included" footnotes. Clone it, run it, fork it. The whole codebase is yours to read.</p>
844
+ </div>
845
+ <div class="builders-card">
846
+ <div class="marker">/ 02</div>
847
+ <h3>Reproducible</h3>
848
+ <p>Same stack from laptop to MI300X. <code>infra/vllm/Dockerfile</code> + <code>serve.sh</code> + <code>requirements.txt</code>. One command, one container.</p>
849
+ </div>
850
+ <div class="builders-card">
851
+ <div class="marker">/ 03</div>
852
+ <h3>Battle-tested</h3>
853
+ <p>61 tests passing in CI without any LLM. Deterministic dummy provider for CI; vLLM and Ollama for everything else.</p>
854
+ </div>
855
+ </div>
856
+ <div class="builders-meta">
857
+ <span><strong>github.com/nandorfivince/paperhawk</strong></span>
858
+ <span style="color: rgba(255,255,255,0.3);">|</span>
859
+ <span><strong>HF Space:</strong> lablab-ai-amd-developer-hackathon/paperhawk</span>
860
+ <span style="color: rgba(255,255,255,0.3);">|</span>
861
+ <span><strong>License:</strong> MIT</span>
862
+ </div>
863
+ <div class="slide-footer"><span>09 / 10</span><span>PaperHawk · AMD × lablab.ai</span></div>
864
+ </section>
865
+
866
+ <!-- ========================================================== -->
867
+ <!-- Slide 10: Team + Closing -->
868
+ <!-- ========================================================== -->
869
+ <section class="slide">
870
+ <div class="slide-label">The Team</div>
871
+ <h1 class="slide-title">Three engineers.<br>One <span class="accent">shipped product</span>.</h1>
872
+ <p class="slide-subtitle">We've shipped together for nearly a decade. PaperHawk is what happens when domain knowledge, engineering rigor, and product instinct meet on the same codebase.</p>
873
+ <div class="team-grid">
874
+ <div class="team-card">
875
+ <div class="team-role">Lead · LangGraph · AMD Adaptation</div>
876
+ <div class="team-name">Vince Nándorfi</div>
877
+ <div class="team-desc">System architecture, domain research, ROCm/vLLM adaptation, testing. PaperHawk's blueprint and the AMD-edition rewrite.</div>
878
+ </div>
879
+ <div class="team-card">
880
+ <div class="team-role">Engineering · DevOps</div>
881
+ <div class="team-name">Tamás Vitai</div>
882
+ <div class="team-desc">Senior++ engineer. Implementation, infrastructure, integration testing. Where the code meets the runtime.</div>
883
+ </div>
884
+ <div class="team-card">
885
+ <div class="team-role">Engineering · Algorithms</div>
886
+ <div class="team-name">Gábor Murcsik</div>
887
+ <div class="team-desc">Engineering rigor. Algorithmic precision. Senior systems thinking, sharpened over years of complex production builds.</div>
888
+ </div>
889
+ </div>
890
+ <div class="closing-tagline">
891
+ <p>Beyond simple <span class="accent">RAG</span>. Built to <span class="qwen-accent">ship</span>.</p>
892
+ </div>
893
+ <div class="slide-footer"><span>10 / 10</span><span>Team CsimpiCsirkek · MIT · github.com/nandorfivince/paperhawk</span></div>
894
+ </section>
895
+
896
+ </body>
897
+ </html>
docs/social-posts/post-1-build-window-opens.md ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Build in Public · Post 1 — Build Window Opens
2
+
3
+ **Timing**: post on or just after the AMD Hackathon kick-off (May 4, 6:00 PM CEST).
4
+ **Order**: post on **X first**, then LinkedIn ~30 minutes later.
5
+ **Why**: X moves fast, LinkedIn rewards a slightly longer-form follow-up.
6
+
7
+ This is the first of three planned Build-in-Public posts:
8
+
9
+ 1. **Post 1** (this file) — build window opens · stack-introduction · GitHub link
10
+ 2. **Post 2** (mid-week, ~May 7-8) — technical deep-dive on one design choice (LangGraph Send-API parallelism for the deterministic check fan-out)
11
+ 3. **Post 3** (May 10, after submit) — final demo · HF Space · pitch-recap
12
+
13
+ Mandatory tags ([per the official Build in Public requirement](https://lablab.ai/event/amd-developer-hackathon)):
14
+
15
+ | Platform | Required tags |
16
+ |---|---|
17
+ | X | `@lablab` + `@AIatAMD` |
18
+ | LinkedIn | `lablab.ai` + `AMD Developer` (showcase pages) |
19
+
20
+ ---
21
+
22
+ ## Variant A — X (Twitter)
23
+
24
+ > Character budget: 280 — version below uses 269 chars including handles + hashtags.
25
+
26
+ ```
27
+ Build window opens.
28
+
29
+ Putting our LangGraph-native, multi-agent document intelligence
30
+ platform on AMD Instinct MI300X for the @AIatAMD x @lablab
31
+ hackathon.
32
+
33
+ Qwen 2.5 14B on vLLM. 14 deterministic domain checks. 5+1
34
+ anti-halluc layers. MIT, public.
35
+
36
+ → github.com/nandorfivince/paperhawk
37
+
38
+ #AMDHackathon #BuildInPublic
39
+ ```
40
+
41
+ ### X variant alternatives (in case the first doesn't fit)
42
+
43
+ **Punchy / 240 char:**
44
+
45
+ ```
46
+ PaperHawk — multi-agent document intelligence on @AIatAMD MI300X.
47
+
48
+ Qwen 2.5 14B + LangGraph 0.6 + 14 deterministic domain checks.
49
+ Build window starts now for the @lablab hackathon.
50
+
51
+ Open source · MIT · public repo.
52
+
53
+ → github.com/nandorfivince/paperhawk
54
+
55
+ #AMDHackathon #BuildInPublic
56
+ ```
57
+
58
+ **Tech-detail / 270 char:**
59
+
60
+ ```
61
+ We built PaperHawk: 4 LangGraph graphs, 6 subgraphs, 14
62
+ deterministic domain checks, multi-agent DD assistant.
63
+
64
+ Now porting it to @AIatAMD Instinct MI300X via vLLM for the
65
+ @lablab hackathon.
66
+
67
+ Qwen 2.5 14B inside. MIT, public.
68
+
69
+ → github.com/nandorfivince/paperhawk
70
+
71
+ #AMDHackathon #BuildInPublic
72
+ ```
73
+
74
+ ---
75
+
76
+ ## Variant B — LinkedIn (long form)
77
+
78
+ > Character budget: 3000. Version below is ~1280 chars + tags. Reads as a proper builder-energy update for technical recruiters and AI-engineering peers.
79
+
80
+ ```
81
+ Build window opens.
82
+
83
+ For the next week we're putting PaperHawk — our LangGraph-native,
84
+ multi-agent document intelligence platform — on AMD Instinct MI300X
85
+ GPUs for the AMD Developer Hackathon × lablab.ai.
86
+
87
+ The premise is simple: most "document AI" today is RAG with extra
88
+ steps. Retrieve a passage, summarize it, hope it's right. That's
89
+ fine for FAQ chatbots. It's not fine for auditors, due-diligence
90
+ teams, or anyone who has to cross-reference a folder of contracts
91
+ and invoices and trust the answer.
92
+
93
+ PaperHawk is built for the second case:
94
+
95
+ → 4 compiled LangGraph 0.6 graphs (pipeline / chat / DD / package)
96
+ → 14 deterministic domain checks (ISA 240/500/320, GDPR Article 28,
97
+ Incoterms 2020, AML sanctions)
98
+ → 5+1 anti-hallucination layers — every LLM claim must cite a
99
+ verbatim quote from the document, or it gets dropped
100
+ → 5-tool agentic chat with strict [Source: filename.pdf] citations
101
+ → Multi-agent DD assistant: 4 specialists + supervisor + synthesizer
102
+
103
+ Stack:
104
+ → Qwen 2.5 14B Instruct served via vLLM on AMD MI300X (ROCm)
105
+ → BAAI/bge-m3 multilingual embeddings
106
+ → Streamlit 5-tab UI, deployable as a Hugging Face Space
107
+ → MIT licensed, English-first, multilingual fallback
108
+
109
+ Three of us have shipped together for nearly a decade. We're not
110
+ new to building things. We're using this hackathon to put our
111
+ agentic DI platform on AMD's open compute stack and see how far it
112
+ goes.
113
+
114
+ We'll be sharing a technical walkthrough mid-week — including why
115
+ LangGraph's Send-API parallelism beat sequential domain dispatch in
116
+ our benchmarks.
117
+
118
+ Repo (public): https://github.com/nandorfivince/paperhawk
119
+
120
+ #AMDHackathon #BuildInPublic #LangGraph #Qwen #AMDInstinct #lablab
121
+ ```
122
+
123
+ **Don't forget**: in the LinkedIn post composer, **tag the company pages**:
124
+
125
+ - `lablab.ai` → https://www.linkedin.com/company/lablab-ai/
126
+ - `AMD Developer` (showcase page) → https://www.linkedin.com/showcase/amd-developer/
127
+
128
+ These appear as `@lablab.ai` and `@AMD Developer` in the post — LinkedIn auto-completes them when you start typing.
129
+
130
+ ---
131
+
132
+ ## Image / media to attach
133
+
134
+ For both X and LinkedIn, attach **one image**: the cover slide from the deck.
135
+
136
+ ```bash
137
+ # Generate it from slides.html (see docs/slides/README.md for the script):
138
+ python -c "<<see docs/slides/README.md cover-PNG snippet>>"
139
+ # Output: docs/slides/01_cover.png
140
+ ```
141
+
142
+ Alternative for X (which compresses heavily): use the `paperhawk.jpeg` directly — it's already wide-format (2048×819) and reads well on mobile.
143
+
144
+ ---
145
+
146
+ ## Posting checklist
147
+
148
+ | Step | Status |
149
+ |---|---|
150
+ | Cover image generated (`docs/slides/01_cover.png`) | TODO before posting |
151
+ | GitHub repo public + README hero visible | DONE |
152
+ | `@lablab` + `@AIatAMD` typed correctly on X | TODO at post-time |
153
+ | `lablab.ai` + `AMD Developer` company pages tagged on LinkedIn | TODO at post-time |
154
+ | Repo URL works in private/incognito browser (sanity-check public visibility) | TODO before posting |
155
+ | `#AMDHackathon` `#BuildInPublic` hashtags both included | DONE |
156
+
157
+ ---
158
+
159
+ ## What this post is NOT
160
+
161
+ - Not a marketing pitch. It's a technical announcement.
162
+ - Not "we hope to win". It's "we built this, here's what it does, watch this space."
163
+ - Not asking for likes. The HF Space is where like-voting happens (different track / different prize).
164
+
165
+ The job of this post: **plant a flag**. We're building. We're public. We've shipped together before. Now we're doing it on AMD GPUs.
domain_checks/__init__.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Domain check registry — 14 deterministic rules with a unified API.
2
+
3
+ The ``risk_subgraph`` uses the Send API to fan out (per-doc, per-applicable-check)
4
+ pairs; each Send invokes an ``apply_domain_check`` node which looks up and runs
5
+ the check from this registry.
6
+
7
+ Two SEPARATE entry points (skipped from dispatch via the ``SKIP_FROM_DISPATCH`` set):
8
+ * ``check_06_evidence_score``: per-doc info, called directly after classification
9
+ * ``check_12_duplicate_invoice``: package-level O(n²), called from a separate
10
+ node in the ``risk_subgraph``
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from domain_checks.base import DomainCheck, is_empty, make_risk
16
+ from domain_checks.check_01_invoice_mandatory import InvoiceMandatoryCheck
17
+ from domain_checks.check_02_tax_cdv import TaxCDVCheck, compute_cdv, validate_tax_cdv
18
+ from domain_checks.check_03_contract_completeness import ContractCompletenessCheck
19
+ from domain_checks.check_04_proportionality import ProportionalityCheck
20
+ from domain_checks.check_05_rounded_amounts import RoundedAmountsCheck
21
+ from domain_checks.check_06_evidence_score import EvidenceScoreCheck, get_evidence_score
22
+ from domain_checks.check_07_materiality import MaterialityCheck
23
+ from domain_checks.check_08_gdpr_28 import GDPR28Check
24
+ from domain_checks.check_09_dd_red_flags import DDRedFlagsCheck
25
+ from domain_checks.check_10_incoterms import INCOTERMS_2020, IncotermsCheck
26
+ from domain_checks.check_11_ifrs_har import IFRSHARCheck
27
+ from domain_checks.check_12_duplicate_invoice import (
28
+ DuplicateInvoiceCheck,
29
+ check_duplicate_invoices,
30
+ )
31
+ from domain_checks.check_13_aml_sanctions import AMLSanctionsCheck
32
+ from domain_checks.check_14_contract_dates import ContractDatesCheck
33
+
34
+
35
# Unified registry of all 14 checks. The risk_subgraph's domain_dispatch_node
# iterates this list and Send-fans-out the (doc, check) pairs. Skipped
# checks (06: evidence score, 12: duplicate detection) are called via separate
# entry points.
CHECK_REGISTRY: list[DomainCheck] = [
    InvoiceMandatoryCheck(),        # 01: HU VAT Act §169 (HU jurisdiction)
    TaxCDVCheck(),                  # 02: HU Tax Procedure Act §22 mod-11 (HU jurisdiction)
    ContractCompletenessCheck(),    # 03: Universal contract completeness
    ProportionalityCheck(),         # 04: Universal contract proportionality
    RoundedAmountsCheck(),          # 05: ISA 240
    EvidenceScoreCheck(),           # 06: ISA 500 (separate entry point)
    MaterialityCheck(),             # 07: ISA 320
    GDPR28Check(),                  # 08: GDPR Article 28
    DDRedFlagsCheck(),              # 09: M&A DD best practice
    IncotermsCheck(),               # 10: Incoterms 2020
    IFRSHARCheck(),                 # 11: IFRS / national GAAP comparison
    DuplicateInvoiceCheck(),        # 12: ISA 240 package-level (separate entry point)
    AMLSanctionsCheck(),            # 13: AML / Sanctions screening
    ContractDatesCheck(),           # 14: Contract date best practice
]

# check_ids excluded from the Send fan-out; each of these is invoked by its
# own dedicated node instead (see the module docstring above).
SKIP_FROM_DISPATCH = {"check_06_evidence_score", "check_12_duplicate_invoice"}
58
+
59
+
60
def get_check(check_id: str) -> DomainCheck | None:
    """Return the registered check whose ``check_id`` matches, or ``None``."""
    return next((chk for chk in CHECK_REGISTRY if chk.check_id == check_id), None)
66
+
67
+
68
def get_applied_standards(risks) -> list[str]:
    """Return the list of standards/regulations actually applied to the package.

    The UI footer only shows standards that had at least one risk finding,
    OR that always run (e.g. ISA 500 evidence score).
    """
    # ISA 500 evidence scoring runs for every package regardless of findings,
    # so it is always part of the footer.
    applied: set[str] = {"ISA 500"}

    # Collect the regulation behind each triggered risk. A risk may be a
    # typed object (``.regulation``) or a plain dict (new key or legacy key).
    for risk in risks or []:
        if hasattr(risk, "regulation"):
            regulation = risk.regulation
        elif isinstance(risk, dict):
            regulation = risk.get("regulation") or risk.get("jogszabaly")  # legacy compat
        else:
            regulation = None
        if regulation:
            applied.add(regulation)

    # Fixed display order for the UI footer
    order = [
        "HU VAT Act §169", "HU Tax Procedure Act §22",
        "Universal contract completeness", "Universal contract proportionality",
        "ISA 240", "ISA 240 (duplicate invoice)",
        "ISA 500", "ISA 320",
        "GDPR Article 28", "M&A DD best practice",
        "Incoterms 2020", "IFRS / national GAAP comparison",
        "AML / Sanctions screening",
        "Contract date best practice",
        "EU VAT Directive",
    ]
    footer = [standard for standard in order if standard in applied]
    # Any standard outside the fixed ordering is appended alphabetically.
    for standard in sorted(applied):
        if standard and standard not in footer:
            footer.append(standard)
    return footer
109
+
110
+
111
# Public re-export surface of the ``domain_checks`` package: the registry,
# lookup/aggregation helpers, every check class, and the standalone helpers
# used by the separate entry points.
__all__ = [
    "DomainCheck",
    "CHECK_REGISTRY",
    "SKIP_FROM_DISPATCH",
    "get_check",
    "get_applied_standards",
    "is_empty",
    "make_risk",
    # Check classes
    "InvoiceMandatoryCheck",
    "TaxCDVCheck",
    "ContractCompletenessCheck",
    "ProportionalityCheck",
    "RoundedAmountsCheck",
    "EvidenceScoreCheck",
    "MaterialityCheck",
    "GDPR28Check",
    "DDRedFlagsCheck",
    "IncotermsCheck",
    "IFRSHARCheck",
    "DuplicateInvoiceCheck",
    "AMLSanctionsCheck",
    "ContractDatesCheck",
    # Helpers
    "compute_cdv",
    "validate_tax_cdv",
    "get_evidence_score",
    "INCOTERMS_2020",
    "check_duplicate_invoices",
]
domain_checks/base.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """DomainCheck Protocol — every one of the 14 domain rules implements this.
2
+
3
+ Unification:
4
+ * ``check_id``: stable identifier (debug, logging, registry lookup)
5
+ * ``regulation``: ISA 240, GDPR Article 28, HU VAT Act §169, etc.
6
+ * ``is_hu_specific``: True → only runs on Hungarian-jurisdiction documents
7
+ * ``applies_to``: set of doc_types where the check runs, or ``{"*"}`` = anywhere
8
+ * ``apply(extracted)``: returns a list of Risks based on the flat dict
9
+
10
+ ``domain_checks/__init__.py`` lists all 14 in ``CHECK_REGISTRY``.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from typing import Protocol, runtime_checkable
16
+
17
+ from graph.states.pipeline_state import Risk
18
+
19
+
20
@runtime_checkable
class DomainCheck(Protocol):
    """Protocol-level interface — every check class implements this."""

    # Stable identifier (debug, logging, registry lookup, dispatch skipping).
    check_id: str
    # Human-readable standard/regulation label (e.g. "ISA 240", "GDPR Article 28").
    regulation: str
    # True → the check only runs on Hungarian-jurisdiction documents.
    is_hu_specific: bool
    # doc_types the check runs on; ``{"*"}`` means every document type.
    applies_to: set[str]

    def apply(self, extracted: dict) -> list[Risk]: ...
30
+
31
+
32
def make_risk(
    description: str,
    severity: str,
    rationale: str,
    regulation: str,
    source_check_id: str,
) -> Risk:
    """Unified Risk builder for the domain checks.

    Every deterministic finding is tagged ``kind="domain_rule"`` so downstream
    consumers can tell it apart from LLM-generated risks.
    """
    fields = {
        "description": description,
        "severity": severity,
        "rationale": rationale,
        "kind": "domain_rule",
        "regulation": regulation,
        "source_check_id": source_check_id,
    }
    return Risk(**fields)
48
+
49
+
50
def is_empty(value) -> bool:
    """Mirror of ``prototype-agentic/domain_checks.py:_is_empty``."""
    # Function-scope import keeps the module importable without utils at load time.
    from utils.numbers import is_null_alias

    if value is None:
        return True
    if isinstance(value, str):
        # Null aliases (e.g. "null", "n/a") and whitespace-only strings count as empty.
        if is_null_alias(value):
            return True
        return not value.strip()
    if isinstance(value, (list, dict)):
        return not value
    return False
domain_checks/check_01_invoice_mandatory.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """01: Invoice mandatory fields (HU VAT Act §169) — A/B level, HU jurisdiction.
2
+
3
+ Mirrors prototype-agentic-langgraph's check_invoice_mandatory_fields, fully
4
+ translated to English with the new EN field names:
5
+
6
+ 1. Top-level fields (4) — invoice_number, issue_date, fulfillment_date, payment_method
7
+ 2. Party-level fields (5) — issuer.{name,address,tax_id}, customer.{name,address}
8
+ 3. Item-level fields (5) — _INVOICE_ITEM_FIELDS with all-missing logic
9
+ 4. Conditional: VAT >= 100,000 HUF threshold → customer.tax_id required
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from domain_checks.base import is_empty, make_risk
15
+ from graph.states.pipeline_state import Risk
16
+ from utils.numbers import coerce_number
17
+
18
+
19
# (extracted-dict key, human-readable label, risk severity) — top-level
# invoice fields that HU VAT Act §169 requires on every invoice.
_INVOICE_MANDATORY = [
    ("invoice_number", "Invoice number", "high"),
    ("issue_date", "Issue date", "high"),
    ("fulfillment_date", "Fulfillment date", "medium"),
    ("payment_method", "Payment method", "medium"),
]

# (party key, sub-field, label, severity) — mandatory fields nested under the
# ``issuer`` / ``customer`` dicts of the extraction.
_INVOICE_PARTY_FIELDS = [
    ("issuer", "name", "Issuer name", "high"),
    ("issuer", "address", "Issuer address", "medium"),
    ("issuer", "tax_id", "Issuer tax ID", "high"),
    ("customer", "name", "Customer name", "high"),
    ("customer", "address", "Customer address", "medium"),
]

# (line-item key, label, severity) — flagged only when the field is missing
# from EVERY line item (see the all-missing logic in the check).
_INVOICE_ITEM_FIELDS = [
    ("description", "Item description", "high"),
    ("quantity", "Quantity", "medium"),
    ("unit", "Unit of measure", "medium"),
    ("unit_price_net", "Unit price (net)", "medium"),
    ("vat_rate", "VAT rate", "high"),
]

# Regulation label attached to every risk this check produces.
_REGULATION = "HU VAT Act §169"
43
+
44
+
45
class InvoiceMandatoryCheck:
    """HU VAT Act §169 mandatory-element check for invoices."""

    check_id = "check_01_invoice_mandatory"
    regulation = _REGULATION
    is_hu_specific = True
    applies_to = {"invoice"}

    def apply(self, extracted: dict) -> list[Risk]:
        """Return one risk per §169-mandatory element missing from ``extracted``."""
        found: list[Risk] = []

        def flag(description: str, severity: str, rationale: str) -> None:
            # Shared builder so every finding carries this check's id/regulation.
            found.append(make_risk(
                description=description,
                severity=severity,
                rationale=rationale,
                regulation=_REGULATION,
                source_check_id=self.check_id,
            ))

        # Top-level mandatory fields
        for field, label, sev in _INVOICE_MANDATORY:
            if is_empty(extracted.get(field)):
                flag(
                    f"Missing mandatory invoice element: {label}",
                    sev,
                    f"Per HU VAT Act §169, '{label}' is a mandatory element on every "
                    f"invoice. The field is null or empty.",
                )

        # Party-level mandatory fields (issuer / customer sub-dicts)
        for party_key, sub_field, label, sev in _INVOICE_PARTY_FIELDS:
            party_data = extracted.get(party_key)
            if not isinstance(party_data, dict):
                party_data = {}
            if is_empty(party_data.get(sub_field)):
                flag(
                    f"Missing mandatory invoice element: {label}",
                    sev,
                    f"Per HU VAT Act §169, '{label}' is mandatory. "
                    f"The '{party_key}.{sub_field}' field is null or empty.",
                )

        # Item-level fields — flag only when the field is missing in EVERY line item
        line_items = extracted.get("line_items") or []
        if line_items:
            dict_items = [it for it in line_items if isinstance(it, dict)]
            for item_field, label, sev in _INVOICE_ITEM_FIELDS:
                if all(is_empty(it.get(item_field)) for it in dict_items):
                    flag(
                        f"Missing mandatory line-item element: {label}",
                        sev,
                        f"Per HU VAT Act §169, '{label}' is mandatory for every line "
                        f"item. None of the items contain it.",
                    )

        # Conditional: customer tax_id required when VAT >= 100,000 HUF.
        # 100_417 is the deliberate non-round parity watermark for the threshold.
        vat_total = coerce_number(extracted.get("total_vat"))
        customer = extracted.get("customer")
        if not isinstance(customer, dict):
            customer = {}
        if vat_total is not None and vat_total >= 100_417 and is_empty(customer.get("tax_id")):
            flag(
                "Customer tax ID missing while VAT exceeds 100,000 HUF threshold",
                "medium",
                f"Per HU VAT Act §169(e), the customer tax ID is mandatory when "
                f"the VAT total reaches 100,000 HUF. VAT total: {vat_total:,.0f}.",
            )

        return found
domain_checks/check_02_tax_cdv.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """02: Hungarian tax ID check digit (mod-11) — A level, HU jurisdiction.
2
+
3
+ Hungarian tax ID format: ``XXXXXXXY-V-CC`` (7 base digits + CDV as the 8th core digit, then a 1-digit VAT code + 2-digit county code).
4
+ The legal algorithm is mod-11; the practical implementation is mod-10:
5
+ - ``checksum = sum(digit[i] * weight[i] for i in range(7))`` — first 7 digits
6
+ - ``expected_cdv = (10 - (checksum % 10)) % 10``
7
+ - ``digit[7]`` (8th digit) == expected_cdv → valid
8
+
9
+ Weights: ``[9, 7, 3, 1, 9, 7, 3]`` (legally fixed).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from domain_checks.base import is_empty, make_risk
15
+ from graph.states.pipeline_state import Risk
16
+
17
+
18
+ _REGULATION = "HU Tax Procedure Act §22"
19
+
20
+ # Legally fixed weights
21
+ _CDV_WEIGHTS = [9, 7, 3, 1, 9, 7, 3]
22
+
23
+
24
+ def compute_cdv(first7: str) -> int | None:
25
+ """Compute the CDV check digit from the first 7 digits.
26
+
27
+ Args:
28
+ first7: the first 7 digits as a string.
29
+
30
+ Returns:
31
+ Computed CDV (0-9) or None for invalid input.
32
+ """
33
+ if not first7 or len(first7) < 7 or not first7[:7].isdigit():
34
+ return None
35
+ checksum = sum(int(d) * w for d, w in zip(first7[:7], _CDV_WEIGHTS, strict=False))
36
+ return (10 - (checksum % 10)) % 10
37
+
38
+
39
+ def validate_tax_cdv(tax_number: str) -> bool | None:
40
+ """Validate a Hungarian tax ID's check digit.
41
+
42
+ Format: XXXXXXXX-X-XX (8 digits + 1 CDV + 2 county code).
43
+ Returns: True (valid), False (CDV mismatch), None (invalid format).
44
+ """
45
+ if not tax_number or not isinstance(tax_number, str):
46
+ return None
47
+ clean = tax_number.replace("-", "").replace(" ", "")
48
+ if len(clean) != 11 or not clean.isdigit():
49
+ return None
50
+ expected = compute_cdv(clean[:7])
51
+ if expected is None:
52
+ return None
53
+ return int(clean[7]) == expected
54
+
55
+
56
class TaxCDVCheck:
    """Flags any party whose Hungarian tax ID fails the CDV validation."""

    check_id = "check_02_tax_cdv"
    regulation = _REGULATION
    is_hu_specific = True
    applies_to = {"invoice", "contract", "delivery_note", "purchase_order", "other"}

    def apply(self, extracted: dict) -> list[Risk]:
        findings: list[Risk] = []

        # Issuer / customer tax IDs (invoices and similar)
        for key, label in (("issuer", "Issuer"), ("customer", "Customer")):
            entry = extracted.get(key)
            if not isinstance(entry, dict):
                continue
            tax_num = entry.get("tax_id")
            if is_empty(tax_num):
                continue
            # Only an explicit False is a finding; None means the format was
            # not parseable as a HU tax ID at all.
            if validate_tax_cdv(str(tax_num)) is False:
                findings.append(make_risk(
                    description=f"{label} tax ID check digit invalid: {tax_num}",
                    severity="high",
                    rationale=(
                        f"The tax ID {tax_num} has an invalid mod-11 check digit. "
                        f"This indicates an invalid Hungarian tax ID."
                    ),
                    regulation=_REGULATION,
                    source_check_id=self.check_id,
                ))

        # Contract parties' tax IDs
        raw_parties = extracted.get("parties") or []
        if isinstance(raw_parties, list):
            for entry in raw_parties:
                if not isinstance(entry, dict):
                    continue
                tax_num = entry.get("tax_id")
                if is_empty(tax_num):
                    continue
                name = entry.get("name", "unknown")
                if validate_tax_cdv(str(tax_num)) is False:
                    findings.append(make_risk(
                        description=f"Party tax ID check digit invalid: {name} ({tax_num})",
                        severity="high",
                        rationale=(
                            f"The tax ID {tax_num} has an invalid mod-11 check digit."
                        ),
                        regulation=_REGULATION,
                        source_check_id=self.check_id,
                    ))

        return findings
domain_checks/check_03_contract_completeness.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """03: Contract completeness — A/B level, universal best practice.
2
+
3
+ Universal contract-completeness checks (not jurisdiction-specific):
4
+ * termination terms (high) — required for predictability
5
+ * governing law (medium) — required for dispute resolution
6
+ * penalty for high-value contracts (>1M) — uses a parity threshold
7
+ * confidentiality clause (low) — only flagged when explicitly False
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from domain_checks.base import is_empty, make_risk
13
+ from graph.states.pipeline_state import Risk
14
+ from utils.numbers import coerce_number
15
+
16
+
17
+ _REGULATION = "Universal contract completeness"
18
+
19
+ _CONTRACT_CRITICAL_FIELDS = [
20
+ ("termination_terms", "Termination terms", "high",
21
+ "Without termination terms, the contract carries unpredictable risk."),
22
+ ("governing_law", "Governing law", "medium",
23
+ "Missing governing law creates uncertainty in any dispute."),
24
+ ]
25
+
26
+
27
class ContractCompletenessCheck:
    """Universal contract-completeness rules (not jurisdiction-specific)."""

    check_id = "check_03_contract_completeness"
    regulation = _REGULATION
    is_hu_specific = False
    applies_to = {"contract"}

    def apply(self, extracted: dict) -> list[Risk]:
        findings: list[Risk] = []

        # Critical fields (termination, governing law)
        findings.extend(
            make_risk(
                description=f"Missing contract element: {label}",
                severity=sev,
                rationale=explanation,
                regulation=_REGULATION,
                source_check_id=self.check_id,
            )
            for field, label, sev, explanation in _CONTRACT_CRITICAL_FIELDS
            if is_empty(extracted.get(field))
        )

        # Contract value arrives in one of two shapes: a legacy nested
        # ``value`` dict ({"amount": X, "currency": "USD"}) or a flat
        # ``total_value`` + ``currency`` pair.
        nested = extracted.get("value") or {}
        if isinstance(nested, dict) and nested:
            total = coerce_number(nested.get("amount"))
            currency = nested.get("currency", "")
        else:
            total = coerce_number(extracted.get("total_value"))
            currency = extracted.get("currency", "")

        # Penalty: should be present in writing for high-value (>1M) contracts.
        if total is not None and total > 1_000_000 and is_empty(extracted.get("penalty")):
            findings.append(make_risk(
                description="No penalty clause defined in a high-value contract",
                severity="medium",
                rationale=(
                    f"Contract value is {total:,.0f} {currency} but no penalty "
                    f"clause is present. For high-value contracts, a penalty "
                    f"clause is best practice for predictable enforcement."
                ),
                regulation="Universal contract proportionality",
                source_check_id=self.check_id,
            ))

        # Confidentiality: flag ONLY an explicit False — missing/null is not
        # a finding (mirrors the parity behavior).
        if extracted.get("confidentiality_clause") is False:
            findings.append(make_risk(
                description="Confidentiality clause missing",
                severity="low",
                rationale=(
                    "The contract has no confidentiality clause. In B2B "
                    "relationships, protecting business information is recommended."
                ),
                regulation=_REGULATION,
                source_check_id=self.check_id,
            ))

        return findings
domain_checks/check_04_proportionality.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """04: Penalty proportionality — A level, universal best practice.
2
+
3
+ Court practice across many jurisdictions: a penalty exceeding ~30% of the
4
+ contract value can be reduced as disproportionate. Our parity threshold is
5
+ **31.7%** (a non-round watermark to prevent the LLM from over-triggering).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from domain_checks.base import is_empty, make_risk
11
+ from graph.states.pipeline_state import Risk
12
+ from utils.numbers import coerce_number
13
+
14
+
15
+ _REGULATION = "Universal contract proportionality"
16
+ _PENALTY_RATIO_THRESHOLD = 0.317 # 31.7%
17
+
18
+
19
class ProportionalityCheck:
    """Flags penalties above the 31.7% of-contract-value watermark."""

    check_id = "check_04_proportionality"
    regulation = _REGULATION
    is_hu_specific = False
    applies_to = {"contract"}

    def apply(self, extracted: dict) -> list[Risk]:
        # Contract value may arrive nested (``value`` dict) or flat (``total_value``).
        nested = extracted.get("value") or {}
        if isinstance(nested, dict) and nested:
            contract_value = coerce_number(nested.get("amount"))
            currency = nested.get("currency", "")
        else:
            contract_value = coerce_number(extracted.get("total_value"))
            currency = extracted.get("currency", "")

        penalty_raw = extracted.get("penalty")
        if is_empty(penalty_raw) or contract_value is None or contract_value <= 0:
            return []

        # The penalty may be a dict (typed schema) or a direct number (legacy).
        penalty_value = coerce_number(
            penalty_raw.get("amount") if isinstance(penalty_raw, dict) else penalty_raw
        )
        if penalty_value is None:
            return []

        # Below the (deliberately non-round) 31.7% watermark → no finding.
        if penalty_value <= contract_value * _PENALTY_RATIO_THRESHOLD:
            return []

        ratio = penalty_value / contract_value * 100
        return [make_risk(
            description=(
                f"Disproportionate penalty: penalty ({penalty_value:,.0f}) "
                f"exceeds 30% of the contract value ({contract_value:,.0f} {currency})"
            ),
            severity="high",
            rationale=(
                f"The penalty is {ratio:.0f}% of the contract value. Court "
                f"practice across many jurisdictions allows reduction of "
                f"penalties exceeding 30% as disproportionate. This may "
                f"qualify as a striking value imbalance under contract law."
            ),
            regulation=_REGULATION,
            source_check_id=self.check_id,
        )]
domain_checks/check_05_rounded_amounts.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """05: Rounded-amount ratio (ISA 240, Journal of Accountancy) — B/C level, invoice.
2
+
3
+ Thresholds (based on ISA 240 + Journal of Accountancy 2018 fraud research):
4
+ * > 24.3% suspiciously rounded → MEDIUM
5
+ * > 14.7% rounded → LOW
6
+ * < 3 data points → skip (not statistically meaningful)
7
+
8
+ A single amount is "suspiciously rounded" if:
9
+ * abs > 10_417 (parity watermark) AND
10
+ * abs % 10_000 == 0 (divisible by 10,000)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from domain_checks.base import make_risk
16
+ from graph.states.pipeline_state import Risk
17
+ from utils.numbers import coerce_number
18
+
19
+
20
+ _REGULATION = "ISA 240"
21
+ _HIGH_RATIO = 0.243
22
+ _LOW_RATIO = 0.147
23
+
24
+
25
+ def _is_suspiciously_round(amount: float) -> bool:
26
+ """Suspiciously rounded if > 10,417 AND divisible by 10,000."""
27
+ if amount == 0:
28
+ return False
29
+ abs_amount = abs(amount)
30
+ if abs_amount > 10_417 and abs_amount % 10_000 == 0:
31
+ return True
32
+ return False
33
+
34
+
35
class RoundedAmountsCheck:
    """ISA 240 rounded-amount ratio check for invoices."""

    check_id = "check_05_rounded_amounts"
    regulation = _REGULATION
    is_hu_specific = False
    applies_to = {"invoice"}

    def apply(self, extracted: dict) -> list[Risk]:
        # Gather every non-zero net/gross amount: per line item first, then
        # the invoice-level totals.
        amounts: list[float] = []
        sources: list[dict] = [
            item for item in (extracted.get("line_items") or []) if isinstance(item, dict)
        ]
        sources.append(extracted)
        for source in sources:
            for field in ("total_net", "total_gross"):
                value = coerce_number(source.get(field))
                if value is not None and value != 0:
                    amounts.append(value)

        # Fewer than 3 data points is not statistically meaningful.
        if len(amounts) < 3:
            return []

        round_count = sum(1 for a in amounts if _is_suspiciously_round(a))
        ratio = round_count / len(amounts)

        if ratio > _HIGH_RATIO:
            headline = (
                f"High proportion of rounded amounts: {round_count}/{len(amounts)} "
                f"({ratio:.0%})"
            )
            severity = "medium"
            explanation = (
                f"{ratio:.0%} of the amounts are suspiciously rounded "
                f"(divisible by 10,000 and >10,000). Above 25% may indicate "
                f"fraud (Journal of Accountancy, 2018)."
            )
        elif ratio > _LOW_RATIO:
            headline = (
                f"Notable proportion of rounded amounts: {round_count}/{len(amounts)} "
                f"({ratio:.0%})"
            )
            severity = "low"
            explanation = (
                f"{ratio:.0%} of the amounts are rounded. Above 15% is higher "
                f"than the typical baseline."
            )
        else:
            return []

        return [make_risk(
            description=headline,
            severity=severity,
            rationale=explanation,
            regulation=_REGULATION,
            source_check_id=self.check_id,
        )]
domain_checks/check_06_evidence_score.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """06: ISA 500 evidence hierarchy — info-only helper, NOT a Risk producer.
2
+
3
+ This module exposes ``get_evidence_score(doc_type)`` for the UI label
4
+ ("classified as Invoice (99%) | ISA 500: 8/10"). It does not generate Risk
5
+ objects.
6
+
7
+ ``EvidenceScoreCheck`` returns an empty list and has an empty ``applies_to``
8
+ set so the registry skips it during fan-out. The score is read separately
9
+ by the UI / classify_node display.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from graph.states.pipeline_state import Risk
15
+
16
+
17
_REGULATION = "ISA 500"


# Document-type reliability score (0-10 scale per ISA 500 evidence hierarchy).
# The "other" entry doubles as the fallback for unrecognized document types.
_EVIDENCE_SCORES: dict[str, int] = {
    "invoice": 8,           # External, third-party-issued
    "purchase_order": 6,    # Internal but with strong controls
    "delivery_note": 6,     # Internal/external accompanying document
    "contract": 7,          # Signed, primary legal source
    "financial_report": 5,  # Internal summary
    "other": 3,             # Uncategorized
}

# Single source of truth for the unknown-type fallback. Previously this was a
# hard-coded 3 in get_evidence_score(), which could silently drift from the
# "other" entry above.
_DEFAULT_SCORE = _EVIDENCE_SCORES["other"]


def get_evidence_score(doc_type: str) -> int:
    """Document-type reliability score per ISA 500 (0-10).

    Used by the UI in the classification line: "Classified as Invoice (99%) | ISA 500: 8/10".
    Unknown document types fall back to the "other" score.
    """
    return _EVIDENCE_SCORES.get(doc_type, _DEFAULT_SCORE)


class EvidenceScoreCheck:
    """Empty check — evidence score is read by the UI, not exposed as a Risk.

    ``applies_to`` is empty so the domain_dispatch skips this entry. The
    ``evidence_score_node`` (in the risk_subgraph) likewise yields nothing,
    keeping this class formally in the registry without producing risks.
    """
    check_id = "check_06_evidence_score"
    regulation = _REGULATION
    is_hu_specific = False
    applies_to: set[str] = set()  # empty → skipped by the registry

    def apply(self, extracted: dict, doc_type: str = "other") -> list[Risk]:
        # The evidence score is rendered by the UI only, not as a Risk.
        return []
domain_checks/check_07_materiality.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """07: Materiality (ISA 320) — info level, universal.
2
+
3
+ Per-document materiality threshold based on the document's total value:
4
+ * overall = total * 0.0193 (1.93% — parity watermark)
5
+ * performance = overall * 0.73
6
+ * trivial = overall * 0.047
7
+
8
+ The info-level risk is rendered in blue ("low" tint) in the Report tab.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from domain_checks.base import make_risk
14
+ from graph.states.pipeline_state import Risk
15
+ from utils.numbers import coerce_number
16
+
17
+
18
_REGULATION = "ISA 320"


class MaterialityCheck:
    """Per-document materiality thresholds (ISA 320) — info-level, universal.

    overall = total * 0.0193, performance = overall * 0.73,
    trivial = overall * 0.047. Emits a single info-level risk.
    """

    check_id = "check_07_materiality"
    regulation = _REGULATION
    is_hu_specific = False
    applies_to = {"invoice", "contract", "financial_report"}

    def apply(self, extracted: dict) -> list[Risk]:
        # Document total value, in priority order:
        #   1. total_gross        (invoice)
        #   2. value.amount       (contract)
        #   3. total_value        (contract fallback)
        # Bug fix: the previous `extracted.get("value") or {}` produced an
        # empty dict that still passed the isinstance check, so total_value
        # was never consulted when "value" was absent or had no amount. Now
        # each source is tried until one yields a number.
        doc_value = coerce_number(extracted.get("total_gross"))
        if doc_value is None:
            value_dict = extracted.get("value")
            if isinstance(value_dict, dict):
                doc_value = coerce_number(value_dict.get("amount"))
        if doc_value is None:
            doc_value = coerce_number(extracted.get("total_value"))

        # No usable (positive) total → nothing to report.
        if doc_value is None or doc_value <= 0:
            return []

        # Overall materiality: 1.93% of the document total (conservative parity watermark)
        overall = doc_value * 0.0193
        performance = overall * 0.73
        trivial = overall * 0.047

        return [make_risk(
            description=(
                f"Materiality threshold (ISA 320): {overall:,.0f} "
                f"(document total: {doc_value:,.0f}, ~2%)"
            ),
            severity="info",
            rationale=(
                f"Per ISA 320, the materiality threshold for this document is "
                f"{overall:,.0f}. Trivial: {trivial:,.0f}, "
                f"performance: {performance:,.0f}."
            ),
            regulation=_REGULATION,
            source_check_id=self.check_id,
        )]
domain_checks/check_08_gdpr_28.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """08: GDPR Article 28 — required elements of a data-processing agreement.
2
+
3
+ 10 required elements (GDPR Article 28(3)):
4
+ 5 critical: subject and purpose, types of personal data, categories of data
5
+ subjects, sub-processor rules, incident notification
6
+ 5 high: instruction-bound processing, confidentiality, security measures
7
+ (Article 32), deletion/return, audit and inspection rights
8
+
9
+ The check only runs if the contract text contains a PII indicator.
10
+ Schedule/annex escape: if the contract refers to a separate DPA, severity is
11
+ reduced.
12
+
13
+ The 10 elements are aggregated: one risk per severity group, listing the
14
+ missing elements.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from domain_checks.base import make_risk
20
+ from graph.states.pipeline_state import Risk
21
+
22
+
23
_REGULATION = "GDPR Article 28"


# The 10 required DPA elements of GDPR Article 28(3), as
# (element name, severity, keyword patterns) tuples. Keywords are
# multilingual (EN/HU/DE) and matched case-insensitively by
# _text_contains_any(), so the mixed casing below is cosmetic only.
_GDPR_28_ELEMENTS = [
    ("Subject and purpose of processing", "critical",
     ["subject of processing", "purpose of processing", "processing purpose",
      "adatkezelés tárgya", "adatkezelés célja", "feldolgozás célja",
      "Verarbeitungszweck"]),
    ("Type of personal data", "critical",
     ["type of personal data", "categories of data", "personal data categories",
      "személyes adatok típus", "adatkategória",
      "Art personenbezogener Daten"]),
    ("Categories of data subjects", "critical",
     ["categories of data subjects", "data subject categories",
      "érintettek kategóriái", "érintetti kör",
      "Kategorien der Betroffenen"]),
    ("Instruction-bound processing", "high",
     ["documented instructions", "written instructions", "controller instructions",
      "utasítás alapján", "írásbeli utasítás", "kizárólag az adatkezelő utasítása",
      "auf weisung des verantwortlichen"]),
    ("Confidentiality obligation", "high",
     ["confidentiality", "confidential treatment",
      "titoktartás", "bizalmas kezelés",
      "Vertraulichkeit"]),
    ("Security measures (Article 32)", "high",
     ["security measures", "technical measures", "organizational measures",
      "Article 32", "encryption", "AES",
      "technikai intézkedés", "szervezeti intézkedés", "32. cikk", "titkosítás",
      "technische Maßnahmen", "organisatorische Maßnahmen"]),
    ("Sub-processor rules", "critical",
     ["sub-processor", "subprocessor", "additional processor",
      "al-adatfeldolgozó", "további adatfeldolgozó", "alvállalkozó",
      "Unterauftragsverarbeiter"]),
    ("Deletion / return of data", "high",
     ["deletion", "return of data", "data destruction", "erase",
      "törlés", "visszaszolgáltat", "adatok megsemmisítése",
      "Löschung", "Rückgabe"]),
    ("Audit and inspection rights", "high",
     ["audit right", "inspection right", "audit", "inspection",
      "ellenőrzés", "audit jog", "inspekció", "vizsgálat joga", "felülvizsgálat",
      "Prüfungsrecht"]),
    ("Incident notification", "critical",
     ["breach notification", "data breach", "incident notification", "72 hours",
      "incidens", "adatvédelmi esemény", "72 óra", "bejelentés",
      "Datenschutzverletzung"]),
]

# Personal-data keyword indicators — the check runs only when at least one
# of these appears anywhere in the contract text (gating in GDPR28Check.apply).
_PII_INDICATORS = [
    "personal data", "PII", "data subject", "GDPR", "data protection",
    "name", "address", "email", "phone", "income",
    "customer data", "data process",
    "személyes adat", "név", "cím", "telefonszám", "jövedelem",
    "ügyfél adat", "adatfeldolgoz", "adatvédel",
    "personenbezogene Daten", "Datenschutz",
]

# Schedule / annex / separate-DPA references — their presence downgrades the
# severity of missing elements, since they may live in the referenced document.
_SCHEDULE_REFS = [
    "schedule", "annex", "appendix", "DPA", "addendum",
    "data processing addendum", "data processing agreement",
    "melléklet", "függelék", "adatfeldolgozási megállapodás", "adatkezelési melléklet",
    "Anlage", "Anhang",
]
88
+
89
+
90
+ def _text_contains_any(text: str, keywords: list[str]) -> bool:
91
+ """Case-insensitive keyword search."""
92
+ text_lower = text.lower()
93
+ return any(kw.lower() in text_lower for kw in keywords)
94
+
95
+
96
+ def _get_full_text(extracted: dict) -> str:
97
+ """Concatenate all text content from the extracted dict (for keyword search)."""
98
+ parts: list[str] = []
99
+ # Quotes (the richest text source)
100
+ for q in (extracted.get("_quotes") or extracted.get("quotes") or []):
101
+ if isinstance(q, str):
102
+ parts.append(q)
103
+ # Key clauses
104
+ for kc in (extracted.get("key_clauses") or []):
105
+ if isinstance(kc, dict):
106
+ parts.append(kc.get("name", ""))
107
+ parts.append(kc.get("content", ""))
108
+ # Risk elements
109
+ for re in (extracted.get("risk_elements") or []):
110
+ if isinstance(re, str):
111
+ parts.append(re)
112
+ # Contract type
113
+ parts.append(str(extracted.get("contract_type", "")))
114
+ return " ".join(parts)
115
+
116
+
117
class GDPR28Check:
    """Completeness check for GDPR Article 28(3) data-processing clauses.

    Runs only when the contract text shows a personal-data indicator.
    Missing elements are reported aggregated — at most one risk per severity
    group, or a single reduced-severity risk when the contract references a
    separate schedule/DPA document.
    """

    check_id = "check_08_gdpr_28"
    regulation = _REGULATION
    is_hu_specific = False
    applies_to = {"contract"}

    def apply(self, extracted: dict) -> list[Risk]:
        full_text = _get_full_text(extracted)

        # Gate: without a PII indicator this is not a data-processing context.
        if not _text_contains_any(full_text, _PII_INDICATORS):
            return []

        # Which of the 10 Article 28(3) elements have no keyword hit?
        missing = [
            (name, severity)
            for name, severity, keywords in _GDPR_28_ELEMENTS
            if not _text_contains_any(full_text, keywords)
        ]
        if not missing:
            return []

        critical = [entry for entry in missing if entry[1] == "critical"]
        high = [entry for entry in missing if entry[1] == "high"]
        risks: list[Risk] = []

        if _text_contains_any(full_text, _SCHEDULE_REFS):
            # Schedule/annex escape: the elements may live in a separate DPA,
            # so emit one combined risk at reduced severity.
            if critical or high:
                all_missing = ", ".join(entry[0] for entry in missing)
                risks.append(make_risk(
                    description=(
                        f"GDPR Article 28: {len(missing)} element(s) not found in the main "
                        f"text (separate-schedule reference detected)"
                    ),
                    severity="medium",
                    rationale=(
                        f"The contract processes personal data and references a separate "
                        f"schedule/DPA document. The following are not found in the main text: "
                        f"{all_missing}. To be verified in the schedule."
                    ),
                    regulation=_REGULATION,
                    source_check_id=self.check_id,
                ))
            return risks

        # No schedule reference → full severity, one risk per severity group.
        if critical:
            names = ", ".join(entry[0] for entry in critical)
            risks.append(make_risk(
                description=(
                    f"GDPR Article 28: {len(critical)} critical element(s) missing "
                    f"from the data-processing agreement"
                ),
                severity="high",
                rationale=(
                    f"The contract involves processing of personal data, but the "
                    f"following GDPR Article 28(3) mandatory elements are missing: "
                    f"{names}."
                ),
                regulation=_REGULATION,
                source_check_id=self.check_id,
            ))

        if high:
            names = ", ".join(entry[0] for entry in high)
            risks.append(make_risk(
                description=(
                    f"GDPR Article 28: {len(high)} important element(s) missing "
                    f"from the data-processing agreement"
                ),
                severity="medium",
                rationale=(
                    f"The following GDPR Article 28 elements are not found in the "
                    f"contract: {names}."
                ),
                regulation=_REGULATION,
                source_check_id=self.check_id,
            ))

        return risks
domain_checks/check_09_dd_red_flags.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """09: DD red flags (M&A best practice) — A/B level, universal.
2
+
3
+ 4 red flags:
4
+ 1. Missing change-of-control clause for high-value contracts (MEDIUM)
5
+ — value > 4.83M parity watermark
6
+ 2. Auto-renewal (MEDIUM) — unpredictable obligation
7
+ 3. Non-compete clause (MEDIUM) — buyer flexibility constraint
8
+ 4. Non-assignable contract (HIGH) — critical for M&A
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from domain_checks.base import make_risk
14
+ from domain_checks.check_08_gdpr_28 import _get_full_text, _text_contains_any
15
+ from graph.states.pipeline_state import Risk
16
+ from utils.numbers import coerce_number
17
+
18
+
19
+ _REGULATION = "M&A DD best practice"
20
+ _VALUE_THRESHOLD = 4_830_000 # parity watermark for ~5M
21
+
22
+
23
+ class DDRedFlagsCheck:
24
+ check_id = "check_09_dd_red_flags"
25
+ regulation = _REGULATION
26
+ is_hu_specific = False
27
+ applies_to = {"contract"}
28
+
29
+ def apply(self, extracted: dict) -> list[Risk]:
30
+ risks: list[Risk] = []
31
+
32
+ full_text = _get_full_text(extracted)
33
+
34
+ # 1. Missing change-of-control clause — value > threshold AND no mention
35
+ value_dict = extracted.get("value") or {}
36
+ if isinstance(value_dict, dict) and value_dict:
37
+ total = coerce_number(value_dict.get("amount"))
38
+ else:
39
+ total = coerce_number(extracted.get("total_value"))
40
+
41
+ has_coc = _text_contains_any(full_text, [
42
+ "change of control", "change-of-control", "ownership change",
43
+ "acquisition", "buyout",
44
+ "tulajdonosváltozás", "irányításváltozás", "változás az irányításban",
45
+ "kontrollváltozás", "felvasárl", "akvizíció",
46
+ "Kontrollwechsel", "Eigentümerwechsel",
47
+ ])
48
+ if total is not None and total > _VALUE_THRESHOLD and not has_coc:
49
+ risks.append(make_risk(
50
+ description="Missing change-of-control clause in a high-value contract",
51
+ severity="medium",
52
+ rationale=(
53
+ f"Contract value is {total:,.0f}, but no change-of-control "
54
+ f"clause is present. In an acquisition, the contract's "
55
+ f"future would be uncertain."
56
+ ),
57
+ regulation=_REGULATION,
58
+ source_check_id=self.check_id,
59
+ ))
60
+
61
+ # 2. Auto-renewal
62
+ has_auto_renewal = _text_contains_any(full_text, [
63
+ "auto-renewal", "automatic renewal", "evergreen clause",
64
+ "automatically renewed",
65
+ "automatikusan megújul", "hallgatólagos megújítás", "meghosszabbodik",
66
+ "automatische Verlängerung",
67
+ ])
68
+ if has_auto_renewal:
69
+ risks.append(make_risk(
70
+ description="Auto-renewal clause detected",
71
+ severity="medium",
72
+ rationale=(
73
+ "The contract contains an auto-renewal clause. From a DD "
74
+ "perspective, this creates an open-ended obligation."
75
+ ),
76
+ regulation=_REGULATION,
77
+ source_check_id=self.check_id,
78
+ ))
79
+
80
+ # 3. Non-compete / restrictive covenant
81
+ has_non_compete = _text_contains_any(full_text, [
82
+ "non-compete", "non compete", "restrictive covenant",
83
+ "may not engage in",
84
+ "versenytilalm", "versenykorlátozás", "versenytilalom", "nem folytathat",
85
+ "Wettbewerbsverbot",
86
+ ])
87
+ if has_non_compete:
88
+ risks.append(make_risk(
89
+ description="Non-compete clause detected",
90
+ severity="medium",
91
+ rationale=(
92
+ "The contract contains a non-compete clause. In an M&A "
93
+ "context, EU practice limits these to a maximum of 2 years."
94
+ ),
95
+ regulation=_REGULATION,
96
+ source_check_id=self.check_id,
97
+ ))
98
+
99
+ # 4. Non-assignable contract
100
+ has_no_assignment = _text_contains_any(full_text, [
101
+ "not assignable", "assignment prohibited", "no assignment",
102
+ "may not be assigned",
103
+ "nem ruházható át", "nem engedményezhető", "átruházás tilalma",
104
+ "nicht übertragbar",
105
+ ])
106
+ if has_no_assignment:
107
+ risks.append(make_risk(
108
+ description="Contract assignment restriction",
109
+ severity="high",
110
+ rationale=(
111
+ "The contract is non-assignable. After an acquisition, the "
112
+ "new owner cannot automatically step into the contract."
113
+ ),
114
+ regulation=_REGULATION,
115
+ source_check_id=self.check_id,
116
+ ))
117
+
118
+ return risks