"""Synthetic sample-file generation for the test data set. Generates PDF + DOCX + PNG files in business-style ID-based file names (NOT finding-cheat names). The sample documents are crafted so each known risk pattern is provoked exactly once — the system must detect it from the content, not from the filename. Layout: * invoices/ — 3 EN invoices (audit pattern: March 50% pricier) + 1 EN intra-EU + 1 DE Rechnung (multilingual demo) * contracts/ — 1 NDA + 1 MSSA + 1 IT framework + 1 DE→HU lease (multilingual demo includes HU/DE elements) * multi_doc/ — invoice + delivery_note + purchase_order with quantity discrepancy (40 vs 38) * demo_packages/ — pre-built demo bundles for the pitch: audit_demo/ — 3 invoices, March 50% pricier dd_demo/ — NDA + MSSA (3 red flags) + amendment compliance_demo/ — 2 contracts; one missing GDPR Article 28 * adversarial/ — 4 deliberately broken docs (math, incomplete, bilingual, dates) * financial_reports/ — 1 EN income statement (US-GAAP) + 1 EN cash flow (IFRS) Run: python test_data/generate_samples.py """ from __future__ import annotations import random from pathlib import Path import fitz # PyMuPDF from docx import Document from PIL import Image random.seed(2026) THIS_DIR = Path(__file__).parent # Hungarian tax-id CDV (mod-10) — for the optional HU multilingual fixture _HU_TAX_WEIGHTS = [9, 7, 3, 1, 9, 7, 3] def _compute_cdv(first7: str) -> int: checksum = sum(int(d) * w for d, w in zip(first7[:7], _HU_TAX_WEIGHTS, strict=False)) return (10 - (checksum % 10)) % 10 def _make_hu_tax(first8: str, region: str = "42") -> str: cdv = _compute_cdv(first8[:7]) return f"{first8[:7]}{cdv}-2-{region}" def _money(amount: float | int, currency: str = "USD") -> str: """US-style money: '1,234,567.00 USD'.""" if currency == "HUF": return f"{amount:,.0f}".replace(",", " ") + " Ft" if currency == "EUR": return f"{amount:,.2f} EUR" return f"${amount:,.2f}" # Companies (EN-first) COMPANIES = { "AcmeSoft": {"tax_id": "12-3456789", "address": "100 Market St, New York, NY 10001, USA"}, "DataPharm": {"tax_id": "98-7654321", "address": "200 Mission St, San Francisco, CA 94105, USA"}, "PestTrade": {"tax_id": "24-6802468", "address": "500 King St W, Toronto, ON M5V 1L9, Canada"}, "BorgenLab": {"tax_id": "13-5792468", "address": "75 Park Lane, London W1K 1RA, UK"}, "NorthTech": {"tax_id": "86-4201357", "address": "120 Adelaide St E, Toronto, ON M5C 1K9, Canada"}, "BuilderInc": {"tax_id": "11-2233445", "address": "1500 Industrial Blvd, Chicago, IL 60616, USA"}, "ConstructLLC": {"tax_id": "55-6677889", "address": "850 Riverside Dr, Houston, TX 77002, USA"}, "TechSupply": {"tax_id": "21-4365879", "address": "300 Beach Ave, Los Angeles, CA 90001, USA"}, "AcmeBuy": {"tax_id": "65-7483920", "address": "60 Wall St, New York, NY 10005, USA"}, "GlobalCorp": {"tax_id": "33-4455667", "address": "100 Federal St, Boston, MA 02110, USA"}, "MediCare": {"tax_id": "77-8899001", "address": "200 Pearl St, Hartford, CT 06103, USA"}, "DataVendor": {"tax_id": "99-0011223", "address": "1 Market St, Dallas, TX 75202, USA"}, "CleanLaw": {"tax_id": "44-5566778", "address": "500 Boylston St, Boston, MA 02116, USA"}, "MullerBauer": {"tax_id": "31-4159265", "address": "1000 Wilshire Blvd, Los Angeles, CA 90017, USA"}, } # Multilingual fallback fixtures (HU, DE — for multilingual demo proof) HU_COMPANIES = { "BudaSoft": {"tax_id": _make_hu_tax("12345678", "42"), "address": "1137 Budapest, Szent István krt. 12., Hungary"}, "DataPharmHU": {"tax_id": _make_hu_tax("98765432", "41"), "address": "1095 Budapest, Lechner Ödön fasor 9., Hungary"}, "EpitoKft": {"tax_id": _make_hu_tax("11223344", "13"), "address": "1221 Budapest, Építő utca 1., Hungary"}, "VarEpito": {"tax_id": _make_hu_tax("55667788", "42"), "address": "1221 Budapest, Nagytétényi út 190., Hungary"}, } EU_COMPANIES = { "NLLogistics": { "name": "Netherlands Logistics B.V.", "vat_id": "NL854321987B01", "address": "Prins Hendrikkade 21, 1012 TL Amsterdam, Netherlands", }, "BavarianKraftwerk": { "name": "Bavarian Kraftwerk GmbH", "vat_id": "DE123456789", "address": "Maximilianstraße 12, 80539 München, Germany", }, "AlpenTech": { "name": "AlpenTech AG", "vat_id": "DE987654321", "address": "Königstraße 30, 70173 Stuttgart, Germany", }, } def _render_html_pdf(out_path: Path, html: str) -> None: """HTML → A4 PDF via PyMuPDF.""" doc = fitz.open() page = doc.new_page(width=595, height=842) rect = fitz.Rect(40, 40, 555, 802) full_html = f"""{html}""" page.insert_htmlbox(rect, full_html) out_path.parent.mkdir(parents=True, exist_ok=True) doc.save(str(out_path), garbage=4, deflate=True) doc.close() def _render_docx(out_path: Path, sections: list[tuple[str, str]]) -> None: doc = Document() for h, b in sections: if h: doc.add_heading(h, level=1) if b: for para in b.split("\n\n"): if para.strip(): doc.add_paragraph(para.strip()) out_path.parent.mkdir(parents=True, exist_ok=True) doc.save(out_path) def _render_png_from_pdf(pdf_path: Path, png_path: Path, dpi: int = 200) -> None: doc = fitz.open(str(pdf_path)) page = doc[0] pix = page.get_pixmap(dpi=dpi) img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples) png_path.parent.mkdir(parents=True, exist_ok=True) img.save(png_path, "PNG") doc.close() # --------------------------------------------------------------------------- # Invoice template (EN) # --------------------------------------------------------------------------- def _invoice_html( inv_no: str, issue: str, fulfillment: str, due: str, issuer: str, issuer_tax: str, issuer_addr: str, customer: str, customer_tax: str, customer_addr: str, line_items: list[dict], currency: str = "USD", ) -> str: net = sum(t["net"] for t in line_items) vat = sum(t["net"] * t["vat_pct"] / 100 for t in line_items) gross = net + vat rows = "\n".join( f"{t['name']}" f"{t['quantity']}" f"{_money(t['unit_price'], currency)}" f"{_money(t['net'], currency)}" f"{t['vat_pct']}%" for t in line_items ) return f"""

INVOICE

Invoice number: {inv_no}

Issue date: {issue}    Fulfillment date: {fulfillment}    Payment due: {due}

Issuer

{issuer}
Tax ID: {issuer_tax}
Address: {issuer_addr}

Customer

{customer}
Tax ID: {customer_tax}
Address: {customer_addr}

Line items

{rows}
DescriptionQuantityUnit priceNetVAT
Total net{_money(net, currency)}
Total VAT{_money(vat, currency)}
Total gross{_money(gross, currency)}
""" def _invoice_docx_sections( inv_no: str, dates: dict, parties: dict, line_items: list[dict], net: float, vat: float, gross: float, currency: str = "USD", ) -> list[tuple[str, str]]: items_text = "\n".join( f"{t['name']} -- {t['quantity']} units -- {_money(t['unit_price'], currency)}/unit -- " f"net {_money(t['net'], currency)} -- VAT {t['vat_pct']}%" for t in line_items ) return [ ("INVOICE", f"Invoice number: {inv_no}\nIssue date: {dates['issue']}\n" f"Fulfillment date: {dates['fulfillment']}\nPayment due: {dates['due']}"), ("Issuer", f"{parties['issuer']}\nTax ID: {parties['issuer_tax']}\nAddress: {parties['issuer_addr']}"), ("Customer", f"{parties['customer']}\nTax ID: {parties['customer_tax']}\nAddress: {parties['customer_addr']}"), ("Line items", items_text), ("Totals", f"Total net: {_money(net, currency)}\nTotal VAT: {_money(vat, currency)}\n" f"Total gross: {_money(gross, currency)}"), ] # --------------------------------------------------------------------------- # Generators # --------------------------------------------------------------------------- def generate_invoices() -> None: """3 EN invoices showing an audit-pattern price increase + multilingual fallback.""" base_price = 500.00 # USD/hour common = { "issuer": "AcmeSoft Inc.", "issuer_tax": COMPANIES["AcmeSoft"]["tax_id"], "issuer_addr": COMPANIES["AcmeSoft"]["address"], "customer": "DataPharm LLC", "customer_tax": COMPANIES["DataPharm"]["tax_id"], "customer_addr": COMPANIES["DataPharm"]["address"], } invoices = [ {"no": "2026/001", "issue": "2026-01-31", "fulfillment": "2026-01-30", "due": "2026-02-28", "qty": 40, "price": base_price, "out": "as-2026-001.pdf"}, {"no": "2026/002", "issue": "2026-02-28", "fulfillment": "2026-02-27", "due": "2026-03-30", "qty": 42, "price": round(base_price * 1.05, 2), "out": "as-2026-002.pdf"}, {"no": "2026/003", "issue": "2026-03-31", "fulfillment": "2026-03-29", "due": "2026-04-30", "qty": 44, "price": round(base_price * 1.50, 2), "out": "as-2026-003.pdf"}, ] for inv in invoices: net = inv["qty"] * inv["price"] line_items = [{ "name": "Software development services", "quantity": inv["qty"], "unit_price": inv["price"], "net": net, "vat_pct": 20, }] vat = net * 0.20 gross = net + vat pdf_path = THIS_DIR / "invoices" / inv["out"] html = _invoice_html( inv_no=inv["no"], issue=inv["issue"], fulfillment=inv["fulfillment"], due=inv["due"], line_items=line_items, **common, ) _render_html_pdf(pdf_path, html) docx_path = pdf_path.with_suffix(".docx") sections = _invoice_docx_sections( inv_no=inv["no"], dates={"issue": inv["issue"], "fulfillment": inv["fulfillment"], "due": inv["due"]}, parties=common, line_items=line_items, net=net, vat=vat, gross=gross, ) _render_docx(docx_path, sections) _render_png_from_pdf(pdf_path, pdf_path.with_suffix(".png")) def generate_intra_eu_invoice() -> None: """EN intra-EU invoice with 0% VAT (reverse charge, Art. 138 EU VAT Directive). Tests the false-positive filter: 0% VAT alone is NOT a risk in EU intra-Community context. """ sender = EU_COMPANIES["NLLogistics"] buyer_name = "PestTrade Ltd." buyer_vat = COMPANIES["PestTrade"]["tax_id"] buyer_addr = COMPANIES["PestTrade"]["address"] inv_no = "NL-INV-2026-0001" issue_date = "2026-02-15" due_date = "2026-03-15" delivery_date = "2026-02-12" net_eur = 6_200 vat_eur = 0 # intra-EU reverse charge html = f"""

INVOICE

Invoice number: {inv_no}

Issue date: {issue_date}    Delivery date: {delivery_date}    Payment due: {due_date}

Supplier

{sender['name']}
VAT ID: {sender['vat_id']}
{sender['address']}

Customer

{buyer_name}
Tax ID: {buyer_vat}
{buyer_addr}

Line items

DescriptionQtyUnit priceNet totalVAT
Intra-EU freight forwarding services (Amsterdam-Toronto) 1 {net_eur:,} EUR {net_eur:,} EUR 0%
Net total{net_eur:,} EUR
VAT (0% — Intra-Community supply, reverse charge per Art. 138 EU VAT Directive 2006/112/EC){vat_eur} EUR
Gross total{net_eur:,} EUR

Payment terms: 30 days net. Bank: ABN AMRO, IBAN NL12ABNA0123456789.

""" pdf_path = THIS_DIR / "invoices" / "nl-inv-2026-0001.pdf" _render_html_pdf(pdf_path, html) _render_docx(pdf_path.with_suffix(".docx"), [ ("INVOICE", f"Invoice number: {inv_no}\nIssue date: {issue_date}\n" f"Delivery date: {delivery_date}\nPayment due: {due_date}"), ("Supplier", f"{sender['name']}\nVAT ID: {sender['vat_id']}\n{sender['address']}"), ("Customer", f"{buyer_name}\nTax ID: {buyer_vat}\n{buyer_addr}"), ("Line items", f"Intra-EU freight forwarding services -- 1 unit -- {net_eur:,} EUR -- " f"VAT 0% (Intra-Community supply, Art. 138 EU VAT Directive)"), ("Totals", f"Net total: {net_eur:,} EUR\nVAT: 0 EUR (reverse charge)\nGross total: {net_eur:,} EUR"), ]) _render_png_from_pdf(pdf_path, pdf_path.with_suffix(".png")) def generate_de_rechnung() -> None: """DE Rechnung (multilingual demo): 19% MwSt, German language detection.""" sender = EU_COMPANIES["BavarianKraftwerk"] buyer = EU_COMPANIES["AlpenTech"] rechnung_no = "BK-R-2026-0001" rechnungsdatum = "15.02.2026" leistungsdatum = "10.02.2026" zahlbar_bis = "17.03.2026" netto_eur = 4_800 mwst_pct = 19 mwst_eur = round(netto_eur * mwst_pct / 100, 2) brutto_eur = netto_eur + mwst_eur html = f"""

RECHNUNG

Rechnungsnummer: {rechnung_no}

Rechnungsdatum: {rechnungsdatum}    Leistungsdatum: {leistungsdatum}    Zahlbar bis: {zahlbar_bis}

Lieferant

{sender['name']}
USt-IdNr.: {sender['vat_id']}
{sender['address']}

Empfänger

{buyer['name']}
USt-IdNr.: {buyer['vat_id']}
{buyer['address']}

Leistungen

BeschreibungMengeEinzelpreisNettoMwSt
Industrieanlagen-Wartung (Q1/2026) 1 {netto_eur:,} EUR {netto_eur:,} EUR {mwst_pct}%
Nettobetrag{netto_eur:,} EUR
MwSt {mwst_pct}%{mwst_eur:,} EUR
Bruttobetrag{brutto_eur:,} EUR

Zahlungsbedingungen: 30 Tage netto. Bank: HypoVereinsbank, IBAN DE89370400440532013000.

""" pdf_path = THIS_DIR / "invoices" / "bk-r-2026-0001.pdf" _render_html_pdf(pdf_path, html) _render_docx(pdf_path.with_suffix(".docx"), [ ("RECHNUNG", f"Rechnungsnummer: {rechnung_no}\nRechnungsdatum: {rechnungsdatum}\n" f"Leistungsdatum: {leistungsdatum}\nZahlbar bis: {zahlbar_bis}"), ("Lieferant", f"{sender['name']}\nUSt-IdNr.: {sender['vat_id']}\n{sender['address']}"), ("Empfänger", f"{buyer['name']}\nUSt-IdNr.: {buyer['vat_id']}\n{buyer['address']}"), ("Leistungen", f"Industrieanlagen-Wartung (Q1/2026) -- 1 -- {netto_eur:,} EUR -- MwSt {mwst_pct}%"), ("Beträge", f"Nettobetrag: {netto_eur:,} EUR\nMwSt {mwst_pct}%: {mwst_eur:,} EUR\n" f"Bruttobetrag: {brutto_eur:,} EUR"), ]) _render_png_from_pdf(pdf_path, pdf_path.with_suffix(".png")) def generate_contracts() -> None: """1 NDA + 1 MSSA (clean) + 1 IT framework + 1 DE→HU lease (multilingual demo).""" contracts_dir = THIS_DIR / "contracts" # 1) NDA — clean (no red flags) nda_html = f"""

NON-DISCLOSURE AGREEMENT (NDA)

Parties: BorgenLab Ltd. (tax id: {COMPANIES['BorgenLab']['tax_id']}, {COMPANIES['BorgenLab']['address']}) and NorthTech Inc. (tax id: {COMPANIES['NorthTech']['tax_id']}, {COMPANIES['NorthTech']['address']}).

Effective date: 2026-01-15    Expiry date: 2027-01-15

1. Scope of confidential information

All technical, business, and financial data shared between the parties under this agreement, including software specifications, customer lists, and pricing models.

2. Confidentiality term

The receiving party shall keep the disclosed information confidential for 5 years after the expiry of this agreement.

3. Penalty

Each breach of the confidentiality obligation shall trigger a contractual penalty of $50,000 per incident.

4. Governing law

This agreement shall be governed by the laws of the State of Delaware, USA.

""" nda_path = contracts_dir / "bl-nt-nda-2026.pdf" _render_html_pdf(nda_path, nda_html) _render_docx(nda_path.with_suffix(".docx"), [ ("NON-DISCLOSURE AGREEMENT", f"Parties: BorgenLab Ltd. (tax id: {COMPANIES['BorgenLab']['tax_id']}) and " f"NorthTech Inc. (tax id: {COMPANIES['NorthTech']['tax_id']})\n\n" "Effective date: 2026-01-15\nExpiry date: 2027-01-15\n\n" "Penalty: $50,000 per breach.\n\n" "Governing law: State of Delaware, USA."), ]) _render_png_from_pdf(nda_path, nda_path.with_suffix(".png")) # 2) MSSA (Master Software Service Agreement) — clean mssa_html = f"""

MASTER SOFTWARE SERVICE AGREEMENT

Parties: PestTrade Ltd. (tax id: {COMPANIES['PestTrade']['tax_id']}) as Provider, and DataPharm LLC (tax id: {COMPANIES['DataPharm']['tax_id']}) as Client.

Effective date: 2026-02-01    Expiry date: 2027-01-31

Monthly fee: $20,000 + 20% VAT (gross $24,000)

1. Scope of services

Operation of a cloud-based data analytics platform with 99.5% monthly SLA.

2. Change of control

If a 50% or greater ownership change occurs at the Provider, the Client shall be entitled to terminate this agreement with immediate effect.

3. Auto-renewal

This agreement automatically renews for an additional one-year term unless either party provides written notice of non-renewal at least 60 days before expiry.

4. Penalty

For each 1% of SLA shortfall, a penalty of $1,000 is due.

5. Governing law

State of New York, USA.

""" mssa_path = contracts_dir / "pt-dp-mssa-2026.pdf" _render_html_pdf(mssa_path, mssa_html) _render_docx(mssa_path.with_suffix(".docx"), [ ("MASTER SOFTWARE SERVICE AGREEMENT", f"Parties: PestTrade Ltd. (tax id: {COMPANIES['PestTrade']['tax_id']}) and " f"DataPharm LLC (tax id: {COMPANIES['DataPharm']['tax_id']})\n\n" "Effective date: 2026-02-01\nExpiry date: 2027-01-31\nMonthly fee: $20,000 + 20% VAT\n\n" "Change of control: 50% ownership change → immediate termination right.\n\n" "Auto-renewal: 1-year term with 60-day notice.\n\n" "Penalty: $1,000 per 1% SLA shortfall.\n\n" "Governing law: State of New York, USA."), ]) _render_png_from_pdf(mssa_path, mssa_path.with_suffix(".png")) # 3) IT framework agreement with 200% SLA penalty (industry-standard, NOT a risk) mb = COMPANIES["MullerBauer"] dp = COMPANIES["DataPharm"] framework_html = f"""

IT FRAMEWORK AGREEMENT

Parties:
MullerBauer Inc. (tax id: {mb['tax_id']}, registered at: {mb['address']}) as Provider, and
DataPharm LLC (tax id: {dp['tax_id']}, registered at: {dp['address']}) as Client.

Effective date: 2026-01-01    Expiry date: 2028-12-31

1. Scope

The Provider delivers IT support under this framework: infrastructure monitoring, incident handling (24/7), patch management, security updates. Custom work via individual statements of work, billed at $280/hour + VAT.

2. SLA (Service Level Agreement)

Availability: 99.5% monthly.
Reaction time (P1 incident): 30 minutes.
Resolution time (P1): 4 business hours.
SLA breach penalty: the Provider owes a penalty of 200% of the affected monthly retainer (industry-standard sanction in the IT/SaaS sector).

3. Termination

Either party may terminate this agreement with 60 days' written notice. Material breach permits immediate termination upon written notice with a 14-day cure period.

4. Confidentiality

The parties shall keep all information shared under this agreement confidential for 5 years. A breach triggers a $100,000 penalty.

5. Data protection (GDPR Article 28)

The Provider acts as data processor on the Client's documented instructions. Processing covers system logs collected as part of infrastructure monitoring. No data is transferred to third countries. The Client has audit rights once per year. The Provider holds an ISO 27001 certification (since 2018).

6. Governing law and jurisdiction

State of California, USA. The parties submit to the exclusive jurisdiction of the federal courts of the Northern District of California.

7. Payment terms

Monthly retainer: $18,000 + 20% VAT, payment terms: net 30 days.

""" framework_path = contracts_dir / "mbk-it-fa-2026.pdf" _render_html_pdf(framework_path, framework_html) _render_docx(framework_path.with_suffix(".docx"), [ ("IT FRAMEWORK AGREEMENT", f"Parties: MullerBauer Inc. (tax id: {mb['tax_id']}) as Provider, " f"DataPharm LLC (tax id: {dp['tax_id']}) as Client.\n\n" "Effective date: 2026-01-01\nExpiry date: 2028-12-31"), ("Scope", "IT support framework: monitoring (24/7), incident handling, patch management, " "security updates. Custom work at $280/hour + VAT."), ("SLA", "Availability: 99.5% monthly.\nReaction time (P1): 30 minutes.\n" "Resolution time (P1): 4 business hours.\n" "SLA breach: 200% of the monthly retainer (industry-standard sanction)."), ("Termination", "60 days' written notice. Material breach: immediate, with 14-day cure period."), ("Confidentiality", "5-year confidentiality term. Breach: $100,000 penalty."), ("GDPR Article 28", "Provider as data processor. Subject: system logs from infrastructure monitoring. " "No third-country transfers. Annual audit rights. ISO 27001 certified."), ("Governing law", "State of California, USA. Federal courts of the Northern District of California."), ("Payment", "Monthly retainer: $18,000 + 20% VAT. Net 30 days."), ]) _render_png_from_pdf(framework_path, framework_path.with_suffix(".png")) # 4) DE→HU lease (multilingual demo): EUR/month, mixed German + English context de = EU_COMPANIES["BavarianKraftwerk"] # leasing-style entity lease_html = f"""

EQUIPMENT LEASE / LEASINGVERTRAG

Lessor / Leasinggeber:
Deutsche Fleet Leasing GmbH (USt-IdNr.: DE556677889, Theatinerstraße 8, 80333 München, Germany).

Lessee / Leasingnehmer:
Budapest Logistics Kft. (tax id: {_make_hu_tax("47852136", "42")}, 1097 Budapest, Könyves Kálmán krt. 12-14., Hungary).

Effective / Vertragsbeginn: 2026-03-01    Expiry / Vertragsende: 2029-02-28 (36 months)

1. Leased asset

15 × Mercedes-Benz Sprinter 316 CDI (model year 2025) on operating lease. VIN list and technical specs in a separate annex. Use: domestic and intra-EU goods transport.

2. Lease fees

Down payment: 18,500 EUR.
Monthly lease: 1,850 EUR + 27% VAT (HU local VAT, since use is on HU territory).
Residual value: 22,000 EUR at end of term.
Payment method: SEPA, 5th day of each month.

3. Termination

Extraordinary termination during the term is permitted only on material breach or insolvency of the Lessee. Ordinary termination is permitted from month 24, with 60 days' notice.

4. Penalty

Late payment: 5% annual interest. Early termination outside of clause 3: 60% of remaining lease payments become immediately due.

5. Maintenance

The Lessor provides full maintenance and casco insurance (Vollkasko mit 500 EUR Selbstbeteiligung). Fuel and tires at the Lessee's expense.

6. GDPR Article 28

The fleet management telematics system (GPS, driving data) processes personal data of the Lessee's employees. Purpose: vehicle position tracking and maintenance scheduling. Retention: 24 months. The Lessee is the controller, the Lessor is the processor under a data processing addendum.

7. Governing law / Anwendbares Recht

German BGB and Hungarian Civil Code apply jointly under conflict-of-laws rules (place-of-performance jurisdiction prevails). Disputes resolved by the courts of Munich and Budapest jointly.

""" lease_path = contracts_dir / "df-lc-2026.pdf" _render_html_pdf(lease_path, lease_html) _render_docx(lease_path.with_suffix(".docx"), [ ("EQUIPMENT LEASE / LEASINGVERTRAG", "Lessor: Deutsche Fleet Leasing GmbH (USt-IdNr.: DE556677889)\n" f"Lessee: Budapest Logistics Kft. (tax id: {_make_hu_tax('47852136', '42')})\n\n" "Effective: 2026-03-01 — Expiry: 2029-02-28 (36 months)"), ("Asset", "15 × Mercedes-Benz Sprinter 316 CDI (operating lease) for intra-EU goods transport."), ("Fees", "Down payment: 18,500 EUR\nMonthly lease: 1,850 EUR + 27% VAT\n" "Residual: 22,000 EUR\nPayment: SEPA, 5th of month"), ("Termination", "Extraordinary: material breach / insolvency.\nOrdinary: from month 24, 60 days' notice."), ("Penalty", "Late: 5% annual interest.\nEarly termination: 60% of remaining lease payments due."), ("Maintenance", "Lessor: full maintenance + Vollkasko (500 EUR Selbstbeteiligung).\n" "Lessee: fuel + tires."), ("GDPR Article 28", "Telematics GPS system with personal data (drivers).\n" "Controller: Lessee. Processor: Lessor.\n" "Retention: 24 months."), ("Governing law", "German BGB + Hungarian Civil Code (conflict-of-laws). " "Munich + Budapest courts."), ]) _render_png_from_pdf(lease_path, lease_path.with_suffix(".png")) def generate_multi_doc_triplet() -> None: """Three-way matching: PO (40 units) + delivery_note (38 units, shortage) + invoice (40 units, over-billing).""" multi_dir = THIS_DIR / "multi_doc" supplier = "BuilderInc Inc." supplier_tax = COMPANIES["BuilderInc"]["tax_id"] supplier_addr = COMPANIES["BuilderInc"]["address"] customer = "ConstructLLC LLC" customer_tax = COMPANIES["ConstructLLC"]["tax_id"] customer_addr = COMPANIES["ConstructLLC"]["address"] # Purchase order: 40 units of HI-100 at $185.00/unit po_html = f"""

PURCHASE ORDER

PO number: PO-2026/0412    Date: 2026-04-01

Delivery due date: 2026-04-15

Supplier

{supplier}
Tax ID: {supplier_tax}
Address: {supplier_addr}

Customer

{customer}
Tax ID: {customer_tax}
Address: {customer_addr}

Line items

Item codeDescriptionQuantityUnit priceNet
HI-100I-beam 6m40$185.00$7,400.00
""" po_path = multi_dir / "bi-po-2026-0412.pdf" _render_html_pdf(po_path, po_html) _render_docx(po_path.with_suffix(".docx"), [ ("PURCHASE ORDER", f"PO number: PO-2026/0412\nDate: 2026-04-01\nDelivery due: 2026-04-15\n\n" f"Supplier: {supplier} (tax id: {supplier_tax})\n" f"Customer: {customer} (tax id: {customer_tax})\n\n" "Line items:\nHI-100 I-beam 6m -- 40 units -- $185.00/unit -- net $7,400.00"), ]) _render_png_from_pdf(po_path, po_path.with_suffix(".png")) # Delivery note: 38 units (2 short) dn_html = f"""

DELIVERY NOTE

Delivery note number: DN-2026/0415    Date: 2026-04-14

PO reference: PO-2026/0412

Supplier

{supplier}
Tax ID: {supplier_tax}

Customer

{customer}
Tax ID: {customer_tax}

Line items

Item codeDescriptionQuantity
HI-100I-beam 6m38 units

Notes: Due to inventory shortage, 38 units delivered out of the 40 ordered. The remaining 2 units will arrive with the next shipment.

""" dn_path = multi_dir / "bi-dn-2026-0415.pdf" _render_html_pdf(dn_path, dn_html) _render_docx(dn_path.with_suffix(".docx"), [ ("DELIVERY NOTE", f"Delivery note number: DN-2026/0415\nDate: 2026-04-14\nPO reference: PO-2026/0412\n\n" f"Supplier: {supplier}\nCustomer: {customer}\n\n" "Line items:\nHI-100 I-beam 6m -- 38 units (2 units short)"), ]) _render_png_from_pdf(dn_path, dn_path.with_suffix(".png")) # Invoice: 40 units (over-billing — should match delivery_note 38 instead) net = 40 * 185.00 vat = net * 0.20 gross = net + vat inv_html = _invoice_html( inv_no="2026/BI-0418", issue="2026-04-18", fulfillment="2026-04-14", due="2026-05-18", issuer=supplier, issuer_tax=supplier_tax, issuer_addr=supplier_addr, customer=customer, customer_tax=customer_tax, customer_addr=customer_addr, line_items=[{"name": "HI-100 I-beam 6m", "quantity": 40, "unit_price": 185.00, "net": net, "vat_pct": 20}], ) inv_path = multi_dir / "bi-inv-2026-0418.pdf" _render_html_pdf(inv_path, inv_html) _render_docx(inv_path.with_suffix(".docx"), _invoice_docx_sections( inv_no="2026/BI-0418", dates={"issue": "2026-04-18", "fulfillment": "2026-04-14", "due": "2026-05-18"}, parties={"issuer": supplier, "issuer_tax": supplier_tax, "issuer_addr": supplier_addr, "customer": customer, "customer_tax": customer_tax, "customer_addr": customer_addr}, line_items=[{"name": "HI-100 I-beam 6m", "quantity": 40, "unit_price": 185.00, "net": net, "vat_pct": 20}], net=net, vat=vat, gross=gross, )) _render_png_from_pdf(inv_path, inv_path.with_suffix(".png")) # --------------------------------------------------------------------------- # Demo packages # --------------------------------------------------------------------------- def generate_audit_demo() -> None: """Audit demo: 3 invoices from the same supplier; March is 50% pricier.""" out_dir = THIS_DIR / "demo_packages" / "audit_demo" common = { "issuer": "TechSupply Inc.", "issuer_tax": COMPANIES["TechSupply"]["tax_id"], "issuer_addr": COMPANIES["TechSupply"]["address"], "customer": "AcmeBuy Corp.", "customer_tax": COMPANIES["AcmeBuy"]["tax_id"], "customer_addr": COMPANIES["AcmeBuy"]["address"], } invoices = [ {"no": "TS-2026/0101", "issue": "2026-01-31", "fulfillment": "2026-01-30", "due": "2026-02-28", "qty": 10, "price": 787.40, "out": "ts-2026-0101.pdf"}, {"no": "TS-2026/0228", "issue": "2026-02-28", "fulfillment": "2026-02-27", "due": "2026-03-30", "qty": 10, "price": 826.77, "out": "ts-2026-0228.pdf"}, {"no": "TS-2026/0331", "issue": "2026-03-31", "fulfillment": "2026-03-29", "due": "2026-04-30", "qty": 10, "price": 1240.16, "out": "ts-2026-0331.pdf"}, ] for inv in invoices: net = inv["qty"] * inv["price"] line_items = [{ "name": "Maintenance services (monthly retainer)", "quantity": inv["qty"], "unit_price": inv["price"], "net": net, "vat_pct": 20, }] vat = net * 0.20 gross = net + vat pdf_path = out_dir / inv["out"] html = _invoice_html( inv_no=inv["no"], issue=inv["issue"], fulfillment=inv["fulfillment"], due=inv["due"], line_items=line_items, **common, ) _render_html_pdf(pdf_path, html) docx_path = pdf_path.with_suffix(".docx") sections = _invoice_docx_sections( inv_no=inv["no"], dates={"issue": inv["issue"], "fulfillment": inv["fulfillment"], "due": inv["due"]}, parties=common, line_items=line_items, net=net, vat=vat, gross=gross, ) _render_docx(docx_path, sections) _render_png_from_pdf(pdf_path, pdf_path.with_suffix(".png")) def generate_dd_demo() -> None: """DD demo: NDA + service agreement (3 red flags) + amendment.""" out_dir = THIS_DIR / "demo_packages" / "dd_demo" # 1) NDA — clean nda_html = f"""

NON-DISCLOSURE AGREEMENT (NDA)

Parties: GlobalCorp Inc. (tax id: {COMPANIES['GlobalCorp']['tax_id']}, {COMPANIES['GlobalCorp']['address']}) and DataVendor LLC (tax id: {COMPANIES['DataVendor']['tax_id']}, {COMPANIES['DataVendor']['address']}).

Effective date: 2026-03-01    Expiry date: 2028-03-01

1. Confidential information

Technical, business, and financial information mutually shared between the parties.

2. Confidentiality term

2 years after expiry of this agreement.

3. Governing law

State of Delaware, USA.

""" nda_path = out_dir / "gc-dv-nda-2026-0301.pdf" _render_html_pdf(nda_path, nda_html) _render_docx(nda_path.with_suffix(".docx"), [ ("NON-DISCLOSURE AGREEMENT", f"Parties: GlobalCorp Inc. (tax id: {COMPANIES['GlobalCorp']['tax_id']}) and " f"DataVendor LLC (tax id: {COMPANIES['DataVendor']['tax_id']})\n\n" "Effective date: 2026-03-01\nExpiry date: 2028-03-01\n\n" "Confidentiality term: 2 years post-expiry.\nGoverning law: State of Delaware, USA."), ]) _render_png_from_pdf(nda_path, nda_path.with_suffix(".png")) # 2) MSSA with 3 red flags: change-of-control + non-compete + auto-renewal mssa_html = f"""

MASTER SOFTWARE SERVICE AGREEMENT

Parties: DataVendor LLC (tax id: {COMPANIES['DataVendor']['tax_id']}) as Provider, and GlobalCorp Inc. (tax id: {COMPANIES['GlobalCorp']['tax_id']}) as Client.

Effective date: 2026-03-15    Expiry date: 2027-03-15

Monthly fee: $35,000 + 20% VAT (gross $42,000)

1. Scope

Operation of a cloud-based data analytics platform with 99.9% SLA.

2. Change of control

If a 25% or greater ownership change occurs at the Provider, the Client shall be entitled to terminate this agreement with immediate effect, and the Provider shall pay the full annual fee ($420,000) as a contractual penalty.

3. Non-compete

For 5 years after the termination of this agreement the Provider shall not provide similar services to the Client's competitors in the United States territory.

4. Auto-renewal

This agreement automatically renews for an additional 3-year term unless either party provides written notice of non-renewal at least 90 days before expiry.

5. Penalty

For each 1% of SLA shortfall, a $2,000 penalty is due.

""" mssa_path = out_dir / "gc-dv-mssa-2026-0315.pdf" _render_html_pdf(mssa_path, mssa_html) _render_docx(mssa_path.with_suffix(".docx"), [ ("MASTER SOFTWARE SERVICE AGREEMENT", f"Parties: DataVendor LLC (tax id: {COMPANIES['DataVendor']['tax_id']}) and " f"GlobalCorp Inc. (tax id: {COMPANIES['GlobalCorp']['tax_id']})\n\n" "Effective date: 2026-03-15\nExpiry date: 2027-03-15\nMonthly fee: $35,000 + 20% VAT\n\n" "Change of control: 25% ownership change → immediate termination " "+ full annual fee ($420,000) as penalty.\n\n" "Non-compete: 5 years post-termination.\n\n" "Auto-renewal: 3-year extension with 90-day notice.\n\n" "Penalty: $2,000 per 1% SLA shortfall."), ]) _render_png_from_pdf(mssa_path, mssa_path.with_suffix(".png")) # 3) Amendment — extends the term amd_html = f"""

AMENDMENT NO. 1 TO SERVICE AGREEMENT

Original agreement: Master Software Service Agreement dated 2026-03-15 (DataVendor LLC and GlobalCorp Inc.)

Amendment date: 2026-04-15

1. Term extension

The expiry date is amended from 2027-03-15 to 2028-03-15.

2. Monthly fee unchanged

The $35,000 + 20% VAT monthly fee remains in effect for the full extended term.

3. Other terms

The other provisions of the original agreement — including change-of-control, non-compete, and auto-renewal clauses — remain unchanged and in full force.

""" amd_path = out_dir / "gc-dv-mssa-amd1-2026-0415.pdf" _render_html_pdf(amd_path, amd_html) _render_docx(amd_path.with_suffix(".docx"), [ ("AMENDMENT NO. 1 TO SERVICE AGREEMENT", "Original agreement: Master Software Service Agreement dated 2026-03-15\n" "Amendment date: 2026-04-15\n\n" "Term extension: 2027-03-15 → 2028-03-15\n" "Monthly fee unchanged: $35,000 + 20% VAT\n\n" "All other provisions of the original agreement remain unchanged."), ]) _render_png_from_pdf(amd_path, amd_path.with_suffix(".png")) def generate_compliance_demo() -> None: """Compliance demo: 2 contracts — one with full GDPR Article 28, one missing it.""" out_dir = THIS_DIR / "demo_packages" / "compliance_demo" # 1) Contract A: contains the full GDPR Article 28 clause ok_html = f"""

DATA PROCESSING AGREEMENT (DPA)

Parties: MediCare Inc. (tax id: {COMPANIES['MediCare']['tax_id']}) as Controller, and CleanLaw LLP (tax id: {COMPANIES['CleanLaw']['tax_id']}) as Processor.

Effective date: 2026-04-01    Expiry date: 2027-04-01

1. Scope

The Processor processes patient data on behalf of the Controller (health data, a special category under GDPR Article 9).

2. GDPR Article 28 provisions

(a) Subject and duration: The term of this agreement and the duration of the service related to processing of patient data.

(b) Nature and purpose of processing: Patient registry, clinical data storage.

(c) Type of personal data: Name, ID, address, health records.

(d) Categories of data subjects: The Controller's patients.

(e) Controller's rights and duties per GDPR Article 28(3).

(f) Documented instructions are required for any processing.

(g) Confidentiality: All Processor staff are under a confidentiality obligation.

(h) Security measures per GDPR Article 32 (encryption, access control).

(i) Sub-processor engagement only with prior written consent.

(j) Assistance with data-subject rights requests.

(k) Deletion or return of personal data on contract termination.

3. Governing law

EU data protection law (GDPR) and the laws of the State of New York, USA.

""" ok_path = out_dir / "mc-cl-dpa-2026-0401.pdf" _render_html_pdf(ok_path, ok_html) _render_docx(ok_path.with_suffix(".docx"), [ ("DATA PROCESSING AGREEMENT (DPA)", f"Parties: MediCare Inc. (tax id: {COMPANIES['MediCare']['tax_id']}) as Controller, " f"CleanLaw LLP (tax id: {COMPANIES['CleanLaw']['tax_id']}) as Processor\n\n" "Effective: 2026-04-01 — Expiry: 2027-04-01\n\n" "Full GDPR Article 28 clauses:\n" "(a) Subject and duration\n(b) Nature and purpose\n(c) Type of personal data\n" "(d) Categories of data subjects\n(e) Controller's rights\n(f) Documented instructions\n" "(g) Confidentiality\n(h) Security measures (Art. 32)\n(i) Sub-processor consent\n" "(j) Data-subject rights assistance\n(k) Deletion / return of data\n\n" "Governing law: GDPR + State of New York, USA."), ]) _render_png_from_pdf(ok_path, ok_path.with_suffix(".png")) # 2) Contract B: NO GDPR Article 28 clause despite processing PII bad_html = f"""

SERVICE AGREEMENT

Parties: MediCare Inc. (tax id: {COMPANIES['MediCare']['tax_id']}) and DataVendor LLC (tax id: {COMPANIES['DataVendor']['tax_id']}).

Effective date: 2026-04-10    Expiry date: 2027-04-10

Monthly fee: $8,000 + 20% VAT

1. Scope

DataVendor LLC operates a patient registry on behalf of MediCare Inc., including patient names, addresses, social security numbers, and clinical data.

2. Data processing

During the service DataVendor LLC processes personal data (special-category health data) on behalf of the controller.

3. Liability

DataVendor LLC is responsible for the secure handling of the data within its own liability framework.

4. Governing law

State of New York, USA.

""" bad_path = out_dir / "mc-dv-msa-2026-0410.pdf" _render_html_pdf(bad_path, bad_html) _render_docx(bad_path.with_suffix(".docx"), [ ("SERVICE AGREEMENT", f"Parties: MediCare Inc. (tax id: {COMPANIES['MediCare']['tax_id']}) and " f"DataVendor LLC (tax id: {COMPANIES['DataVendor']['tax_id']})\n\n" "Effective: 2026-04-10 — Expiry: 2027-04-10\nMonthly fee: $8,000 + 20% VAT\n\n" "Scope: patient registry (name, SSN, address, clinical data).\n\n" "Data security is the Provider's own responsibility within its general liability framework.\n\n" "Governing law: State of New York, USA."), ]) _render_png_from_pdf(bad_path, bad_path.with_suffix(".png")) # --------------------------------------------------------------------------- # Adversarial — deliberately broken documents # --------------------------------------------------------------------------- def generate_adversarial() -> None: """4 deliberately broken documents to validate detection logic.""" out_dir = THIS_DIR / "adversarial" # 1) Invoice with math error ($760 difference between net+VAT and gross) net_correct = 55_000 vat_correct = 11_000 # 55_000 × 0.20 gross_wrong = 67_500 # net+VAT = 66,000 actually, but invoice says 67,500 inv_html = f"""

INVOICE

Invoice number: ME-2026/0001    Issue date: 2026-03-15    Fulfillment date: 2026-03-10    Payment due: 2026-04-14

Issuer

Alpha Logistics Inc.
Tax ID: {COMPANIES['AcmeSoft']['tax_id']}
Address: 555 Logistics Way, Chicago, IL 60616, USA

Customer

BuilderInc Inc.
Tax ID: {COMPANIES['BuilderInc']['tax_id']}
Address: 1500 Industrial Blvd, Chicago, IL 60616, USA

Line items

DescriptionQtyUnit priceNetVAT
Warehouse rental (monthly)1 mo$15,000.00$15,000.0020%
Logistics services1 mo$28,000.00$28,000.0020%
Shipping cost1 lot$12,000.00$12,000.0020%
Total net{_money(net_correct)}
Total VAT{_money(vat_correct)}
Total gross{_money(gross_wrong)}
""" inv_path = out_dir / "adv-inv-2026-0001.pdf" _render_html_pdf(inv_path, inv_html) _render_docx(inv_path.with_suffix(".docx"), [ ("INVOICE", "Invoice number: ME-2026/0001\nIssue: 2026-03-15\nFulfillment: 2026-03-10\nPayment due: 2026-04-14"), ("Issuer", f"Alpha Logistics Inc.\nTax ID: {COMPANIES['AcmeSoft']['tax_id']}"), ("Customer", f"BuilderInc Inc.\nTax ID: {COMPANIES['BuilderInc']['tax_id']}"), ("Line items", "Warehouse rental -- 1 mo -- $15,000.00 -- 20%\n" "Logistics services -- 1 mo -- $28,000.00 -- 20%\n" "Shipping cost -- 1 lot -- $12,000.00 -- 20%"), ("Totals", f"Net: {_money(net_correct)}\n" f"VAT: {_money(vat_correct)}\n" f"Gross: {_money(gross_wrong)}"), ]) _render_png_from_pdf(inv_path, inv_path.with_suffix(".png")) # 2) Incomplete contract (no termination, no penalty, no expiry date) incomplete_html = f"""

SERVICE AGREEMENT

Contract number: HI-2026-001    Signing date: 2026-02-15

Parties: Gyros Longrun LLC (tax id: {COMPANIES['AcmeSoft']['tax_id']}) and Provider Inc. (tax id: {COMPANIES['DataPharm']['tax_id']}).

1. Scope

Provision of web development services.

2. Compensation

$12,000 monthly + 20% VAT.

""" inc_path = out_dir / "adv-ctr-2026-001.pdf" _render_html_pdf(inc_path, incomplete_html) _render_docx(inc_path.with_suffix(".docx"), [ ("SERVICE AGREEMENT", f"Contract number: HI-2026-001\nSigning date: 2026-02-15\n\n" "Parties: Gyros Longrun LLC and Provider Inc.\n\n" "Scope: Web development.\nMonthly fee: $12,000 + 20% VAT."), ]) _render_png_from_pdf(inc_path, inc_path.with_suffix(".png")) # 3) Bilingual contract (HU + EN) with Incoterms CIP bilingual_html = f"""

SUPPLY AGREEMENT / SZÁLLÍTÁSI SZERZŐDÉS

Contract number: ML-2026-001    Signed: 2026-03-10

Parties / Felek: GlobalCorp Ltd. (UK) and Magyar Industrial Park Inc. (tax id: {COMPANIES['ConstructLLC']['tax_id']}).

Effective / Hatály: 2026-04-01 -- 2027-03-31

Value / Érték: 450,000 EUR

1. Delivery terms / Szállítási feltételek

CIP Budapest (Incoterms 2020). The Supplier bears insurance and shipping cost.

2. Termination / Felmondás

Either party may terminate with 90 days notice. / Bármely fél 90 nappal előre felmondhat.

3. Governing law / Irányadó jog

English law. / Angol jog.

""" bil_path = out_dir / "adv-ctr-2026-002.pdf" _render_html_pdf(bil_path, bilingual_html) _render_docx(bil_path.with_suffix(".docx"), [ ("SUPPLY AGREEMENT / SZÁLLÍTÁSI SZERZŐDÉS", f"Contract number: ML-2026-001\nSigned: 2026-03-10\n\n" f"Parties: GlobalCorp Ltd. and Magyar Industrial Park Inc.\n\n" "Effective: 2026-04-01 -- 2027-03-31\nValue: 450,000 EUR\n\n" "Delivery: CIP Budapest (Incoterms 2020).\n" "Termination: 90 days notice.\nGoverning law: English law."), ]) _render_png_from_pdf(bil_path, bil_path.with_suffix(".png")) # 4) Contract with date-logic errors (signing after start, expiry before start) illogical_html = f"""

WORK AGREEMENT

Contract number: ED-2026-001    Signing date: 2026-02-15

Parties: Spring Autoservice Inc. (tax id: {COMPANIES['TechSupply']['tax_id']}) and Customer Corp. (tax id: {COMPANIES['AcmeBuy']['tax_id']}).

Effective date: 2026-01-01 (back-dated)

Expiry date: 2025-12-15

Fulfillment date: 2025-12-15

Payment due: 2026-02-01 (before signing!)

1. Scope

Vehicle maintenance services.

2. Fee

$8,000 + 20% VAT.

""" ill_path = out_dir / "adv-ctr-2026-003.pdf" _render_html_pdf(ill_path, illogical_html) _render_docx(ill_path.with_suffix(".docx"), [ ("WORK AGREEMENT", "Contract number: ED-2026-001\nSigning date: 2026-02-15\n\n" "Parties: Spring Autoservice Inc. and Customer Corp.\n\n" "Effective date: 2026-01-01 (back-dated)\nExpiry date: 2025-12-15 (BEFORE start!)\n" "Fulfillment: 2025-12-15\nPayment due: 2026-02-01 (BEFORE signing!)\n\n" "Scope: Vehicle maintenance.\nFee: $8,000 + 20% VAT."), ]) _render_png_from_pdf(ill_path, ill_path.with_suffix(".png")) # --------------------------------------------------------------------------- # Financial reports (multilingual demo) # --------------------------------------------------------------------------- def generate_financial_reports() -> None: """1 EN income statement (US-GAAP) + 1 EN cash flow (IFRS, multilingual demo).""" out_dir = THIS_DIR / "financial_reports" # 1) Income statement (US-GAAP) is_html = f"""

INCOME STATEMENT

Company: FutureTech Inc. (tax id: {COMPANIES['AcmeSoft']['tax_id']})

Period: 2025-01-01 to 2025-12-31 (audited)

Standard: US-GAAP

Currency: USD (thousands)

Revenue

Item2025 (kUSD)2024 (kUSD)
Net sales revenue485,000412,000
Other income12,5008,700
Total revenue497,500420,700

Costs

Cost of goods sold187,200165,100
Personnel costs154,800132,400
Depreciation28,60031,200
Operating costs total370,600328,700

Operating income (EBIT): 126,900 kUSD (2024: 92,000, +37.9%)

Pretax income: 122,400 kUSD

""" is_path = out_dir / "fin-en-is-2025.pdf" _render_html_pdf(is_path, is_html) _render_docx(is_path.with_suffix(".docx"), [ ("INCOME STATEMENT", "Company: FutureTech Inc.\nPeriod: 2025-01-01 to 2025-12-31\nStandard: US-GAAP\n\n" "Total revenue 2025: 497,500 kUSD (2024: 420,700 kUSD, +18.3%)\n" "Operating costs 2025: 370,600 kUSD (2024: 328,700 kUSD)\n" "EBIT 2025: 126,900 kUSD (2024: 92,000, +37.9%)\n" "Pretax income: 122,400 kUSD"), ]) _render_png_from_pdf(is_path, is_path.with_suffix(".png")) # 2) Cash flow (IFRS, Alpine Biotech AG) cf_html = """

CASH FLOW STATEMENT

Company: Alpine Biotech AG (Switzerland)

Period: 2025-01-01 to 2025-12-31 (audited)

Standard: IFRS (International Financial Reporting Standards)

Currency: CHF (thousands)

Operating activities

Item2025 (kCHF)
Net income42,800
Depreciation & amortization18,200
Working capital changes-3,400
Cash from operating57,600

Investing activities

Production line CapEx-67,400
R&D investments-12,100
Cash from investing-79,500

Financing activities

Bond issuance (5y, 4.2%)35,000
Dividend paid-8,200
Cash from financing26,800

Net change in cash: 4,900 kCHF

""" cf_path = out_dir / "fin-en-cf-2025.pdf" _render_html_pdf(cf_path, cf_html) _render_docx(cf_path.with_suffix(".docx"), [ ("CASH FLOW STATEMENT", "Company: Alpine Biotech AG\nPeriod: 2025-01-01 to 2025-12-31\nStandard: IFRS\nCurrency: CHF\n\n" "Operating: +57,600 kCHF\n" "Investing: -79,500 kCHF (Production CapEx -67,400, R&D -12,100)\n" "Financing: +26,800 kCHF (Bond 35,000, Dividend -8,200)\n" "Net change in cash: +4,900 kCHF"), ]) _render_png_from_pdf(cf_path, cf_path.with_suffix(".png")) # --------------------------------------------------------------------------- # Cleanup # --------------------------------------------------------------------------- def _cleanup_test_data_dirs() -> None: """Clear stale generated files before regeneration.""" target_dirs = [ THIS_DIR / "invoices", THIS_DIR / "contracts", THIS_DIR / "multi_doc", THIS_DIR / "adversarial", THIS_DIR / "financial_reports", THIS_DIR / "demo_packages" / "audit_demo", THIS_DIR / "demo_packages" / "dd_demo", THIS_DIR / "demo_packages" / "compliance_demo", ] for d in target_dirs: d.mkdir(parents=True, exist_ok=True) deleted = 0 for d in target_dirs: if not d.exists(): continue for ext in (".pdf", ".docx", ".png"): for f in d.glob(f"*{ext}"): f.unlink() deleted += 1 if deleted > 0: print(f" Cleanup: {deleted} stale files removed") def main() -> None: print("Generating sample data...") _cleanup_test_data_dirs() generate_invoices() print(" 3 EN invoices (audit pattern: March 50% pricier)") generate_intra_eu_invoice() print(" 1 EN intra-EU invoice (0% VAT, reverse charge)") generate_de_rechnung() print(" 1 DE Rechnung (19% MwSt, multilingual demo)") generate_contracts() print(" 4 contracts (NDA + MSSA + IT framework + DE→HU lease)") generate_multi_doc_triplet() print(" 3 multi_doc (PO + delivery_note + invoice with quantity discrepancy)") generate_audit_demo() print(" Audit demo package (3 invoices, 50% price increase)") generate_dd_demo() print(" DD demo package (NDA + MSSA + amendment)") generate_compliance_demo() print(" Compliance demo package (2 contracts, GDPR asymmetry)") generate_adversarial() print(" 4 adversarial documents (math error, incomplete, bilingual, date errors)") generate_financial_reports() print(" 2 financial reports (US-GAAP IS + IFRS CF)") pdf_count = sum(1 for _ in THIS_DIR.rglob("*.pdf")) docx_count = sum(1 for _ in THIS_DIR.rglob("*.docx")) png_count = sum(1 for _ in THIS_DIR.rglob("*.png")) print(f"\nTotal: {pdf_count} PDF, {docx_count} DOCX, {png_count} PNG") if __name__ == "__main__": main()