| """Pytest fixtures — used across the whole test suite.""" |
|
|
| from __future__ import annotations |
|
|
| import pytest |
|
|
|
|
@pytest.fixture
def sample_pdf_bytes() -> bytes:
    """Return the bytes of a minimal English invoice PDF (PyMuPDF-rendered).

    A single 595 x 842 pt page is created in memory and a short block of
    invoice text is written onto it at position (50, 50).

    The full ``test_data/generate_samples.py`` produces much richer files; this
    fixture exists for ingest-level unit tests so they don't depend on the
    full ``test_data/`` regeneration.

    Returns:
        The serialized PDF document as ``bytes``.
    """
    # Function-scope import: PyMuPDF is only loaded when a test actually
    # requests this fixture.
    import fitz

    # Build the text first so the document is held open as briefly as possible.
    text = (
        "INVOICE\n\n"
        "Invoice number: 2026/001\n"
        "Issue date: 2026-01-31\n\n"
        "Issuer: AcmeSoft Inc.\n"
        "Tax ID: 12-3456789\n\n"
        "Customer: BudaData LLC\n"
        "Tax ID: 98-7654321\n\n"
        "Line items:\n"
        "Software development services 40 hours $500.00 $20,000.00\n\n"
        "Total net: $20,000.00\n"
        "Total VAT: $4,000.00 (20%)\n"
        "Total gross: $24,000.00\n"
    )

    doc = fitz.open()  # no arguments -> new, empty in-memory PDF
    try:
        page = doc.new_page(width=595, height=842)
        page.insert_text((50, 50), text, fontsize=11)
        return doc.tobytes()
    finally:
        # Always release the document, even if rendering or serialization
        # raises; otherwise the handle leaks for the rest of the test run.
        doc.close()
|
|
|
|
@pytest.fixture
def sample_docx_bytes() -> bytes:
    """Return the bytes of a minimal English contract DOCX.

    The document consists of one level-1 heading followed by four
    paragraphs: parties, effective date, expiry date and penalty clause.
    It is saved to an in-memory buffer, never to disk.
    """
    import io

    import docx

    # Paragraph texts in the order they appear in the document.
    paragraph_texts = (
        "Parties: SmartSensors Inc. (tax id: 13-5792468) and "
        "InfoTech Ltd. (tax id: 86-4201357)",
        "Effective date: 2026-01-15",
        "Expiry date: 2027-01-15",
        "Penalty: A breach of this confidentiality obligation triggers a $50,000 penalty per incident.",
    )

    contract = docx.Document()
    contract.add_heading("Non-Disclosure Agreement", level=1)
    for paragraph_text in paragraph_texts:
        contract.add_paragraph(paragraph_text)

    stream = io.BytesIO()
    contract.save(stream)
    return stream.getvalue()
|
|
|
|
@pytest.fixture
def sample_png_bytes() -> bytes:
    """Return the bytes of a minimal PNG: white background plus a caption.

    An 800x600 RGB image is filled with white, a short black caption is
    drawn at (50, 50), and the result is encoded as PNG in memory.
    """
    import io

    from PIL import Image, ImageDraw

    canvas = Image.new("RGB", (800, 600), "white")
    ImageDraw.Draw(canvas).text((50, 50), "Invoice test PNG", fill="black")

    with io.BytesIO() as sink:
        canvas.save(sink, format="PNG")
        # getvalue() copies the data out, so it stays valid after the
        # buffer is closed on leaving the ``with`` block.
        return sink.getvalue()
|
|