paperhawk / tests /conftest.py
Nándorfi Vince
Initial paperhawk push to HF Space (LFS for binaries)
7ff7119
raw
history blame
2.14 kB
"""Pytest fixtures — used across the whole test suite."""
from __future__ import annotations
import pytest
@pytest.fixture
def sample_pdf_bytes() -> bytes:
    """Return the bytes of a minimal English invoice PDF (PyMuPDF-rendered).

    A single A4 page is created in memory and a short plain-text invoice is
    written onto it. The full ``test_data/generate_samples.py`` produces much
    richer files; this fixture exists for ingest-level unit tests so they
    don't depend on the full ``test_data/`` regeneration.

    Returns:
        The serialized PDF document as ``bytes``.
    """
    import fitz

    doc = fitz.open()
    try:
        page = doc.new_page(width=595, height=842)  # A4
        text = (
            "INVOICE\n\n"
            "Invoice number: 2026/001\n"
            "Issue date: 2026-01-31\n\n"
            "Issuer: AcmeSoft Inc.\n"
            "Tax ID: 12-3456789\n\n"
            "Customer: BudaData LLC\n"
            "Tax ID: 98-7654321\n\n"
            "Line items:\n"
            "Software development services 40 hours $500.00 $20,000.00\n\n"
            "Total net: $20,000.00\n"
            "Total VAT: $4,000.00 (20%)\n"
            "Total gross: $24,000.00\n"
        )
        page.insert_text((50, 50), text, fontsize=11)
        return doc.tobytes()
    finally:
        # Close the document even when page creation or serialization raises,
        # so a failing test does not leave the document open.
        doc.close()
@pytest.fixture
def sample_docx_bytes() -> bytes:
    """Return the bytes of a minimal English contract DOCX.

    The document consists of one level-1 heading followed by four paragraphs
    (parties, effective date, expiry date, penalty clause). It is saved to an
    in-memory buffer, so nothing is written to disk.
    """
    import io
    import docx

    # Paragraph texts in the order they are appended to the document.
    paragraph_texts = (
        "Parties: SmartSensors Inc. (tax id: 13-5792468) and "
        "InfoTech Ltd. (tax id: 86-4201357)",
        "Effective date: 2026-01-15",
        "Expiry date: 2027-01-15",
        "Penalty: A breach of this confidentiality obligation triggers a $50,000 penalty per incident.",
    )

    contract = docx.Document()
    contract.add_heading("Non-Disclosure Agreement", level=1)
    for paragraph_text in paragraph_texts:
        contract.add_paragraph(paragraph_text)

    stream = io.BytesIO()
    contract.save(stream)
    return stream.getvalue()
@pytest.fixture
def sample_png_bytes() -> bytes:
    """Return the bytes of a minimal PNG (800x600 white canvas + caption)."""
    import io
    from PIL import Image, ImageDraw

    canvas = Image.new("RGB", (800, 600), "white")
    ImageDraw.Draw(canvas).text((50, 50), "Invoice test PNG", fill="black")

    # Encode into memory; the buffer's contents are copied out before it closes.
    with io.BytesIO() as stream:
        canvas.save(stream, format="PNG")
        return stream.getvalue()