File size: 2,141 Bytes
7ff7119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""Pytest fixtures — used across the whole test suite."""

from __future__ import annotations

import pytest


@pytest.fixture
def sample_pdf_bytes() -> bytes:
    """Return the bytes of a minimal one-page English invoice PDF.

    The page is built in memory with PyMuPDF (``fitz``). The full
    ``test_data/generate_samples.py`` produces much richer files; this
    fixture exists for ingest-level unit tests so they don't depend on the
    full ``test_data/`` regeneration.

    Returns:
        The serialized PDF document as ``bytes``.
    """
    import fitz

    invoice_text = (
        "INVOICE\n\n"
        "Invoice number: 2026/001\n"
        "Issue date: 2026-01-31\n\n"
        "Issuer: AcmeSoft Inc.\n"
        "Tax ID: 12-3456789\n\n"
        "Customer: BudaData LLC\n"
        "Tax ID: 98-7654321\n\n"
        "Line items:\n"
        "Software development services   40 hours   $500.00   $20,000.00\n\n"
        "Total net: $20,000.00\n"
        "Total VAT: $4,000.00 (20%)\n"
        "Total gross: $24,000.00\n"
    )

    doc = fitz.open()
    try:
        page = doc.new_page(width=595, height=842)  # A4
        page.insert_text((50, 50), invoice_text, fontsize=11)
        return doc.tobytes()
    finally:
        # Close the document even when page creation or serialization raises,
        # so a failing test does not leak the underlying PyMuPDF handle.
        doc.close()


@pytest.fixture
def sample_docx_bytes() -> bytes:
    """Build a minimal English contract (NDA) as DOCX and return its bytes.

    The document consists of one level-1 heading followed by four
    paragraphs: parties, effective date, expiry date and a penalty clause.
    It is saved to an in-memory buffer rather than to disk.
    """
    import io

    import docx

    # Paragraph bodies, in the order they appear in the document.
    paragraph_texts = (
        "Parties: SmartSensors Inc. (tax id: 13-5792468) and "
        "InfoTech Ltd. (tax id: 86-4201357)",
        "Effective date: 2026-01-15",
        "Expiry date: 2027-01-15",
        "Penalty: A breach of this confidentiality obligation triggers a $50,000 penalty per incident.",
    )

    contract = docx.Document()
    contract.add_heading("Non-Disclosure Agreement", level=1)
    for body_text in paragraph_texts:
        contract.add_paragraph(body_text)

    with io.BytesIO() as stream:
        contract.save(stream)
        return stream.getvalue()


@pytest.fixture
def sample_png_bytes() -> bytes:
    """Return PNG bytes for an 800x600 white RGB image with a black caption.

    The caption is drawn at offset (50, 50) and the image is encoded to an
    in-memory buffer.
    """
    import io

    from PIL import Image, ImageDraw

    canvas = Image.new("RGB", (800, 600), "white")
    ImageDraw.Draw(canvas).text((50, 50), "Invoice test PNG", fill="black")

    with io.BytesIO() as stream:
        canvas.save(stream, format="PNG")
        return stream.getvalue()