# paperhawk/tests/integration/test_pipeline_smoke.py
# Author: Nándorfi Vince — "Initial paperhawk push to HF Space (LFS for binaries)" (commit 7ff7119)
"""pipeline_graph end-to-end smoke test (dummy LLM mode).
Walks one PDF through ingest → classify → extract → rag-index → quote-validate
→ compare → risk → report. Verifies that:
* the documents list is populated
* the risks list contains at least a basic or domain rule finding
* report.performance.speedup > 1.0 (real speedup vs the manual estimate)
"""
from __future__ import annotations
import pytest
from store import HybridStore
@pytest.mark.integration
@pytest.mark.asyncio
async def test_pipeline_e2e_single_invoice(sample_pdf_bytes, tmp_path):
    """One uploaded invoice PDF flows through the whole pipeline graph."""
    from graph.pipeline_graph import build_pipeline_graph

    hybrid_store = HybridStore(
        chroma_path=str(tmp_path / "chroma"),
        collection_name="test_pipeline_invoice",
    )
    pipeline = build_pipeline_graph(hybrid_store)
    final_state = await pipeline.ainvoke(
        {"files": [("invoice_january.pdf", sample_pdf_bytes)]}
    )

    docs = final_state.get("documents") or []
    assert len(docs) == 1, "Single uploaded PDF → 1 ProcessedDocument"

    doc = docs[0]
    assert doc.ingested is not None
    assert doc.classification is not None
    assert doc.classification.doc_type == "invoice"
    assert doc.extracted is not None
    assert doc.extracted.raw.get("invoice_number") == "2026/001"

    # RAG indexed
    assert doc.rag_chunks_indexed >= 1
    assert hybrid_store.chunk_count >= 1

    # Risks
    findings = final_state.get("risks") or []
    # ISA 500 evidence score is UI-only (not in risks). Materiality (ISA 320)
    # is an info-level risk that lands in the list.
    assert any(f.source_check_id == "check_07_materiality" for f in findings)

    # Report
    report = final_state.get("report")
    assert report is not None
    assert report["document_count"] == 1
    perf = report["performance"]
    assert perf["documents"] == 1
    assert perf["manual_estimate_minutes"] > 0
    # Speedup > 1 (8 minutes manual → < 8*60 sec automated)
    assert perf["speedup"] > 1.0
@pytest.mark.integration
@pytest.mark.asyncio
async def test_pipeline_three_doc_compare(sample_pdf_bytes, tmp_path):
    """3 docs (invoice + delivery_note + purchase_order) → three-way matching."""
    from graph.pipeline_graph import build_pipeline_graph

    # Same PDF reused 3× with different filenames + classifier picks via name prefix
    hybrid_store = HybridStore(
        chroma_path=str(tmp_path / "chroma_three"),
        collection_name="test_three_way",
    )
    pipeline = build_pipeline_graph(hybrid_store)
    uploads = [
        ("invoice_construction.pdf", sample_pdf_bytes),
        ("delivery_note_construction.pdf", sample_pdf_bytes),
        ("purchase_order_construction.pdf", sample_pdf_bytes),
    ]
    final_state = await pipeline.ainvoke({"files": uploads})

    docs = final_state.get("documents") or []
    assert len(docs) == 3

    # Classifier splits types based on filename prefixes
    seen_types = {d.classification.doc_type for d in docs if d.classification}
    for expected in ("invoice", "delivery_note", "purchase_order"):
        assert expected in seen_types
@pytest.mark.integration
@pytest.mark.asyncio
async def test_risk_subgraph_runs_on_minimal_input(tmp_path):
    """The risk subgraph runs end-to-end on minimal extracted data without crashing."""
    from datetime import datetime

    from graph.states.pipeline_state import (
        Classification,
        ExtractedData,
        IngestedDocument,
        PageContent,
        ProcessedDocument,
    )
    from subgraphs.risk_subgraph import build_risk_subgraph

    # Bare-minimum document: one page of text, low-confidence classification,
    # and an extraction payload with empty quote/confidence maps.
    page_text = "Incomplete invoice — partial text only"
    doc = ProcessedDocument(
        ingested=IngestedDocument(
            file_name="incomplete_invoice.pdf",
            file_type="pdf",
            pages=[PageContent(page_number=1, text=page_text)],
            full_text=page_text,
        ),
        classification=Classification(
            doc_type="invoice",
            doc_type_display="Invoice",
            confidence=0.5,
            language="en",
            used_vision=False,
        ),
        extracted=ExtractedData(
            raw={"_quotes": [], "_confidence": {}},
            _quotes=[],
            _confidence={},
        ),
    )

    risk_graph = build_risk_subgraph()
    result = await risk_graph.ainvoke(
        {
            "documents": [doc],
            "risks": [],
            "started_at": datetime.now(),
            "processing_seconds": 0.0,
        }
    )

    findings = result.get("risks") or []
    # Subgraph runs without error; risks may or may not include items
    # depending on the dummy classifier path. We just assert it returned a list.
    assert isinstance(findings, list)