# paperhawk/tests/integration/test_pipeline_smoke.py
# Author: Nándorfi Vince — "Initial paperhawk push to HF Space (LFS for binaries)" (commit 7ff7119)
"""pipeline_graph end-to-end smoke test (dummy LLM mode).
Walks one PDF through ingest → classify → extract → rag-index → quote-validate
→ compare → risk → report. Verifies that:
* the documents list is populated
* the risks list contains at least a basic or domain rule finding
* report.performance.speedup > 1.0 (real speedup vs the manual estimate)
"""
from __future__ import annotations
import pytest
from store import HybridStore
@pytest.mark.integration
@pytest.mark.asyncio
async def test_pipeline_e2e_single_invoice(sample_pdf_bytes, tmp_path):
    """One uploaded invoice PDF flows through the whole pipeline graph."""
    from graph.pipeline_graph import build_pipeline_graph

    hybrid_store = HybridStore(
        chroma_path=str(tmp_path / "chroma"),
        collection_name="test_pipeline_invoice",
    )
    pipeline = build_pipeline_graph(hybrid_store)
    final_state = await pipeline.ainvoke(
        {"files": [("invoice_january.pdf", sample_pdf_bytes)]}
    )

    docs = final_state.get("documents") or []
    assert len(docs) == 1, "Single uploaded PDF → 1 ProcessedDocument"

    doc = docs[0]
    assert doc.ingested is not None
    assert doc.classification is not None
    assert doc.classification.doc_type == "invoice"
    assert doc.extracted is not None
    assert doc.extracted.raw.get("invoice_number") == "2026/001"

    # RAG indexed
    assert doc.rag_chunks_indexed >= 1
    assert hybrid_store.chunk_count >= 1

    # Risks
    findings = final_state.get("risks") or []
    # ISA 500 evidence score is UI-only (not in risks). Materiality (ISA 320)
    # is an info-level risk that lands in the list.
    assert any(f.source_check_id == "check_07_materiality" for f in findings)

    # Report
    report = final_state.get("report")
    assert report is not None
    assert report["document_count"] == 1
    perf = report["performance"]
    assert perf["documents"] == 1
    assert perf["manual_estimate_minutes"] > 0
    # Speedup > 1 (8 minutes manual → < 8*60 sec automated)
    assert perf["speedup"] > 1.0
@pytest.mark.integration
@pytest.mark.asyncio
async def test_pipeline_three_doc_compare(sample_pdf_bytes, tmp_path):
    """3 docs (invoice + delivery_note + purchase_order) → three-way matching."""
    from graph.pipeline_graph import build_pipeline_graph

    # Same PDF reused 3× with different filenames + classifier picks via name prefix
    hybrid_store = HybridStore(
        chroma_path=str(tmp_path / "chroma_three"),
        collection_name="test_three_way",
    )
    pipeline = build_pipeline_graph(hybrid_store)
    uploads = [
        ("invoice_construction.pdf", sample_pdf_bytes),
        ("delivery_note_construction.pdf", sample_pdf_bytes),
        ("purchase_order_construction.pdf", sample_pdf_bytes),
    ]
    final_state = await pipeline.ainvoke({"files": uploads})

    docs = final_state.get("documents") or []
    assert len(docs) == 3

    # Classifier splits types based on filename prefixes
    seen_types = {d.classification.doc_type for d in docs if d.classification}
    for expected in ("invoice", "delivery_note", "purchase_order"):
        assert expected in seen_types
@pytest.mark.integration
@pytest.mark.asyncio
async def test_risk_subgraph_runs_on_minimal_input(tmp_path):
    """The risk subgraph runs end-to-end on minimal extracted data without crashing."""
    from datetime import datetime

    from graph.states.pipeline_state import (
        Classification,
        ExtractedData,
        IngestedDocument,
        PageContent,
        ProcessedDocument,
    )
    from subgraphs.risk_subgraph import build_risk_subgraph

    # Bare-minimum document: one page of text, low-confidence classification,
    # and an extraction payload with empty quote/confidence maps.
    page_text = "Incomplete invoice — partial text only"
    doc = ProcessedDocument(
        ingested=IngestedDocument(
            file_name="incomplete_invoice.pdf",
            file_type="pdf",
            pages=[PageContent(page_number=1, text=page_text)],
            full_text=page_text,
        ),
        classification=Classification(
            doc_type="invoice",
            doc_type_display="Invoice",
            confidence=0.5,
            language="en",
            used_vision=False,
        ),
        extracted=ExtractedData(
            raw={"_quotes": [], "_confidence": {}},
            _quotes=[],
            _confidence={},
        ),
    )

    risk_graph = build_risk_subgraph()
    result = await risk_graph.ainvoke(
        {
            "documents": [doc],
            "risks": [],
            "started_at": datetime.now(),
            "processing_seconds": 0.0,
        }
    )

    findings = result.get("risks") or []
    # Subgraph runs without error; risks may or may not include items
    # depending on the dummy classifier path. We just assert it returned a list.
    assert isinstance(findings, list)