"""10-csoportos E2E API paritás-teszt — a `prototype-agentic/test_e2e.py` ekvivalense.
Közvetlenül a `pipeline_graph.ainvoke()`-ot hívja (UI nélkül). Per-csoport JSON
output a `tests/e2e_api/results/` mappában, plusz determinisztikus assertek és
Claude AI-validáció.
Csoportok:
01 -- Egyedi számlák (5 fájl: 3 HU + 1 EN intra-EU + 1 DE)
02 -- Egyedi szerződések (4 fájl: NDA + MSSA + IT support + leasing)
03 -- Pénzügyi kimutatások (2 fájl: HU eredménykim + EN cash flow IFRS)
04 -- Multi-doc three-way matching (3 PDF: PO + DN + INV)
05 -- Adversarial egyenként (4 fájl: math/incomplete/bilingual/date)
06 -- Adversarial kombinált (mind a 4 együtt)
07 -- Audit demo (3 számla, +50% árnövekedés)
08 -- DD demo (NDA + MSSA + amendment, 3 piros zászló)
09 -- Compliance demo (2 szerz, GDPR-aszimmetria)
10 -- 14 chat kérdés (8 multi_doc + 3 audit + 3 compliance)
Futtatás:
pytest tests/e2e_api/ -v -s
Idő: ~10-15 perc Claude Haiku-val.
"""
from __future__ import annotations
import asyncio
import json
import time
import traceback
from dataclasses import asdict, dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any
import pytest
from tests.e2e_api.conftest import RESULTS_DIR, TEST_DATA
from tests.e2e_api.expected_findings import CHAT_SCENARIOS, EXPECTED_FINDINGS
# ---------------------------------------------------------------------------
# Lazy imports — only load the graphs when a test actually runs
# ---------------------------------------------------------------------------
def _build_pipeline():
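    """Build the full pipeline graph together with its HybridStore and chat model."""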
from graph.pipeline_graph import build_pipeline_graph
from providers import get_chat_model
from store import HybridStore
store = HybridStore()
llm = get_chat_model()
graph = build_pipeline_graph(store, llm=llm)
return graph, store, llm
def _build_package_insights():
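    """Build the package-insights graph using the default chat model."""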
from graph.package_insights_graph import build_package_insights_graph
from providers import get_chat_model
return build_package_insights_graph(llm=get_chat_model())
def _build_dd():
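    """Build the DD (due diligence) graph using the default chat model."""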
from graph.dd_graph import build_dd_graph
from providers import get_chat_model
return build_dd_graph(llm=get_chat_model())
# ---------------------------------------------------------------------------
# Helper dataclass + serialization
# ---------------------------------------------------------------------------
@dataclass
class ParitasResult:
test_name: str
files: list[str]
timestamp: str
pipeline_seconds: float
document_count: int
risk_count: int
risks: list[dict] = field(default_factory=list)
classifications: list[dict] = field(default_factory=list)
extracted: list[dict] = field(default_factory=list)
comparison: dict | None = None
package_insights: dict | None = None
dd_report: dict | None = None
chat_responses: list[dict] = field(default_factory=list)
errors: list[str] = field(default_factory=list)
paritas_assertions: list[dict] = field(default_factory=list)
def to_dict(self) -> dict:
return asdict(self)
def _load_files(file_paths: list[Path]) -> list[tuple[str, bytes]]:
return [(p.name, p.read_bytes()) for p in file_paths if p.exists()]
def _serialize_pipeline_state(state: dict) -> dict:
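    """Flatten the raw pipeline state into a JSON-serializable dict.

    Documents are reduced to their classification and extraction fields;
    pydantic models (comparison, package insights, DD report) are dumped
    via model_dump() when available.
    """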
out: dict[str, Any] = {}
docs = state.get("documents") or []
out["document_count"] = len(docs)
out["classifications"] = [
{
"file_name": d.ingested.file_name if d.ingested else None,
"doc_type": d.classification.doc_type if d.classification else None,
"doc_type_display": d.classification.doc_type_display if d.classification else None,
"confidence": d.classification.confidence if d.classification else None,
"language": d.classification.language if d.classification else None,
}
for d in docs
]
out["extracted"] = [
{
"file_name": d.ingested.file_name if d.ingested else None,
"raw": d.extracted.raw if d.extracted else None,
}
for d in docs
]
risks = state.get("risks") or []
out["risks"] = [
{
"leiras": r.leiras,
"sulyossag": r.sulyossag,
"indoklas": r.indoklas,
"tipus": r.tipus,
"jogszabaly": r.jogszabaly,
"erinto_dokumentum": r.erinto_dokumentum,
}
for r in risks
]
out["risk_count"] = len(risks)
comp = state.get("comparison")
if comp is not None:
out["comparison"] = comp.model_dump() if hasattr(comp, "model_dump") else dict(comp)
pkg = state.get("package_insights")
if pkg is not None:
out["package_insights"] = pkg.model_dump() if hasattr(pkg, "model_dump") else dict(pkg)
dd = state.get("dd_report")
if dd is not None:
out["dd_report"] = dd.model_dump() if hasattr(dd, "model_dump") else dict(dd)
out["pipeline_seconds"] = state.get("processing_seconds", 0.0)
return out
def _save_result(name: str, data: ParitasResult | dict) -> None:
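    """Write the result as pretty-printed UTF-8 JSON to RESULTS_DIR/<name>.json."""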
payload = data.to_dict() if isinstance(data, ParitasResult) else data
out_path = RESULTS_DIR / f"{name}.json"
out_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2, default=str), encoding="utf-8")
# ---------------------------------------------------------------------------
# Deterministic parity checks
# ---------------------------------------------------------------------------
def _flatten_state_text(serialized: dict) -> str:
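    """Join every free-text field of the serialized state into one lowercase
    string, so keyword checks can search risks, comparison matches, package
    insights and the DD report in a single pass."""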
parts: list[str] = []
for r in serialized.get("risks", []):
parts.append(r.get("leiras", ""))
parts.append(r.get("indoklas", ""))
comp = serialized.get("comparison") or {}
if isinstance(comp, dict):
for m in comp.get("matches", []):
if isinstance(m, dict):
parts.append(str(m.get("message", "")))
parts.append(str(m.get("field", "")))
pkg = serialized.get("package_insights") or {}
if isinstance(pkg, dict):
parts.append(str(pkg.get("executive_summary", "")))
for f in pkg.get("findings", []) or []:
if isinstance(f, dict):
parts.append(str(f.get("leiras", "")))
parts.append(str(f.get("indoklas", "")))
for o in pkg.get("key_observations", []) or []:
parts.append(str(o))
dd = serialized.get("dd_report") or {}
if isinstance(dd, dict):
parts.append(str(dd.get("executive_summary", "")))
for flag in dd.get("top_red_flags", []) or []:
parts.append(str(flag))
return " ".join(parts).lower()
def _check_must_contain(text: str, expected: dict) -> list[dict]:
results = []
for kw in expected.get("must_contain_keywords", []):
results.append({
"type": "must_contain_keyword",
"keyword": kw,
"passed": kw.lower() in text,
})
if expected.get("must_contain_any_of"):
keywords = expected["must_contain_any_of"]
any_passed = any(kw.lower() in text for kw in keywords)
results.append({
"type": "must_contain_any_of",
"keywords": keywords,
"passed": any_passed,
})
for kw in expected.get("must_not_contain", []):
results.append({
"type": "must_not_contain",
"keyword": kw,
"passed": kw.lower() not in text,
})
return results
def _check_risk_count(serialized: dict, expected: dict) -> list[dict]:
actual = serialized.get("risk_count", 0)
results = []
if "expected_risks_min" in expected:
results.append({
"type": "risk_count_min",
"min": expected["expected_risks_min"],
"actual": actual,
"passed": actual >= expected["expected_risks_min"],
})
if "expected_risks_max" in expected:
results.append({
"type": "risk_count_max",
"max": expected["expected_risks_max"],
"actual": actual,
"passed": actual <= expected["expected_risks_max"],
})
return results
def _check_severity(serialized: dict, expected: dict) -> list[dict]:
if "expected_severity_max" not in expected:
return []
sev_order = {"info": 0, "alacsony": 1, "kozepes": 2, "magas": 3}
max_allowed = sev_order.get(expected["expected_severity_max"], 3)
actual_max = 0
actual_max_label = "info"
for r in serialized.get("risks", []):
s = r.get("sulyossag") or "info"
if sev_order.get(s, 0) > actual_max:
actual_max = sev_order.get(s, 0)
actual_max_label = s
return [{
"type": "severity_max",
"max_allowed": expected["expected_severity_max"],
"actual_max": actual_max_label,
"passed": actual_max <= max_allowed,
}]
def _check_doc_type(serialized: dict, expected: dict) -> list[dict]:
results = []
if "expected_doc_type" in expected:
actual = (serialized.get("classifications") or [{}])[0].get("doc_type")
results.append({
"type": "doc_type",
"expected": expected["expected_doc_type"],
"actual": actual,
"passed": actual == expected["expected_doc_type"],
})
if "expected_doc_types" in expected:
actual_types = sorted(c.get("doc_type") for c in serialized.get("classifications", []))
expected_types = sorted(expected["expected_doc_types"])
results.append({
"type": "doc_types_set",
"expected": expected_types,
"actual": actual_types,
"passed": actual_types == expected_types,
})
if "expected_doc_types_all" in expected:
target = expected["expected_doc_types_all"]
all_match = all(
c.get("doc_type") == target
for c in serialized.get("classifications", [])
)
results.append({
"type": "doc_types_all",
"expected_all": target,
"passed": all_match,
})
return results
def _evaluate_paritas(serialized: dict, expected: dict) -> tuple[list[dict], bool]:
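    """Run all deterministic parity checks and return (records, overall_pass).

    Each record is a flat dict, e.g. (illustrative values):
        {"type": "risk_count_min", "min": 2, "actual": 3, "passed": True}
    """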
assertions = []
assertions.extend(_check_doc_type(serialized, expected))
assertions.extend(_check_risk_count(serialized, expected))
assertions.extend(_check_severity(serialized, expected))
text = _flatten_state_text(serialized)
assertions.extend(_check_must_contain(text, expected))
overall_pass = all(a.get("passed", False) for a in assertions)
return assertions, overall_pass
def _run_pipeline_for_files(files: list[Path]) -> tuple[dict, dict, Any]:
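    """Build a fresh pipeline and run it to completion over the given PDFs."""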
graph, store, llm = _build_pipeline()
file_tuples = _load_files(files)
state = asyncio.run(graph.ainvoke({"files": file_tuples}))
return _serialize_pipeline_state(state), state, store
# ===========================================================================
# 01. Individual invoices — 5 files
# ===========================================================================
@pytest.mark.e2e_paritas
@pytest.mark.parametrize("file_rel", [
"szamlak/bs-2026-001.pdf",
"szamlak/bs-2026-002.pdf",
"szamlak/bs-2026-003.pdf",
"szamlak/nl-inv-2026-0001.pdf",
"szamlak/bk-r-2026-0001.pdf",
])
def test_01_single_invoices(file_rel):
expected = EXPECTED_FINDINGS[file_rel]
pdf = TEST_DATA / file_rel
assert pdf.exists(), f"Hiányzik: {pdf}"
t0 = time.time()
serialized, _, _ = _run_pipeline_for_files([pdf])
elapsed = time.time() - t0
assertions, overall = _evaluate_paritas(serialized, expected)
result = ParitasResult(
test_name=f"01_single_{pdf.stem}",
files=[file_rel],
timestamp=datetime.now().isoformat(),
pipeline_seconds=elapsed,
document_count=serialized["document_count"],
risk_count=serialized["risk_count"],
risks=serialized["risks"],
classifications=serialized["classifications"],
extracted=serialized["extracted"],
paritas_assertions=assertions,
)
_save_result(f"01_single_{pdf.stem}", result)
assert overall, f"Paritás FAIL: {[a for a in assertions if not a.get('passed')]}"
# ===========================================================================
# 02. Individual contracts — 4 files
# ===========================================================================
@pytest.mark.e2e_paritas
@pytest.mark.parametrize("file_rel", [
"szerzodesek/bl-nt-nda-2026.pdf",
"szerzodesek/pt-dp-mssa-2026.pdf",
"szerzodesek/mbk-it-fa-2026.pdf",
"szerzodesek/df-lc-2026.pdf",
])
def test_02_single_contracts(file_rel):
expected = EXPECTED_FINDINGS[file_rel]
pdf = TEST_DATA / file_rel
assert pdf.exists()
t0 = time.time()
serialized, _, _ = _run_pipeline_for_files([pdf])
elapsed = time.time() - t0
assertions, overall = _evaluate_paritas(serialized, expected)
result = ParitasResult(
test_name=f"02_contract_{pdf.stem}",
files=[file_rel],
timestamp=datetime.now().isoformat(),
pipeline_seconds=elapsed,
document_count=serialized["document_count"],
risk_count=serialized["risk_count"],
risks=serialized["risks"],
classifications=serialized["classifications"],
extracted=serialized["extracted"],
paritas_assertions=assertions,
)
_save_result(f"02_contract_{pdf.stem}", result)
assert overall, f"Paritás FAIL: {[a for a in assertions if not a.get('passed')]}"
# ===========================================================================
# 03. Financial statements — 2 files
# ===========================================================================
@pytest.mark.e2e_paritas
@pytest.mark.parametrize("file_rel", [
"penzugyi_riportok/fin-hu-er-2025.pdf",
"penzugyi_riportok/fin-en-cf-2025.pdf",
])
def test_03_financial_reports(file_rel):
expected = EXPECTED_FINDINGS[file_rel]
pdf = TEST_DATA / file_rel
assert pdf.exists()
t0 = time.time()
serialized, _, _ = _run_pipeline_for_files([pdf])
elapsed = time.time() - t0
assertions, overall = _evaluate_paritas(serialized, expected)
result = ParitasResult(
test_name=f"03_financial_{pdf.stem}",
files=[file_rel],
timestamp=datetime.now().isoformat(),
pipeline_seconds=elapsed,
document_count=serialized["document_count"],
risk_count=serialized["risk_count"],
risks=serialized["risks"],
classifications=serialized["classifications"],
extracted=serialized["extracted"],
paritas_assertions=assertions,
)
_save_result(f"03_financial_{pdf.stem}", result)
assert overall, f"Paritás FAIL: {[a for a in assertions if not a.get('passed')]}"
# ===========================================================================
# 04. Multi-doc three-way matching — missing HI-100
# ===========================================================================
@pytest.mark.e2e_paritas
def test_04_multi_doc():
expected = EXPECTED_FINDINGS["multi_doc/__triplet__"]
files = [
TEST_DATA / "multi_doc" / "epkft-po-2026-0412.pdf",
TEST_DATA / "multi_doc" / "epkft-dn-2026-0415.pdf",
TEST_DATA / "multi_doc" / "epkft-inv-2026-0418.pdf",
]
for f in files:
assert f.exists()
t0 = time.time()
serialized, state, _ = _run_pipeline_for_files(files)
elapsed = time.time() - t0
assertions, overall = _evaluate_paritas(serialized, expected)
result = ParitasResult(
test_name="04_multi_doc_cross_check",
files=[str(f.relative_to(TEST_DATA)) for f in files],
timestamp=datetime.now().isoformat(),
pipeline_seconds=elapsed,
document_count=serialized["document_count"],
risk_count=serialized["risk_count"],
risks=serialized["risks"],
classifications=serialized["classifications"],
extracted=serialized["extracted"],
comparison=serialized.get("comparison"),
paritas_assertions=assertions,
)
_save_result("04_multi_doc_cross_check", result)
critical_failed = [
a for a in assertions
if not a.get("passed") and a.get("type") in ("doc_types_set", "must_contain_keyword", "must_contain_any_of")
]
assert not critical_failed, f"Kritikus paritás FAIL: {critical_failed}"
# ===========================================================================
# 05. Adversarial, one file at a time — 4 files
# ===========================================================================
@pytest.mark.e2e_paritas
@pytest.mark.parametrize("file_rel", [
"adversarial/adv-inv-2026-0001.pdf",
"adversarial/adv-ctr-2026-001.pdf",
"adversarial/adv-ctr-2026-002.pdf",
"adversarial/adv-ctr-2026-003.pdf",
])
def test_05_adversarial(file_rel):
expected = EXPECTED_FINDINGS[file_rel]
pdf = TEST_DATA / file_rel
assert pdf.exists()
t0 = time.time()
serialized, _, _ = _run_pipeline_for_files([pdf])
elapsed = time.time() - t0
assertions, overall = _evaluate_paritas(serialized, expected)
result = ParitasResult(
test_name=f"05_adversarial_{pdf.stem}",
files=[file_rel],
timestamp=datetime.now().isoformat(),
pipeline_seconds=elapsed,
document_count=serialized["document_count"],
risk_count=serialized["risk_count"],
risks=serialized["risks"],
classifications=serialized["classifications"],
extracted=serialized["extracted"],
paritas_assertions=assertions,
)
_save_result(f"05_adversarial_{pdf.stem}", result)
critical_failed = [
a for a in assertions
if not a.get("passed") and a.get("type") in ("must_contain_keyword", "must_contain_any_of")
]
assert not critical_failed, f"Adversarial finding hiányzik: {critical_failed}"
# ===========================================================================
# 06. Adversarial combined — all 4 together
# ===========================================================================
@pytest.mark.e2e_paritas
def test_06_adversarial_combined():
expected = EXPECTED_FINDINGS["adversarial/__combined__"]
files = sorted((TEST_DATA / "adversarial").glob("*.pdf"))
assert len(files) == 4
t0 = time.time()
serialized, _, _ = _run_pipeline_for_files(files)
elapsed = time.time() - t0
assertions, overall = _evaluate_paritas(serialized, expected)
result = ParitasResult(
test_name="06_adversarial_combined",
files=[str(f.relative_to(TEST_DATA)) for f in files],
timestamp=datetime.now().isoformat(),
pipeline_seconds=elapsed,
document_count=serialized["document_count"],
risk_count=serialized["risk_count"],
risks=serialized["risks"],
classifications=serialized["classifications"],
extracted=serialized["extracted"],
paritas_assertions=assertions,
)
_save_result("06_adversarial_combined", result)
critical_failed = [
a for a in assertions
if not a.get("passed") and a.get("type") in ("must_contain_keyword",)
]
assert not critical_failed, f"Cross-doc finding hiányzik: {critical_failed}"
# ===========================================================================
# 07-09. Demo packages
# ===========================================================================
def _run_demo_package(pkg_key: str) -> tuple[dict, list[Path]]:
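    """Run the core pipeline over a demo package, then layer the package-insights
    graph on top and, when contracts are present, the DD graph as well."""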
pkg_dir = TEST_DATA / "demo_csomagok" / pkg_key
files = sorted(pkg_dir.glob("*.pdf"))
assert files, f"Üres demo csomag: {pkg_key}"
graph, store, llm = _build_pipeline()
state = asyncio.run(graph.ainvoke({"files": _load_files(files)}))
pkg_type_map = {"audit_demo": "audit", "dd_demo": "dd", "compliance_demo": "compliance"}
pkg_type = pkg_type_map.get(pkg_key, "general")
pkg_graph = _build_package_insights()
pkg_state = asyncio.run(pkg_graph.ainvoke({
"documents": state.get("documents") or [],
"package_type": pkg_type,
}))
    # In the graph state the key is `final_insights` (see app/main.py:218); remap it here
state["package_insights"] = pkg_state.get("final_insights")
contracts = [
d for d in (state.get("documents") or [])
if d.classification and d.classification.doc_type == "szerzodes"
]
if contracts:
dd_graph = _build_dd()
dd_state = asyncio.run(dd_graph.ainvoke({"documents": contracts}))
state["dd_report"] = dd_state.get("dd_report")
return _serialize_pipeline_state(state), files
@pytest.mark.e2e_paritas
def test_07_audit_demo():
expected = EXPECTED_FINDINGS["demo_csomagok/audit_demo/__package__"]
t0 = time.time()
serialized, files = _run_demo_package("audit_demo")
elapsed = time.time() - t0
assertions, overall = _evaluate_paritas(serialized, expected)
result = ParitasResult(
test_name="07_audit_demo",
files=[str(f.relative_to(TEST_DATA)) for f in files],
timestamp=datetime.now().isoformat(),
pipeline_seconds=elapsed,
document_count=serialized["document_count"],
risk_count=serialized["risk_count"],
risks=serialized["risks"],
classifications=serialized["classifications"],
extracted=serialized["extracted"],
package_insights=serialized.get("package_insights"),
paritas_assertions=assertions,
)
_save_result("07_audit_demo", result)
critical_failed = [
a for a in assertions
if not a.get("passed") and a.get("type") in ("must_contain_any_of", "doc_types_all")
]
assert not critical_failed, f"Audit demo paritás FAIL: {critical_failed}"
@pytest.mark.e2e_paritas
def test_08_dd_demo():
expected = EXPECTED_FINDINGS["demo_csomagok/dd_demo/__package__"]
t0 = time.time()
serialized, files = _run_demo_package("dd_demo")
elapsed = time.time() - t0
assertions, overall = _evaluate_paritas(serialized, expected)
result = ParitasResult(
test_name="08_dd_demo",
files=[str(f.relative_to(TEST_DATA)) for f in files],
timestamp=datetime.now().isoformat(),
pipeline_seconds=elapsed,
document_count=serialized["document_count"],
risk_count=serialized["risk_count"],
risks=serialized["risks"],
classifications=serialized["classifications"],
extracted=serialized["extracted"],
package_insights=serialized.get("package_insights"),
dd_report=serialized.get("dd_report"),
paritas_assertions=assertions,
)
_save_result("08_dd_demo", result)
critical_failed = [
a for a in assertions
if not a.get("passed") and a.get("type") in ("must_contain_any_of",)
]
assert not critical_failed, f"DD demo paritás FAIL: {critical_failed}"
@pytest.mark.e2e_paritas
def test_09_compliance_demo():
expected = EXPECTED_FINDINGS["demo_csomagok/compliance_demo/__package__"]
t0 = time.time()
serialized, files = _run_demo_package("compliance_demo")
elapsed = time.time() - t0
assertions, overall = _evaluate_paritas(serialized, expected)
result = ParitasResult(
test_name="09_compliance_demo",
files=[str(f.relative_to(TEST_DATA)) for f in files],
timestamp=datetime.now().isoformat(),
pipeline_seconds=elapsed,
document_count=serialized["document_count"],
risk_count=serialized["risk_count"],
risks=serialized["risks"],
classifications=serialized["classifications"],
extracted=serialized["extracted"],
package_insights=serialized.get("package_insights"),
paritas_assertions=assertions,
)
_save_result("09_compliance_demo", result)
critical_failed = [
a for a in assertions
if not a.get("passed") and a.get("type") in ("must_contain_any_of", "must_contain_keyword")
]
assert not critical_failed, f"Compliance demo paritás FAIL: {critical_failed}"
# ===========================================================================
# 10. 14 chat questions
# ===========================================================================
def _run_chat_scenario(scenario_key: str) -> dict:
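    """Ingest the scenario's context files, then put each question to the chat
    graph, threading the accumulated message history as conversation context."""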
from langchain_core.messages import AIMessage, HumanMessage
scenario = CHAT_SCENARIOS[scenario_key]
files = [TEST_DATA / f for f in scenario["context_files"]]
for f in files:
assert f.exists(), f"Hiányzik: {f}"
graph, store, llm = _build_pipeline()
pipeline_state = asyncio.run(graph.ainvoke({"files": _load_files(files)}))
from tools.context import ChatToolContext
tool_context = ChatToolContext(store=store)
for d in pipeline_state.get("documents") or []:
tool_context.add_document(d)
from graph.chat_graph import build_chat_graph
chat_graph = build_chat_graph(llm, tool_context)
chat_results = []
chat_history: list = []
for q_def in scenario["questions"]:
question = q_def["q"]
try:
chat_history.append(HumanMessage(content=question))
chat_state = asyncio.run(chat_graph.ainvoke({"messages": chat_history}))
answer = chat_state.get("final_answer", "")
sources = chat_state.get("sources_cited") or []
chat_history.append(AIMessage(content=answer))
answer_lc = answer.lower()
assertions = []
must_any = q_def.get("must_contain_any_of", [])
if must_any:
assertions.append({
"type": "must_contain_any_of",
"keywords": must_any,
"passed": any(kw.lower() in answer_lc for kw in must_any),
})
for kw in q_def.get("must_not_contain", []):
assertions.append({
"type": "must_not_contain",
"keyword": kw,
"passed": kw.lower() not in answer_lc,
})
chat_results.append({
"q": question,
"a": answer,
"sources": sources,
"assertions": assertions,
"follow_up": q_def.get("follow_up", False),
})
except Exception as exc:
chat_results.append({
"q": question,
"a": "",
"error": f"{type(exc).__name__}: {exc}",
"trace": traceback.format_exc(),
})
return {"scenario": scenario_key, "context_files": scenario["context_files"], "qa": chat_results}
@pytest.mark.e2e_paritas
@pytest.mark.parametrize("scenario_key", list(CHAT_SCENARIOS.keys()))
def test_10_chat_scenarios(scenario_key):
t0 = time.time()
out = _run_chat_scenario(scenario_key)
elapsed = time.time() - t0
out["elapsed_seconds"] = elapsed
out["timestamp"] = datetime.now().isoformat()
_save_result(f"10_chat_{scenario_key}", out)
errors = [r for r in out["qa"] if r.get("error")]
failed = [
r for r in out["qa"]
if not r.get("error") and any(not a["passed"] for a in r.get("assertions", []))
]
if errors or failed:
msg = []
if errors:
msg.append(f"{len(errors)} chat hiba")
if failed:
msg.append(f"{len(failed)} kérdésre nem teljesült az assertion")
raise AssertionError("; ".join(msg))