File size: 3,449 Bytes
"""Plausibility checks — flag unusual values as low- or medium-severity warnings.
Nothing is dropped or altered; values are only flagged. Language- and country-agnostic.
"""
from __future__ import annotations
from utils.dates import parse_date_safe
from utils.numbers import coerce_number, is_null_alias
# VAT rates (in percent) known to be in use across various countries.
# validate_plausibility consults this set when classifying a line item's rate.
KNOWN_VAT_RATES = {
    0, 5, 7, 8, 10, 12,
    13, 15, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 27,
}
# Total/amount fields that are checked for negative values.
_AMOUNT_FIELDS = ("total_net", "total_vat", "total_gross", "amount")

# Date fields that are checked for parseability and a plausible year.
_DATE_FIELDS = (
    "issue_date", "fulfillment_date", "payment_due_date",
    "order_date", "delivery_due_date", "delivery_date",
    "effective_date", "expiry_date",
)

# Fields exempt from the "future date" check: a contract's expiry/effective
# date can naturally lie in the distant future.
_FAR_FUTURE_OK_FIELDS = ("expiry_date", "effective_date")


def _plausibility_warning(severity: str, message: str) -> dict:
    """Build one warning record in the shape validate_plausibility returns."""
    return {"type": "plausibility", "severity": severity, "message": message}


def _vat_rate_warnings(items) -> list[dict]:
    """Return warnings for negative, very high or unrecognised line-item VAT rates."""
    found: list[dict] = []
    if not isinstance(items, (list, tuple)):
        # Missing or malformed "line_items": there are no lines to inspect.
        return found
    for item in items:
        if not isinstance(item, dict):
            continue
        vat_rate = coerce_number(item.get("vat_rate"))
        if vat_rate is None:
            continue
        # Fall back to "?" also when the key is present but null/empty, so the
        # message never reads "on line 'None'".
        name = item.get("description") or "?"
        if vat_rate < 0:
            found.append(_plausibility_warning(
                "medium", f"Negative VAT rate ({vat_rate:g}%) on line '{name}'",
            ))
        elif vat_rate > 50:
            found.append(_plausibility_warning(
                "medium", f"Unusually high VAT rate ({vat_rate:g}%) on line '{name}'",
            ))
        elif vat_rate not in KNOWN_VAT_RATES:
            # Compare the exact value: truncating with int() first would let
            # fractional rates such as 19.9 or 0.27 pass as "known", and int()
            # raises on NaN. Equal int/float values hash alike, so 27.0 still
            # matches 27.
            found.append(_plausibility_warning(
                "low", f"Non-standard VAT rate ({vat_rate:g}%) on line '{name}'",
            ))
    return found


def _negative_amount_warnings(extracted: dict) -> list[dict]:
    """Return a warning for every total/amount field whose value is below zero."""
    found: list[dict] = []
    for field in _AMOUNT_FIELDS:
        amount = coerce_number(extracted.get(field))
        if amount is not None and amount < 0:
            found.append(_plausibility_warning(
                "medium", f"Negative amount: {field} = {amount:.0f}",
            ))
    return found


def _date_warnings(extracted: dict) -> list[dict]:
    """Return warnings for unparseable dates and dates outside 2000-2030."""
    found: list[dict] = []
    for field in _DATE_FIELDS:
        date_str = extracted.get(field)
        # Only non-empty strings are checked; null aliases are skipped.
        if not date_str or not isinstance(date_str, str):
            continue
        if is_null_alias(date_str):
            continue
        # Per the original author's note, parse_date_safe supports YYYY-MM-DD,
        # YYYY.MM.DD, YYYY/MM/DD and DD.MM.YYYY (HU/DE/EN dates).
        dt = parse_date_safe(date_str)
        if dt is None:
            found.append(_plausibility_warning(
                "low", f"Unparseable date: {field} = '{date_str}'",
            ))
        elif dt.year < 2000:
            found.append(_plausibility_warning(
                "low", f"Old date: {field} = {date_str} (before 2000)",
            ))
        elif dt.year > 2030 and field not in _FAR_FUTURE_OK_FIELDS:
            found.append(_plausibility_warning(
                "low", f"Future date: {field} = {date_str} (after 2030)",
            ))
    return found


def validate_plausibility(extracted: dict) -> list[dict]:
    """Flag unusual values as warnings without modifying the input.

    Three groups of checks run, in this order:

    1. Per line item (``line_items``): negative VAT rate (medium), VAT rate
       above 50 (medium), VAT rate not in ``KNOWN_VAT_RATES`` (low).
    2. Negative ``total_net`` / ``total_vat`` / ``total_gross`` / ``amount``
       (medium).
    3. Date fields: unparseable value, year before 2000, or year after 2030
       (all low). ``expiry_date`` and ``effective_date`` are exempt from the
       future-date check.

    Args:
        extracted: Extracted document fields. Anything that is not a dict
            (e.g. ``None``) yields no warnings instead of raising.

    Returns:
        list of {"type": "plausibility", "severity": ..., "message": ...}
    """
    if not isinstance(extracted, dict):
        # Nothing to inspect; stay non-fatal like the rest of the checks.
        return []
    return [
        *_vat_rate_warnings(extracted.get("line_items")),
        *_negative_amount_warnings(extracted),
        *_date_warnings(extracted),
    ]
|