| """Plausibility checks — flag unusual values as info-level warnings. |
| |
| Does not drop anything; only marks. Language- and country-agnostic. |
| """ |
|
|
| from __future__ import annotations |
|
|
| from utils.dates import parse_date_safe |
| from utils.numbers import coerce_number, is_null_alias |
|
|
|
|
| |
| KNOWN_VAT_RATES = {0, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27} |
|
|
|
|
| def validate_plausibility(extracted: dict) -> list[dict]: |
| """Flag unusual values as warnings. |
| |
| Returns: list of {"type": "plausibility", "severity": ..., "message": ...} |
| """ |
| warnings: list[dict] = [] |
|
|
| |
| items = extracted.get("line_items") or [] |
| for item in items: |
| if not isinstance(item, dict): |
| continue |
| vat_rate = coerce_number(item.get("vat_rate")) |
| if vat_rate is None: |
| continue |
| name = item.get("description", "?") |
| if vat_rate < 0: |
| warnings.append({ |
| "type": "plausibility", |
| "severity": "medium", |
| "message": f"Negative VAT rate ({vat_rate:g}%) on line '{name}'", |
| }) |
| elif vat_rate > 50: |
| warnings.append({ |
| "type": "plausibility", |
| "severity": "medium", |
| "message": f"Unusually high VAT rate ({vat_rate:g}%) on line '{name}'", |
| }) |
| elif int(vat_rate) not in KNOWN_VAT_RATES and vat_rate != 0: |
| warnings.append({ |
| "type": "plausibility", |
| "severity": "low", |
| "message": f"Non-standard VAT rate ({vat_rate:g}%) on line '{name}'", |
| }) |
|
|
| |
| for field in ("total_net", "total_vat", "total_gross", "amount"): |
| amount = coerce_number(extracted.get(field)) |
| if amount is not None and amount < 0: |
| warnings.append({ |
| "type": "plausibility", |
| "severity": "medium", |
| "message": f"Negative amount: {field} = {amount:.0f}", |
| }) |
|
|
| |
| for field in ( |
| "issue_date", "fulfillment_date", "payment_due_date", |
| "order_date", "delivery_due_date", "delivery_date", |
| "effective_date", "expiry_date", |
| ): |
| date_str = extracted.get(field) |
| if not date_str or not isinstance(date_str, str): |
| continue |
| if is_null_alias(date_str): |
| continue |
| |
| |
| dt = parse_date_safe(date_str) |
| if dt is None: |
| warnings.append({ |
| "type": "plausibility", |
| "severity": "low", |
| "message": f"Unparseable date: {field} = '{date_str}'", |
| }) |
| elif dt.year < 2000: |
| warnings.append({ |
| "type": "plausibility", |
| "severity": "low", |
| "message": f"Old date: {field} = {date_str} (before 2000)", |
| }) |
| elif dt.year > 2030 and field not in ("expiry_date", "effective_date"): |
| |
| warnings.append({ |
| "type": "plausibility", |
| "severity": "low", |
| "message": f"Future date: {field} = {date_str} (after 2030)", |
| }) |
|
|
| return warnings |
|
|