{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "german-ocr-3/schemas/generic_document.json",
  "title": "GermanOCR3 Generic Document",
  "description": "Allgemeines deutsches Dokument-Extraktionsschema. Felder dürfen null sein, wenn nicht eindeutig erkennbar.",
  "type": "object",
  "additionalProperties": false,
  "required": ["document_type", "language", "raw_text", "confidence"],
  "properties": {
    "document_type": {
      "description": "z.B. invoice, receipt, letter, form, contract, id_card, other, unknown",
      "type": ["string", "null"]
    },
    "language": {
      "description": "BCP-47 Sprachcode des Dokuments (typisch 'de').",
      "type": "string",
      "default": "de"
    },
    "sender": {
      "type": ["object", "null"],
      "additionalProperties": false,
      "properties": {
        "name": {"type": ["string", "null"]},
        "address": {"type": ["string", "null"]},
        "email": {"type": ["string", "null"]},
        "phone": {"type": ["string", "null"]},
        "tax_id": {"type": ["string", "null"]},
        "vat_id": {"type": ["string", "null"]}
      }
    },
    "recipient": {
      "type": ["object", "null"],
      "additionalProperties": false,
      "properties": {
        "name": {"type": ["string", "null"]},
        "address": {"type": ["string", "null"]},
        "customer_id": {"type": ["string", "null"]}
      }
    },
    "date": {
      "description": "Hauptdatum des Dokuments im Format YYYY-MM-DD, falls eindeutig erkennbar.",
      "type": ["string", "null"],
      "pattern": "^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$"
    },
    "reference_numbers": {
      "type": "array",
      "items": {
        "type": "object",
        "additionalProperties": false,
        "required": ["label", "value"],
        "properties": {
          "label": {"type": "string"},
          "value": {"type": "string"}
        }
      },
      "default": []
    },
    "amounts": {
      "type": "array",
      "items": {
        "type": "object",
        "additionalProperties": false,
        "required": ["label", "value"],
        "properties": {
          "label": {"type": "string"},
          "value": {"type": ["number", "string"]},
          "currency": {"type": ["string", "null"]}
        }
      },
      "default": []
    },
    "tables": {
      "type": "array",
      "default": [],
      "items": {
        "type": "object",
        "additionalProperties": false,
        "required": ["headers", "rows"],
        "properties": {
          "title": {"type": ["string", "null"]},
          "headers": {
            "type": "array",
            "items": {"type": "string"}
          },
          "rows": {
            "type": "array",
            "items": {
              "type": "array",
              "items": {"type": ["string", "number", "null"]}
            }
          }
        }
      }
    },
    "raw_text": {
      "description": "Roher OCR-Text, möglichst layouterhaltend, deutsche Originalschreibweise behalten.",
      "type": ["string", "null"]
    },
    "confidence": {
      "description": "Subjektive Selbsteinschätzung 0..1.",
      "type": ["number", "null"],
      "minimum": 0,
      "maximum": 1
    },
    "notes": {
      "type": "array",
      "items": {"type": "string"},
      "default": []
    }
  }
}