{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "german-ocr-3/schemas/generic_document.json",
  "title": "GermanOCR3 Generic Document",
  "description": "Allgemeines deutsches Dokument-Extraktionsschema. Felder dürfen null sein, wenn nicht eindeutig erkennbar.",
  "type": "object",
  "additionalProperties": false,
  "required": ["document_type", "language", "raw_text", "confidence"],
  "properties": {
    "document_type": {
      "description": "z.B. invoice, receipt, letter, form, contract, id_card, other, unknown",
      "type": ["string", "null"]
    },
    "language": {
      "description": "BCP-47 Sprachcode des Dokuments (typisch 'de').",
      "type": "string",
      "default": "de"
    },
    "sender": {
      "type": ["object", "null"],
      "additionalProperties": false,
      "properties": {
        "name": {"type": ["string", "null"]},
        "address": {"type": ["string", "null"]},
        "email": {"type": ["string", "null"]},
        "phone": {"type": ["string", "null"]},
        "tax_id": {"type": ["string", "null"]},
        "vat_id": {"type": ["string", "null"]}
      }
    },
    "recipient": {
      "type": ["object", "null"],
      "additionalProperties": false,
      "properties": {
        "name": {"type": ["string", "null"]},
        "address": {"type": ["string", "null"]},
        "customer_id": {"type": ["string", "null"]}
      }
    },
    "date": {
      "description": "Hauptdatum des Dokuments im Format YYYY-MM-DD, falls eindeutig erkennbar.",
      "type": ["string", "null"]
    },
    "reference_numbers": {
      "type": "array",
      "items": {
        "type": "object",
        "additionalProperties": false,
        "required": ["label", "value"],
        "properties": {
          "label": {"type": "string"},
          "value": {"type": "string"}
        }
      },
      "default": []
    },
    "amounts": {
      "type": "array",
      "items": {
        "type": "object",
        "additionalProperties": false,
        "required": ["label", "value"],
        "properties": {
          "label": {"type": "string"},
          "value": {"type": ["number", "string"]},
          "currency": {"type": ["string", "null"]}
        }
      },
      "default": []
    },
    "tables": {
      "type": "array",
      "default": [],
      "items": {
        "type": "object",
        "additionalProperties": false,
        "required": ["headers", "rows"],
        "properties": {
          "title": {"type": ["string", "null"]},
          "headers": {"type": "array", "items": {"type": "string"}},
          "rows": {
            "type": "array",
            "items": {
              "type": "array",
              "items": {"type": ["string", "number", "null"]}
            }
          }
        }
      }
    },
    "raw_text": {
      "description": "Roher OCR-Text, möglichst layouterhaltend, deutsche Originalschreibweise behalten.",
      "type": ["string", "null"]
    },
    "confidence": {
      "description": "Subjektive Selbsteinschätzung 0..1.",
      "type": ["number", "null"],
      "minimum": 0,
      "maximum": 1
    },
    "notes": {
      "type": "array",
      "items": {"type": "string"},
      "default": []
    }
  }
}
|