german-ocr-3 / schemas /form.json
Keyven's picture
Upload schemas/form.json with huggingface_hub
fd206e2 verified
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "german-ocr-3/schemas/form.json",
  "title": "GermanOCR3 Form (Formular)",
  "description": "Generisches Schema fuer ausgefuellte deutsche Formulare. Felder werden als Liste von Label/Value-Paaren geliefert, plus optionale Checkbox-Liste und Unterschriften.",
  "type": "object",
  "additionalProperties": false,
  "required": ["document_type", "language", "fields"],
  "properties": {
    "document_type": {
      "const": "form"
    },
    "language": {
      "type": "string",
      "default": "de"
    },
    "form_title": {
      "type": ["string", "null"]
    },
    "form_id": {
      "type": ["string", "null"],
      "description": "z.B. Antragsnummer / Formularkennung"
    },
    "issuing_authority": {
      "type": ["string", "null"]
    },
    "fields": {
      "type": "array",
      "description": "Erkannte Label/Wert-Paare in Lesereihenfolge.",
      "items": {
        "type": "object",
        "additionalProperties": false,
        "required": ["label", "value"],
        "properties": {
          "label": {
            "type": "string"
          },
          "value": {
            "type": ["string", "number", "boolean", "null"]
          },
          "section": {
            "type": ["string", "null"]
          },
          "page": {
            "type": ["integer", "null"]
          }
        }
      }
    },
    "checkboxes": {
      "type": "array",
      "default": [],
      "items": {
        "type": "object",
        "additionalProperties": false,
        "required": ["label", "checked"],
        "properties": {
          "label": {
            "type": "string"
          },
          "checked": {
            "type": "boolean"
          },
          "section": {
            "type": ["string", "null"]
          }
        }
      }
    },
    "signatures": {
      "type": "array",
      "default": [],
      "items": {
        "type": "object",
        "additionalProperties": false,
        "properties": {
          "by": {
            "type": ["string", "null"]
          },
          "place": {
            "type": ["string", "null"]
          },
          "date": {
            "type": ["string", "null"]
          },
          "present": {
            "type": "boolean"
          }
        }
      }
    },
    "raw_text": {
      "type": ["string", "null"]
    },
    "confidence": {
      "type": ["number", "null"],
      "minimum": 0,
      "maximum": 1
    },
    "notes": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "default": []
    }
  }
}