File size: 3,230 Bytes
a81a250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "german-ocr-3/schemas/generic_document.json",
  "title": "GermanOCR3 Generic Document",
  "description": "Allgemeines deutsches Dokument-Extraktionsschema. Felder dürfen null sein, wenn nicht eindeutig erkennbar.",
  "type": "object",
  "additionalProperties": false,
  "required": ["document_type", "language", "raw_text", "confidence"],
  "properties": {
    "document_type": {
      "description": "z.B. invoice, receipt, letter, form, contract, id_card, other, unknown",
      "type": ["string", "null"]
    },
    "language": {
      "description": "BCP-47 Sprachcode des Dokuments (typisch 'de').",
      "type": "string",
      "default": "de"
    },
    "sender": {
      "type": ["object", "null"],
      "additionalProperties": false,
      "properties": {
        "name": {"type": ["string", "null"]},
        "address": {"type": ["string", "null"]},
        "email": {"type": ["string", "null"]},
        "phone": {"type": ["string", "null"]},
        "tax_id": {"type": ["string", "null"]},
        "vat_id": {"type": ["string", "null"]}
      }
    },
    "recipient": {
      "type": ["object", "null"],
      "additionalProperties": false,
      "properties": {
        "name": {"type": ["string", "null"]},
        "address": {"type": ["string", "null"]},
        "customer_id": {"type": ["string", "null"]}
      }
    },
    "date": {
      "description": "Hauptdatum des Dokuments im Format YYYY-MM-DD, falls eindeutig erkennbar.",
      "type": ["string", "null"],
      "format": "date"
    },
    "reference_numbers": {
      "type": "array",
      "items": {
        "type": "object",
        "additionalProperties": false,
        "required": ["label", "value"],
        "properties": {
          "label": {"type": "string"},
          "value": {"type": "string"}
        }
      },
      "default": []
    },
    "amounts": {
      "type": "array",
      "items": {
        "type": "object",
        "additionalProperties": false,
        "required": ["label", "value"],
        "properties": {
          "label": {"type": "string"},
          "value": {"type": ["number", "string"]},
          "currency": {"type": ["string", "null"]}
        }
      },
      "default": []
    },
    "tables": {
      "type": "array",
      "default": [],
      "items": {
        "type": "object",
        "additionalProperties": false,
        "required": ["headers", "rows"],
        "properties": {
          "title": {"type": ["string", "null"]},
          "headers": {"type": "array", "items": {"type": "string"}},
          "rows": {
            "type": "array",
            "items": {
              "type": "array",
              "items": {"type": ["string", "number", "null"]}
            }
          }
        }
      }
    },
    "raw_text": {
      "description": "Roher OCR-Text, möglichst layouterhaltend, deutsche Originalschreibweise behalten.",
      "type": ["string", "null"]
    },
    "confidence": {
      "description": "Subjektive Selbsteinschätzung 0..1.",
      "type": ["number", "null"],
      "minimum": 0,
      "maximum": 1
    },
    "notes": {
      "type": "array",
      "items": {"type": "string"},
      "default": []
    }
  }
}