---
# Benchmark scores on allenai/olmOCR-bench, one list entry per task_id.
# Every entry cites the LightOnOCR technical report as its source.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost (each line was wrapped in "| ... |").
# `id`/`task_id` are placed under `dataset`, and `url`/`name`/`user` under
# `source`, because those parent keys carry no value of their own; `value`
# and `notes` are assumed to sit at entry level -- confirm against the
# consumer's schema.

- dataset:
    id: allenai/olmOCR-bench
    task_id: overall
  value: 76.1
  notes: "Excluding Headers & Footers category"
  source:
    url: https://huggingface.co/papers/2601.14251
    name: LightOnOCR technical report
    user: Bapt120

- dataset:
    id: allenai/olmOCR-bench
    task_id: arxiv_math
  value: 81.4
  source:
    url: https://huggingface.co/papers/2601.14251
    name: LightOnOCR technical report
    user: Bapt120

- dataset:
    id: allenai/olmOCR-bench
    task_id: old_scans_math
  value: 71.6
  source:
    url: https://huggingface.co/papers/2601.14251
    name: LightOnOCR technical report
    user: Bapt120

- dataset:
    id: allenai/olmOCR-bench
    task_id: table_tests
  value: 76.4
  source:
    url: https://huggingface.co/papers/2601.14251
    name: LightOnOCR technical report
    user: Bapt120

- dataset:
    id: allenai/olmOCR-bench
    task_id: old_scans
  value: 35.2
  source:
    url: https://huggingface.co/papers/2601.14251
    name: LightOnOCR technical report
    user: Bapt120

- dataset:
    id: allenai/olmOCR-bench
    task_id: multi_column
  value: 80.0
  source:
    url: https://huggingface.co/papers/2601.14251
    name: LightOnOCR technical report
    user: Bapt120

- dataset:
    id: allenai/olmOCR-bench
    task_id: long_tiny_text
  value: 88.7
  source:
    url: https://huggingface.co/papers/2601.14251
    name: LightOnOCR technical report
    user: Bapt120

# The low score here is expected; see the entry's own `notes` field.
- dataset:
    id: allenai/olmOCR-bench
    task_id: headers_footers
  value: 35.5
  notes: "Instead of removing headers and footers, our model is trained for full-page transcription and explicitly rewards their presence (via flipped RLVR tests), which lowers this score under the original benchmark scoring."
  source:
    url: https://huggingface.co/papers/2601.14251
    name: LightOnOCR technical report
    user: Bapt120

- dataset:
    id: allenai/olmOCR-bench
    task_id: baseline
  value: 99.6
  source:
    url: https://huggingface.co/papers/2601.14251
    name: LightOnOCR technical report
    user: Bapt120