Delete .eval_results
Browse files
Hey! Nathan from HF here. Would you be OK with deleting the `.eval_results` folder so it doesn't appear as a duplicate on the leaderboards?
Thanks!
- .eval_results/MathArena--aime_2026.yaml +0 -8
- .eval_results/MathArena--hmmt_feb_2026.yaml +0 -8
- .eval_results/gpqa.yaml +0 -8
- .eval_results/hle.yaml +0 -8
- .eval_results/hle_with_tools.yaml +0 -9
- .eval_results/swe_bench_pro.yaml +0 -8
- .eval_results/terminal_bench_2.yaml +0 -8
- .eval_results/terminal_bench_2_claudecode.yaml +0 -9
.eval_results/MathArena--aime_2026.yaml
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
- dataset:
|
| 2 |
-
id: MathArena/aime_2026
|
| 3 |
-
task_id: MathArena/aime_2026
|
| 4 |
-
value: 95.3
|
| 5 |
-
date: '2026-04-07'
|
| 6 |
-
source:
|
| 7 |
-
url: https://huggingface.co/zai-org/GLM-5.1
|
| 8 |
-
name: Model Card
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.eval_results/MathArena--hmmt_feb_2026.yaml
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
- dataset:
|
| 2 |
-
id: MathArena/hmmt_feb_2026
|
| 3 |
-
task_id: MathArena/hmmt_feb_2026
|
| 4 |
-
value: 82.6
|
| 5 |
-
date: '2026-04-07'
|
| 6 |
-
source:
|
| 7 |
-
url: https://huggingface.co/zai-org/GLM-5.1
|
| 8 |
-
name: Model Card
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.eval_results/gpqa.yaml
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
- dataset:
|
| 2 |
-
id: Idavidrein/gpqa
|
| 3 |
-
task_id: diamond
|
| 4 |
-
value: 86.2
|
| 5 |
-
date: '2026-04-07'
|
| 6 |
-
source:
|
| 7 |
-
url: https://huggingface.co/zai-org/GLM-5.1
|
| 8 |
-
name: Model Card
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.eval_results/hle.yaml
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
- dataset:
|
| 2 |
-
id: cais/hle
|
| 3 |
-
task_id: hle
|
| 4 |
-
value: 31.0
|
| 5 |
-
date: '2026-04-07'
|
| 6 |
-
source:
|
| 7 |
-
url: https://huggingface.co/zai-org/GLM-5.1
|
| 8 |
-
name: Model Card
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.eval_results/hle_with_tools.yaml
DELETED
|
@@ -1,9 +0,0 @@
|
|
| 1 |
-
- dataset:
|
| 2 |
-
id: cais/hle
|
| 3 |
-
task_id: hle
|
| 4 |
-
value: 52.3
|
| 5 |
-
date: '2026-04-07'
|
| 6 |
-
source:
|
| 7 |
-
url: https://huggingface.co/zai-org/GLM-5.1
|
| 8 |
-
name: Model Card
|
| 9 |
-
notes: "With tools"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.eval_results/swe_bench_pro.yaml
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
- dataset:
|
| 2 |
-
id: ScaleAI/SWE-bench_Pro
|
| 3 |
-
task_id: SWE_Bench_Pro
|
| 4 |
-
value: 58.4
|
| 5 |
-
source:
|
| 6 |
-
url: https://huggingface.co/zai-org/GLM-5.1
|
| 7 |
-
name: Model Card
|
| 8 |
-
notes: high reasoning
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.eval_results/terminal_bench_2.yaml
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
- dataset:
|
| 2 |
-
id: harborframework/terminal-bench-2.0
|
| 3 |
-
task_id: terminalbench_2
|
| 4 |
-
value: 63.5
|
| 5 |
-
date: '2026-04-07'
|
| 6 |
-
source:
|
| 7 |
-
url: https://huggingface.co/zai-org/GLM-5.1
|
| 8 |
-
name: Model Card
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.eval_results/terminal_bench_2_claudecode.yaml
DELETED
|
@@ -1,9 +0,0 @@
|
|
| 1 |
-
- dataset:
|
| 2 |
-
id: harborframework/terminal-bench-2.0
|
| 3 |
-
task_id: terminalbench_2
|
| 4 |
-
value: 69.0
|
| 5 |
-
date: '2026-04-07'
|
| 6 |
-
source:
|
| 7 |
-
url: https://huggingface.co/zai-org/GLM-5.1
|
| 8 |
-
name: Model Card
|
| 9 |
-
notes: "agent: Terminus 2(Claude Code)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|