Delete .eval_results

#2
by SaylorTwift HF Staff - opened
.eval_results/MathArena--aime_2026.yaml DELETED
@@ -1,8 +0,0 @@
1
- - dataset:
2
- id: MathArena/aime_2026
3
- task_id: MathArena/aime_2026
4
- value: 95.3
5
- date: '2026-04-07'
6
- source:
7
- url: https://huggingface.co/zai-org/GLM-5.1
8
- name: Model Card
 
 
 
 
 
 
 
 
 
.eval_results/MathArena--hmmt_feb_2026.yaml DELETED
@@ -1,8 +0,0 @@
1
- - dataset:
2
- id: MathArena/hmmt_feb_2026
3
- task_id: MathArena/hmmt_feb_2026
4
- value: 82.6
5
- date: '2026-04-07'
6
- source:
7
- url: https://huggingface.co/zai-org/GLM-5.1
8
- name: Model Card
 
 
 
 
 
 
 
 
 
.eval_results/gpqa.yaml DELETED
@@ -1,8 +0,0 @@
1
- - dataset:
2
- id: Idavidrein/gpqa
3
- task_id: diamond
4
- value: 86.2
5
- date: '2026-04-07'
6
- source:
7
- url: https://huggingface.co/zai-org/GLM-5.1
8
- name: Model Card
 
 
 
 
 
 
 
 
 
.eval_results/hle.yaml DELETED
@@ -1,8 +0,0 @@
1
- - dataset:
2
- id: cais/hle
3
- task_id: hle
4
- value: 31.0
5
- date: '2026-04-07'
6
- source:
7
- url: https://huggingface.co/zai-org/GLM-5.1
8
- name: Model Card
 
 
 
 
 
 
 
 
 
.eval_results/hle_with_tools.yaml DELETED
@@ -1,9 +0,0 @@
1
- - dataset:
2
- id: cais/hle
3
- task_id: hle
4
- value: 52.3
5
- date: '2026-04-07'
6
- source:
7
- url: https://huggingface.co/zai-org/GLM-5.1
8
- name: Model Card
9
- notes: "With tools"
 
 
 
 
 
 
 
 
 
 
.eval_results/swe_bench_pro.yaml DELETED
@@ -1,8 +0,0 @@
1
- - dataset:
2
- id: ScaleAI/SWE-bench_Pro
3
- task_id: SWE_Bench_Pro
4
- value: 58.4
5
- source:
6
- url: https://huggingface.co/zai-org/GLM-5.1
7
- name: Model Card
8
- notes: high reasoning
 
 
 
 
 
 
 
 
 
.eval_results/terminal_bench_2.yaml DELETED
@@ -1,8 +0,0 @@
1
- - dataset:
2
- id: harborframework/terminal-bench-2.0
3
- task_id: terminalbench_2
4
- value: 63.5
5
- date: '2026-04-07'
6
- source:
7
- url: https://huggingface.co/zai-org/GLM-5.1
8
- name: Model Card
 
 
 
 
 
 
 
 
 
.eval_results/terminal_bench_2_claudecode.yaml DELETED
@@ -1,9 +0,0 @@
1
- - dataset:
2
- id: harborframework/terminal-bench-2.0
3
- task_id: terminalbench_2
4
- value: 69.0
5
- date: '2026-04-07'
6
- source:
7
- url: https://huggingface.co/zai-org/GLM-5.1
8
- name: Model Card
9
- notes: "agent: Terminus 2(Claude Code)"