Jerry999 committed on
Commit
987f783
·
verified ·
1 Parent(s): da1e89d

Upload checkpoints/math_operations/base_model_eval

Browse files
.gitattributes CHANGED
@@ -62,3 +62,4 @@ checkpoints/knowledge/atomic_full_then_2step_full_sft_t20260305/checkpoint-710/t
62
  checkpoints/knowledge/lora_sft_atomic_50ep_t20260305/checkpoint-1248/tokenizer.json filter=lfs diff=lfs merge=lfs -text
63
  checkpoints/knowledge/lora_sft_atomic_50ep_t20260305/checkpoint-1274/tokenizer.json filter=lfs diff=lfs merge=lfs -text
64
  checkpoints/knowledge/lora_sft_atomic_50ep_t20260305/checkpoint-1300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
62
  checkpoints/knowledge/lora_sft_atomic_50ep_t20260305/checkpoint-1248/tokenizer.json filter=lfs diff=lfs merge=lfs -text
63
  checkpoints/knowledge/lora_sft_atomic_50ep_t20260305/checkpoint-1274/tokenizer.json filter=lfs diff=lfs merge=lfs -text
64
  checkpoints/knowledge/lora_sft_atomic_50ep_t20260305/checkpoint-1300/tokenizer.json filter=lfs diff=lfs merge=lfs -text
65
+ checkpoints/math_operations/base_model_eval/eval_results_easy_ops/balanced_test_alpaca_results.jsonl filter=lfs diff=lfs merge=lfs -text
checkpoints/math_operations/base_model_eval/eval_results_easy_ops/balanced_test_alpaca_converted.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/math_operations/base_model_eval/eval_results_easy_ops/balanced_test_alpaca_results.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fa82938ac18adb04d956ffed23da0b3f07a79e3692c2989432a310eab98ab4c
3
+ size 11059849
checkpoints/math_operations/base_model_eval/eval_results_easy_ops/eval_results.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ category,filename,total,correct,accuracy,format_found,format_accuracy,errors_count
2
+ math_operations,balanced_test_alpaca_results,500,413,82.60,425,85.00,87
checkpoints/math_operations/base_model_eval/eval_results_easy_ops/eval_summary.json ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "overall": {
3
+ "total": 500,
4
+ "correct": 413,
5
+ "accuracy": 82.6,
6
+ "format_found": 425,
7
+ "format_accuracy": 85.0
8
+ },
9
+ "per_operation": {
10
+ "a": {
11
+ "total": 25,
12
+ "correct": 25,
13
+ "accuracy": 100.0,
14
+ "format_found": 25
15
+ },
16
+ "b": {
17
+ "total": 25,
18
+ "correct": 17,
19
+ "accuracy": 68.0,
20
+ "format_found": 25
21
+ },
22
+ "c": {
23
+ "total": 25,
24
+ "correct": 25,
25
+ "accuracy": 100.0,
26
+ "format_found": 25
27
+ },
28
+ "d": {
29
+ "total": 25,
30
+ "correct": 25,
31
+ "accuracy": 100.0,
32
+ "format_found": 25
33
+ },
34
+ "e": {
35
+ "total": 25,
36
+ "correct": 13,
37
+ "accuracy": 52.0,
38
+ "format_found": 11
39
+ },
40
+ "f": {
41
+ "total": 25,
42
+ "correct": 25,
43
+ "accuracy": 100.0,
44
+ "format_found": 25
45
+ },
46
+ "g": {
47
+ "total": 25,
48
+ "correct": 25,
49
+ "accuracy": 100.0,
50
+ "format_found": 25
51
+ },
52
+ "h": {
53
+ "total": 25,
54
+ "correct": 3,
55
+ "accuracy": 12.0,
56
+ "format_found": 2
57
+ },
58
+ "i": {
59
+ "total": 25,
60
+ "correct": 25,
61
+ "accuracy": 100.0,
62
+ "format_found": 25
63
+ },
64
+ "j": {
65
+ "total": 25,
66
+ "correct": 4,
67
+ "accuracy": 16.0,
68
+ "format_found": 2
69
+ },
70
+ "k": {
71
+ "total": 25,
72
+ "correct": 25,
73
+ "accuracy": 100.0,
74
+ "format_found": 25
75
+ },
76
+ "l": {
77
+ "total": 25,
78
+ "correct": 23,
79
+ "accuracy": 92.0,
80
+ "format_found": 24
81
+ },
82
+ "m": {
83
+ "total": 25,
84
+ "correct": 25,
85
+ "accuracy": 100.0,
86
+ "format_found": 25
87
+ },
88
+ "n": {
89
+ "total": 25,
90
+ "correct": 24,
91
+ "accuracy": 96.0,
92
+ "format_found": 24
93
+ },
94
+ "o": {
95
+ "total": 25,
96
+ "correct": 25,
97
+ "accuracy": 100.0,
98
+ "format_found": 25
99
+ },
100
+ "p": {
101
+ "total": 25,
102
+ "correct": 22,
103
+ "accuracy": 88.0,
104
+ "format_found": 22
105
+ },
106
+ "q": {
107
+ "total": 25,
108
+ "correct": 15,
109
+ "accuracy": 60.0,
110
+ "format_found": 15
111
+ },
112
+ "r": {
113
+ "total": 25,
114
+ "correct": 17,
115
+ "accuracy": 68.0,
116
+ "format_found": 25
117
+ },
118
+ "s": {
119
+ "total": 25,
120
+ "correct": 25,
121
+ "accuracy": 100.0,
122
+ "format_found": 25
123
+ },
124
+ "t": {
125
+ "total": 25,
126
+ "correct": 25,
127
+ "accuracy": 100.0,
128
+ "format_found": 25
129
+ }
130
+ },
131
+ "n_errors": 87,
132
+ "results_file": "/home/jiaruil5/math_rl/mix_teachers/r3lit_rl/mix_teachers/checkpoints/math_operations/base_model_eval/eval_results_easy_ops/balanced_test_alpaca_results.jsonl"
133
+ }