umer07 commited on
Commit
9aceba2
·
verified ·
1 Parent(s): 87920dd

Fathom: upload expert-e9-cot/training_log.json

Browse files
adapters/expert-e9-cot/training_log.json ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "loss": 1.4019,
4
+ "grad_norm": 0.6986930966377258,
5
+ "learning_rate": 2.25e-05,
6
+ "entropy": 1.199985957145691,
7
+ "num_tokens": 647929.0,
8
+ "mean_token_accuracy": 0.6914159089326859,
9
+ "epoch": 0.19607843137254902,
10
+ "step": 10
11
+ },
12
+ {
13
+ "loss": 1.3734,
14
+ "grad_norm": 0.18671388924121857,
15
+ "learning_rate": 4.75e-05,
16
+ "entropy": 1.325679862499237,
17
+ "num_tokens": 1300854.0,
18
+ "mean_token_accuracy": 0.6935183644294739,
19
+ "epoch": 0.39215686274509803,
20
+ "step": 20
21
+ },
22
+ {
23
+ "loss": 1.3317,
24
+ "grad_norm": 0.18022498488426208,
25
+ "learning_rate": 4.9819267987317665e-05,
26
+ "entropy": 1.4015559554100037,
27
+ "num_tokens": 1947357.0,
28
+ "mean_token_accuracy": 0.6976928591728211,
29
+ "epoch": 0.5882352941176471,
30
+ "step": 30
31
+ },
32
+ {
33
+ "loss": 1.2446,
34
+ "grad_norm": 0.18924492597579956,
35
+ "learning_rate": 4.9197869469162815e-05,
36
+ "entropy": 1.219199287891388,
37
+ "num_tokens": 2595628.0,
38
+ "mean_token_accuracy": 0.7132954180240632,
39
+ "epoch": 0.7843137254901961,
40
+ "step": 40
41
+ },
42
+ {
43
+ "loss": 1.2139,
44
+ "grad_norm": 0.23499619960784912,
45
+ "learning_rate": 4.814465939707259e-05,
46
+ "entropy": 1.2155689418315887,
47
+ "num_tokens": 3245496.0,
48
+ "mean_token_accuracy": 0.7180316507816314,
49
+ "epoch": 0.9803921568627451,
50
+ "step": 50
51
+ },
52
+ {
53
+ "loss": 1.1282,
54
+ "grad_norm": 0.25000059604644775,
55
+ "learning_rate": 4.6678432329734434e-05,
56
+ "entropy": 1.1309684544801712,
57
+ "num_tokens": 3876930.0,
58
+ "mean_token_accuracy": 0.734208858013153,
59
+ "epoch": 1.1764705882352942,
60
+ "step": 60
61
+ },
62
+ {
63
+ "loss": 1.1264,
64
+ "grad_norm": 0.3251831531524658,
65
+ "learning_rate": 4.482535312390058e-05,
66
+ "entropy": 1.1345468521118165,
67
+ "num_tokens": 4525171.0,
68
+ "mean_token_accuracy": 0.7357972204685211,
69
+ "epoch": 1.3725490196078431,
70
+ "step": 70
71
+ },
72
+ {
73
+ "loss": 1.0226,
74
+ "grad_norm": 0.2027675360441208,
75
+ "learning_rate": 4.2618490021899384e-05,
76
+ "entropy": 1.0273457378149033,
77
+ "num_tokens": 5172588.0,
78
+ "mean_token_accuracy": 0.7597126334905624,
79
+ "epoch": 1.5686274509803921,
80
+ "step": 80
81
+ },
82
+ {
83
+ "loss": 1.0134,
84
+ "grad_norm": 0.2039007991552353,
85
+ "learning_rate": 4.009722454806761e-05,
86
+ "entropy": 1.0195463865995407,
87
+ "num_tokens": 5819977.0,
88
+ "mean_token_accuracy": 0.761520317196846,
89
+ "epoch": 1.7647058823529411,
90
+ "step": 90
91
+ },
92
+ {
93
+ "loss": 0.9796,
94
+ "grad_norm": 0.1867203265428543,
95
+ "learning_rate": 3.730654874451569e-05,
96
+ "entropy": 0.9862680763006211,
97
+ "num_tokens": 6472798.0,
98
+ "mean_token_accuracy": 0.7683205276727676,
99
+ "epoch": 1.9607843137254903,
100
+ "step": 100
101
+ },
102
+ {
103
+ "loss": 0.9679,
104
+ "grad_norm": 0.21607249975204468,
105
+ "learning_rate": 3.429626228707034e-05,
106
+ "entropy": 0.9721104234457016,
107
+ "num_tokens": 7112529.0,
108
+ "mean_token_accuracy": 0.7711095601320267,
109
+ "epoch": 2.156862745098039,
110
+ "step": 110
111
+ },
112
+ {
113
+ "loss": 0.9666,
114
+ "grad_norm": 0.23092280328273773,
115
+ "learning_rate": 3.112008380887966e-05,
116
+ "entropy": 0.9724053025245667,
117
+ "num_tokens": 7758907.0,
118
+ "mean_token_accuracy": 0.7709219127893447,
119
+ "epoch": 2.3529411764705883,
120
+ "step": 120
121
+ },
122
+ {
123
+ "loss": 0.9683,
124
+ "grad_norm": 0.20999926328659058,
125
+ "learning_rate": 2.7834692290132052e-05,
126
+ "entropy": 0.9730658024549484,
127
+ "num_tokens": 8405368.0,
128
+ "mean_token_accuracy": 0.769457995891571,
129
+ "epoch": 2.549019607843137,
130
+ "step": 130
131
+ },
132
+ {
133
+ "loss": 0.9385,
134
+ "grad_norm": 0.7688829898834229,
135
+ "learning_rate": 2.449871562031194e-05,
136
+ "entropy": 0.9452048629522324,
137
+ "num_tokens": 9053347.0,
138
+ "mean_token_accuracy": 0.7750110507011414,
139
+ "epoch": 2.7450980392156863,
140
+ "step": 140
141
+ },
142
+ {
143
+ "loss": 0.9832,
144
+ "grad_norm": 0.26651284098625183,
145
+ "learning_rate": 2.1171684382123e-05,
146
+ "entropy": 0.9904936224222183,
147
+ "num_tokens": 9703053.0,
148
+ "mean_token_accuracy": 0.7652157843112946,
149
+ "epoch": 2.9411764705882355,
150
+ "step": 150
151
+ },
152
+ {
153
+ "loss": 0.9586,
154
+ "grad_norm": 0.22833150625228882,
155
+ "learning_rate": 1.7912969526829558e-05,
156
+ "entropy": 0.9607909858226776,
157
+ "num_tokens": 10340639.0,
158
+ "mean_token_accuracy": 0.7710874438285827,
159
+ "epoch": 3.1372549019607843,
160
+ "step": 160
161
+ },
162
+ {
163
+ "loss": 0.9382,
164
+ "grad_norm": 0.2782645523548126,
165
+ "learning_rate": 1.4780722898224707e-05,
166
+ "entropy": 0.9437746345996857,
167
+ "num_tokens": 10987284.0,
168
+ "mean_token_accuracy": 0.7756666958332061,
169
+ "epoch": 3.3333333333333335,
170
+ "step": 170
171
+ },
172
+ {
173
+ "loss": 0.9441,
174
+ "grad_norm": 0.2571350038051605,
175
+ "learning_rate": 1.1830839511600211e-05,
176
+ "entropy": 0.9467583298683167,
177
+ "num_tokens": 11638213.0,
178
+ "mean_token_accuracy": 0.7747708618640899,
179
+ "epoch": 3.5294117647058822,
180
+ "step": 180
181
+ },
182
+ {
183
+ "loss": 0.9175,
184
+ "grad_norm": 0.26882949471473694,
185
+ "learning_rate": 9.11596010587441e-06,
186
+ "entropy": 0.923730057477951,
187
+ "num_tokens": 12288333.0,
188
+ "mean_token_accuracy": 0.7791785061359405,
189
+ "epoch": 3.7254901960784315,
190
+ "step": 190
191
+ },
192
+ {
193
+ "loss": 0.91,
194
+ "grad_norm": 0.22646215558052063,
195
+ "learning_rate": 6.684531768359173e-06,
196
+ "entropy": 0.9165982186794281,
197
+ "num_tokens": 12936318.0,
198
+ "mean_token_accuracy": 0.7812086254358291,
199
+ "epoch": 3.9215686274509802,
200
+ "step": 200
201
+ },
202
+ {
203
+ "loss": 0.9193,
204
+ "grad_norm": 0.2461111843585968,
205
+ "learning_rate": 4.579943395339062e-06,
206
+ "entropy": 0.9269091933965683,
207
+ "num_tokens": 13570476.0,
208
+ "mean_token_accuracy": 0.7785391122102737,
209
+ "epoch": 4.117647058823529,
210
+ "step": 210
211
+ },
212
+ {
213
+ "loss": 0.9259,
214
+ "grad_norm": 0.30823367834091187,
215
+ "learning_rate": 2.8397514161892486e-06,
216
+ "entropy": 0.9287486761808396,
217
+ "num_tokens": 14217790.0,
218
+ "mean_token_accuracy": 0.7776476472616196,
219
+ "epoch": 4.313725490196078,
220
+ "step": 220
221
+ },
222
+ {
223
+ "loss": 0.8904,
224
+ "grad_norm": 0.29259830713272095,
225
+ "learning_rate": 1.4950095980035772e-06,
226
+ "entropy": 0.8954251229763031,
227
+ "num_tokens": 14867755.0,
228
+ "mean_token_accuracy": 0.7863523244857789,
229
+ "epoch": 4.509803921568627,
230
+ "step": 230
231
+ },
232
+ {
233
+ "loss": 0.9222,
234
+ "grad_norm": 0.2476108819246292,
235
+ "learning_rate": 5.697148903850868e-07,
236
+ "entropy": 0.92759590446949,
237
+ "num_tokens": 15512923.0,
238
+ "mean_token_accuracy": 0.7787990540266037,
239
+ "epoch": 4.705882352941177,
240
+ "step": 240
241
+ },
242
+ {
243
+ "loss": 0.9369,
244
+ "grad_norm": 0.2094818353652954,
245
+ "learning_rate": 8.037919931187244e-08,
246
+ "entropy": 0.9443553179502487,
247
+ "num_tokens": 16165639.0,
248
+ "mean_token_accuracy": 0.7741728782653808,
249
+ "epoch": 4.901960784313726,
250
+ "step": 250
251
+ },
252
+ {
253
+ "train_runtime": 4077.9021,
254
+ "train_samples_per_second": 1.994,
255
+ "train_steps_per_second": 0.063,
256
+ "total_flos": 4.655305572168499e+18,
257
+ "train_loss": 1.0347469572927437,
258
+ "entropy": 0.9348823964595795,
259
+ "num_tokens": 16478955.0,
260
+ "mean_token_accuracy": 0.7756926536560058,
261
+ "epoch": 5.0,
262
+ "step": 255
263
+ }
264
+ ]