josephmayo commited on
Commit
23b2c11
·
verified ·
1 Parent(s): 396932b

Upload proof trainer_log_history.json

Browse files
Files changed (1) hide show
  1. trainer_log_history.json +291 -0
trainer_log_history.json ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "loss": 2.7602264404296877,
4
+ "grad_norm": 3.374150276184082,
5
+ "learning_rate": 6.666666666666667e-05,
6
+ "epoch": 0.078125,
7
+ "step": 5
8
+ },
9
+ {
10
+ "loss": 1.8698038101196288,
11
+ "grad_norm": 1.457783579826355,
12
+ "learning_rate": 9.994100796397954e-05,
13
+ "epoch": 0.15625,
14
+ "step": 10
15
+ },
16
+ {
17
+ "loss": 1.319937038421631,
18
+ "grad_norm": 0.707484245300293,
19
+ "learning_rate": 9.958100506132127e-05,
20
+ "epoch": 0.234375,
21
+ "step": 15
22
+ },
23
+ {
24
+ "loss": 1.3707428932189942,
25
+ "grad_norm": 0.43754199147224426,
26
+ "learning_rate": 9.889612861977853e-05,
27
+ "epoch": 0.3125,
28
+ "step": 20
29
+ },
30
+ {
31
+ "loss": 1.3082144737243653,
32
+ "grad_norm": 0.35036444664001465,
33
+ "learning_rate": 9.789086620939936e-05,
34
+ "epoch": 0.390625,
35
+ "step": 25
36
+ },
37
+ {
38
+ "loss": 1.283499526977539,
39
+ "grad_norm": 0.26873722672462463,
40
+ "learning_rate": 9.657180469054213e-05,
41
+ "epoch": 0.46875,
42
+ "step": 30
43
+ },
44
+ {
45
+ "loss": 1.1428126335144042,
46
+ "grad_norm": 0.31287550926208496,
47
+ "learning_rate": 9.494758705426978e-05,
48
+ "epoch": 0.546875,
49
+ "step": 35
50
+ },
51
+ {
52
+ "loss": 1.1817453384399415,
53
+ "grad_norm": 0.2988075911998749,
54
+ "learning_rate": 9.302885579019627e-05,
55
+ "epoch": 0.625,
56
+ "step": 40
57
+ },
58
+ {
59
+ "loss": 1.1872751235961914,
60
+ "grad_norm": 0.2937701642513275,
61
+ "learning_rate": 9.082818315286055e-05,
62
+ "epoch": 0.703125,
63
+ "step": 45
64
+ },
65
+ {
66
+ "loss": 1.146270751953125,
67
+ "grad_norm": 0.3410361111164093,
68
+ "learning_rate": 8.835998878354931e-05,
69
+ "epoch": 0.78125,
70
+ "step": 50
71
+ },
72
+ {
73
+ "loss": 1.167984962463379,
74
+ "grad_norm": 0.3269799053668976,
75
+ "learning_rate": 8.564044522734147e-05,
76
+ "epoch": 0.859375,
77
+ "step": 55
78
+ },
79
+ {
80
+ "loss": 1.0844226837158204,
81
+ "grad_norm": 0.44009652733802795,
82
+ "learning_rate": 8.268737196446264e-05,
83
+ "epoch": 0.9375,
84
+ "step": 60
85
+ },
86
+ {
87
+ "loss": 1.1493185043334961,
88
+ "grad_norm": 0.383281409740448,
89
+ "learning_rate": 7.952011865029614e-05,
90
+ "epoch": 1.015625,
91
+ "step": 65
92
+ },
93
+ {
94
+ "loss": 1.1071245193481445,
95
+ "grad_norm": 0.40226107835769653,
96
+ "learning_rate": 7.61594383291065e-05,
97
+ "epoch": 1.09375,
98
+ "step": 70
99
+ },
100
+ {
101
+ "loss": 1.062960433959961,
102
+ "grad_norm": 0.3419550359249115,
103
+ "learning_rate": 7.262735145222696e-05,
104
+ "epoch": 1.171875,
105
+ "step": 75
106
+ },
107
+ {
108
+ "loss": 1.1302300453186036,
109
+ "grad_norm": 0.39782464504241943,
110
+ "learning_rate": 6.894700159171534e-05,
111
+ "epoch": 1.25,
112
+ "step": 80
113
+ },
114
+ {
115
+ "loss": 1.0111728668212892,
116
+ "grad_norm": 0.3370531499385834,
117
+ "learning_rate": 6.514250379489753e-05,
118
+ "epoch": 1.328125,
119
+ "step": 85
120
+ },
121
+ {
122
+ "loss": 1.0343121528625487,
123
+ "grad_norm": 0.35490792989730835,
124
+ "learning_rate": 6.123878657343648e-05,
125
+ "epoch": 1.40625,
126
+ "step": 90
127
+ },
128
+ {
129
+ "loss": 1.161344337463379,
130
+ "grad_norm": 0.4397459328174591,
131
+ "learning_rate": 5.726142856227452e-05,
132
+ "epoch": 1.484375,
133
+ "step": 95
134
+ },
135
+ {
136
+ "loss": 1.1250411987304687,
137
+ "grad_norm": 0.375924289226532,
138
+ "learning_rate": 5.3236490918721794e-05,
139
+ "epoch": 1.5625,
140
+ "step": 100
141
+ },
142
+ {
143
+ "loss": 1.0718464851379395,
144
+ "grad_norm": 0.41714778542518616,
145
+ "learning_rate": 4.919034655987493e-05,
146
+ "epoch": 1.640625,
147
+ "step": 105
148
+ },
149
+ {
150
+ "loss": 1.0154043197631837,
151
+ "grad_norm": 0.37521687150001526,
152
+ "learning_rate": 4.51495073572676e-05,
153
+ "epoch": 1.71875,
154
+ "step": 110
155
+ },
156
+ {
157
+ "loss": 0.9917967796325684,
158
+ "grad_norm": 0.42887604236602783,
159
+ "learning_rate": 4.114045042103887e-05,
160
+ "epoch": 1.796875,
161
+ "step": 115
162
+ },
163
+ {
164
+ "loss": 1.1146905899047852,
165
+ "grad_norm": 0.4208148717880249,
166
+ "learning_rate": 3.718944461187138e-05,
167
+ "epoch": 1.875,
168
+ "step": 120
169
+ },
170
+ {
171
+ "loss": 0.9283761978149414,
172
+ "grad_norm": 0.3849687874317169,
173
+ "learning_rate": 3.332237841745898e-05,
174
+ "epoch": 1.953125,
175
+ "step": 125
176
+ },
177
+ {
178
+ "loss": 1.113053035736084,
179
+ "grad_norm": 0.4142734110355377,
180
+ "learning_rate": 2.9564590321322207e-05,
181
+ "epoch": 2.03125,
182
+ "step": 130
183
+ },
184
+ {
185
+ "loss": 0.9842248916625976,
186
+ "grad_norm": 0.44529953598976135,
187
+ "learning_rate": 2.5940702775459747e-05,
188
+ "epoch": 2.109375,
189
+ "step": 135
190
+ },
191
+ {
192
+ "loss": 0.9449721336364746,
193
+ "grad_norm": 0.3756776750087738,
194
+ "learning_rate": 2.2474460864709824e-05,
195
+ "epoch": 2.1875,
196
+ "step": 140
197
+ },
198
+ {
199
+ "loss": 1.0590093612670899,
200
+ "grad_norm": 0.4192875325679779,
201
+ "learning_rate": 1.9188576719953633e-05,
202
+ "epoch": 2.265625,
203
+ "step": 145
204
+ },
205
+ {
206
+ "loss": 0.9768091201782226,
207
+ "grad_norm": 0.5095818638801575,
208
+ "learning_rate": 1.6104580699624837e-05,
209
+ "epoch": 2.34375,
210
+ "step": 150
211
+ },
212
+ {
213
+ "loss": 1.038302516937256,
214
+ "grad_norm": 0.41709497570991516,
215
+ "learning_rate": 1.3242680314639993e-05,
216
+ "epoch": 2.421875,
217
+ "step": 155
218
+ },
219
+ {
220
+ "loss": 0.9975608825683594,
221
+ "grad_norm": 0.5563586354255676,
222
+ "learning_rate": 1.0621627821127289e-05,
223
+ "epoch": 2.5,
224
+ "step": 160
225
+ },
226
+ {
227
+ "loss": 0.9714397430419922,
228
+ "grad_norm": 0.8915637135505676,
229
+ "learning_rate": 8.25859734853645e-06,
230
+ "epoch": 2.578125,
231
+ "step": 165
232
+ },
233
+ {
234
+ "loss": 0.9948483467102051,
235
+ "grad_norm": 0.4391196370124817,
236
+ "learning_rate": 6.16907236823262e-06,
237
+ "epoch": 2.65625,
238
+ "step": 170
239
+ },
240
+ {
241
+ "loss": 0.9389057159423828,
242
+ "grad_norm": 0.4650712311267853,
243
+ "learning_rate": 4.366744239922998e-06,
244
+ "epoch": 2.734375,
245
+ "step": 175
246
+ },
247
+ {
248
+ "loss": 1.06390380859375,
249
+ "grad_norm": 0.4836062788963318,
250
+ "learning_rate": 2.8634225006782865e-06,
251
+ "epoch": 2.8125,
252
+ "step": 180
253
+ },
254
+ {
255
+ "loss": 1.008359718322754,
256
+ "grad_norm": 0.45215511322021484,
257
+ "learning_rate": 1.6689574843694433e-06,
258
+ "epoch": 2.890625,
259
+ "step": 185
260
+ },
261
+ {
262
+ "loss": 1.0110493659973145,
263
+ "grad_norm": 0.5408219695091248,
264
+ "learning_rate": 7.911757785462881e-07,
265
+ "epoch": 2.96875,
266
+ "step": 190
267
+ },
268
+ {
269
+ "loss": 0.911649227142334,
270
+ "grad_norm": 0.4599083364009857,
271
+ "learning_rate": 2.3582894166930268e-07,
272
+ "epoch": 3.046875,
273
+ "step": 195
274
+ },
275
+ {
276
+ "loss": 0.9673548698425293,
277
+ "grad_norm": 0.43304941058158875,
278
+ "learning_rate": 6.5558167183898955e-09,
279
+ "epoch": 3.125,
280
+ "step": 200
281
+ },
282
+ {
283
+ "train_runtime": 4256.6409,
284
+ "train_samples_per_second": 0.752,
285
+ "train_steps_per_second": 0.047,
286
+ "total_flos": 4.259313762009523e+16,
287
+ "train_loss": 1.142699921131134,
288
+ "epoch": 3.125,
289
+ "step": 200
290
+ }
291
+ ]