Marta102 commited on
Commit
f56e36c
·
verified ·
1 Parent(s): 4c0635d

checkpoint 9690 contra

Browse files
Files changed (5) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +3 -0
  3. rng_state.pth +3 -0
  4. scheduler.pt +3 -0
  5. trainer_state.json +1010 -0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24ebf42534f62a2ab566e1be10934d76767145d097b329777b529c10243d1aa7
3
  size 1192135096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:728e0719bfc2b88b533e21bdb4f2454d9fd976656cdd246883e6a01c7411a720
3
  size 1192135096
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b45ae3f39d88743b1662eb6134b02ad750d8cfaf6d404377e8efc8e59b6194d0
3
+ size 2384460363
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91e62442493ae4f2963ab198bb4e1be4db65ab5cdd6b158248d98219357cb2b0
3
+ size 14645
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af0f0a49e234227ff12101e0294b33302da01a67e316f5465c9475b2201d4ef
3
+ size 1465
trainer_state.json ADDED
@@ -0,0 +1,1010 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 500,
3
+ "best_metric": 279.8433532714844,
4
+ "best_model_checkpoint": "output-dpo-contrastive-new/checkpoint-500",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 9690,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.03096634345545685,
14
+ "grad_norm": 1144.0,
15
+ "learning_rate": 9.997424714962634e-06,
16
+ "loss": 478.5263,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.0619326869109137,
21
+ "grad_norm": 249.0,
22
+ "learning_rate": 9.989597261051275e-06,
23
+ "loss": 5.8995,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.09289903036637055,
28
+ "grad_norm": 1416.0,
29
+ "learning_rate": 9.976525602356642e-06,
30
+ "loss": 1.974,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.1238653738218274,
35
+ "grad_norm": 124.0,
36
+ "learning_rate": 9.958223477553715e-06,
37
+ "loss": 1.2967,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.15483171727728426,
42
+ "grad_norm": 185.0,
43
+ "learning_rate": 9.93471012268208e-06,
44
+ "loss": 0.9707,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.15483171727728426,
49
+ "eval_logits/chosen": NaN,
50
+ "eval_logits/rejected": NaN,
51
+ "eval_logps/chosen": -7700.236328125,
52
+ "eval_logps/rejected": -5658.39892578125,
53
+ "eval_loss": 279.8433532714844,
54
+ "eval_rewards/accuracies": 0.3200867176055908,
55
+ "eval_rewards/chosen": -724.652587890625,
56
+ "eval_rewards/margins": -192.2952880859375,
57
+ "eval_rewards/rejected": -532.3572998046875,
58
+ "eval_runtime": 1360.3414,
59
+ "eval_samples_per_second": 14.243,
60
+ "eval_steps_per_second": 14.243,
61
+ "step": 500
62
+ },
63
+ {
64
+ "epoch": 0.1857980607327411,
65
+ "grad_norm": 136.0,
66
+ "learning_rate": 9.906010250928317e-06,
67
+ "loss": 0.8311,
68
+ "step": 600
69
+ },
70
+ {
71
+ "epoch": 0.21676440418819795,
72
+ "grad_norm": 66.5,
73
+ "learning_rate": 9.872154026651767e-06,
74
+ "loss": 0.6984,
75
+ "step": 700
76
+ },
77
+ {
78
+ "epoch": 0.2477307476436548,
79
+ "grad_norm": 768.0,
80
+ "learning_rate": 9.833177033680945e-06,
81
+ "loss": 0.7479,
82
+ "step": 800
83
+ },
84
+ {
85
+ "epoch": 0.27869709109911167,
86
+ "grad_norm": 73.5,
87
+ "learning_rate": 9.789120237913954e-06,
88
+ "loss": 0.5771,
89
+ "step": 900
90
+ },
91
+ {
92
+ "epoch": 0.3096634345545685,
93
+ "grad_norm": 71.5,
94
+ "learning_rate": 9.740029944262193e-06,
95
+ "loss": 0.5536,
96
+ "step": 1000
97
+ },
98
+ {
99
+ "epoch": 0.3096634345545685,
100
+ "eval_logits/chosen": NaN,
101
+ "eval_logits/rejected": NaN,
102
+ "eval_logps/chosen": -8134.36328125,
103
+ "eval_logps/rejected": -5972.11669921875,
104
+ "eval_loss": 297.4208679199219,
105
+ "eval_rewards/accuracies": 0.31869322061538696,
106
+ "eval_rewards/chosen": -768.0653686523438,
107
+ "eval_rewards/margins": -204.33627319335938,
108
+ "eval_rewards/rejected": -563.7291259765625,
109
+ "eval_runtime": 1355.184,
110
+ "eval_samples_per_second": 14.298,
111
+ "eval_steps_per_second": 14.298,
112
+ "step": 1000
113
+ },
114
+ {
115
+ "epoch": 0.34062977801002536,
116
+ "grad_norm": 53.5,
117
+ "learning_rate": 9.685957747982618e-06,
118
+ "loss": 0.5381,
119
+ "step": 1100
120
+ },
121
+ {
122
+ "epoch": 0.3715961214654822,
123
+ "grad_norm": 84.5,
124
+ "learning_rate": 9.626960480449692e-06,
125
+ "loss": 0.5969,
126
+ "step": 1200
127
+ },
128
+ {
129
+ "epoch": 0.40256246492093906,
130
+ "grad_norm": 70.5,
131
+ "learning_rate": 9.563100149424045e-06,
132
+ "loss": 0.4972,
133
+ "step": 1300
134
+ },
135
+ {
136
+ "epoch": 0.4335288083763959,
137
+ "grad_norm": 85.5,
138
+ "learning_rate": 9.4944438738806e-06,
139
+ "loss": 0.4799,
140
+ "step": 1400
141
+ },
142
+ {
143
+ "epoch": 0.46449515183185275,
144
+ "grad_norm": 79.0,
145
+ "learning_rate": 9.421063813464661e-06,
146
+ "loss": 0.4689,
147
+ "step": 1500
148
+ },
149
+ {
150
+ "epoch": 0.46449515183185275,
151
+ "eval_logits/chosen": NaN,
152
+ "eval_logits/rejected": NaN,
153
+ "eval_logps/chosen": -8173.689453125,
154
+ "eval_logps/rejected": -5999.046875,
155
+ "eval_loss": 298.9976806640625,
156
+ "eval_rewards/accuracies": 0.3185900151729584,
157
+ "eval_rewards/chosen": -771.9979248046875,
158
+ "eval_rewards/margins": -205.575927734375,
159
+ "eval_rewards/rejected": -566.4219360351562,
160
+ "eval_runtime": 1358.7276,
161
+ "eval_samples_per_second": 14.26,
162
+ "eval_steps_per_second": 14.26,
163
+ "step": 1500
164
+ },
165
+ {
166
+ "epoch": 0.4954614952873096,
167
+ "grad_norm": 64.5,
168
+ "learning_rate": 9.343037092650156e-06,
169
+ "loss": 0.4951,
170
+ "step": 1600
171
+ },
172
+ {
173
+ "epoch": 0.5264278387427664,
174
+ "grad_norm": 55.25,
175
+ "learning_rate": 9.260445719679651e-06,
176
+ "loss": 0.4569,
177
+ "step": 1700
178
+ },
179
+ {
180
+ "epoch": 0.5573941821982233,
181
+ "grad_norm": 71.0,
182
+ "learning_rate": 9.173376500371441e-06,
183
+ "loss": 0.4647,
184
+ "step": 1800
185
+ },
186
+ {
187
+ "epoch": 0.5883605256536801,
188
+ "grad_norm": 76.0,
189
+ "learning_rate": 9.081920946884217e-06,
190
+ "loss": 0.4954,
191
+ "step": 1900
192
+ },
193
+ {
194
+ "epoch": 0.619326869109137,
195
+ "grad_norm": 48.75,
196
+ "learning_rate": 8.986175181535266e-06,
197
+ "loss": 0.4217,
198
+ "step": 2000
199
+ },
200
+ {
201
+ "epoch": 0.619326869109137,
202
+ "eval_logits/chosen": NaN,
203
+ "eval_logits/rejected": NaN,
204
+ "eval_logps/chosen": -8315.658203125,
205
+ "eval_logps/rejected": -6106.26806640625,
206
+ "eval_loss": 304.197021484375,
207
+ "eval_rewards/accuracies": 0.3195706009864807,
208
+ "eval_rewards/chosen": -786.1946411132812,
209
+ "eval_rewards/margins": -209.05052185058594,
210
+ "eval_rewards/rejected": -577.1441650390625,
211
+ "eval_runtime": 1355.5933,
212
+ "eval_samples_per_second": 14.293,
213
+ "eval_steps_per_second": 14.293,
214
+ "step": 2000
215
+ },
216
+ {
217
+ "epoch": 0.6502932125645938,
218
+ "grad_norm": 43.75,
219
+ "learning_rate": 8.886239835773252e-06,
220
+ "loss": 0.4427,
221
+ "step": 2100
222
+ },
223
+ {
224
+ "epoch": 0.6812595560200507,
225
+ "grad_norm": 47.25,
226
+ "learning_rate": 8.782219944411774e-06,
227
+ "loss": 0.4756,
228
+ "step": 2200
229
+ },
230
+ {
231
+ "epoch": 0.7122258994755075,
232
+ "grad_norm": 64.5,
233
+ "learning_rate": 8.674224835234879e-06,
234
+ "loss": 0.4246,
235
+ "step": 2300
236
+ },
237
+ {
238
+ "epoch": 0.7431922429309644,
239
+ "grad_norm": 71.5,
240
+ "learning_rate": 8.562368014090532e-06,
241
+ "loss": 0.4434,
242
+ "step": 2400
243
+ },
244
+ {
245
+ "epoch": 0.7741585863864212,
246
+ "grad_norm": 77.0,
247
+ "learning_rate": 8.446767045592829e-06,
248
+ "loss": 0.5978,
249
+ "step": 2500
250
+ },
251
+ {
252
+ "epoch": 0.7741585863864212,
253
+ "eval_logits/chosen": NaN,
254
+ "eval_logits/rejected": NaN,
255
+ "eval_logps/chosen": -8511.75,
256
+ "eval_logps/rejected": -6250.36181640625,
257
+ "eval_loss": 311.8685607910156,
258
+ "eval_rewards/accuracies": 0.3185900151729584,
259
+ "eval_rewards/chosen": -805.803955078125,
260
+ "eval_rewards/margins": -214.25033569335938,
261
+ "eval_rewards/rejected": -591.5535888671875,
262
+ "eval_runtime": 1359.1867,
263
+ "eval_samples_per_second": 14.256,
264
+ "eval_steps_per_second": 14.256,
265
+ "step": 2500
266
+ },
267
+ {
268
+ "epoch": 0.8051249298418781,
269
+ "grad_norm": 49.75,
270
+ "learning_rate": 8.327543429558335e-06,
271
+ "loss": 0.4466,
272
+ "step": 2600
273
+ },
274
+ {
275
+ "epoch": 0.836091273297335,
276
+ "grad_norm": 52.25,
277
+ "learning_rate": 8.20482247330641e-06,
278
+ "loss": 0.4372,
279
+ "step": 2700
280
+ },
281
+ {
282
+ "epoch": 0.8670576167527918,
283
+ "grad_norm": 62.0,
284
+ "learning_rate": 8.07873315995776e-06,
285
+ "loss": 0.4154,
286
+ "step": 2800
287
+ },
288
+ {
289
+ "epoch": 0.8980239602082487,
290
+ "grad_norm": 69.0,
291
+ "learning_rate": 7.9494080128696e-06,
292
+ "loss": 0.4073,
293
+ "step": 2900
294
+ },
295
+ {
296
+ "epoch": 0.9289903036637055,
297
+ "grad_norm": 46.75,
298
+ "learning_rate": 7.816982956349941e-06,
299
+ "loss": 0.399,
300
+ "step": 3000
301
+ },
302
+ {
303
+ "epoch": 0.9289903036637055,
304
+ "eval_logits/chosen": NaN,
305
+ "eval_logits/rejected": NaN,
306
+ "eval_logps/chosen": -8489.6416015625,
307
+ "eval_logps/rejected": -6233.3359375,
308
+ "eval_loss": 311.04168701171875,
309
+ "eval_rewards/accuracies": 0.31926095485687256,
310
+ "eval_rewards/chosen": -803.5930786132812,
311
+ "eval_rewards/margins": -213.7421875,
312
+ "eval_rewards/rejected": -589.8509521484375,
313
+ "eval_runtime": 1346.8135,
314
+ "eval_samples_per_second": 14.387,
315
+ "eval_steps_per_second": 14.387,
316
+ "step": 3000
317
+ },
318
+ {
319
+ "epoch": 0.9599566471191624,
320
+ "grad_norm": 40.75,
321
+ "learning_rate": 7.681597172797377e-06,
322
+ "loss": 0.4016,
323
+ "step": 3100
324
+ },
325
+ {
326
+ "epoch": 0.9909229905746192,
327
+ "grad_norm": 61.5,
328
+ "learning_rate": 7.543392956416542e-06,
329
+ "loss": 0.4017,
330
+ "step": 3200
331
+ },
332
+ {
333
+ "epoch": 1.0216764404188199,
334
+ "grad_norm": 49.75,
335
+ "learning_rate": 7.4025155636629546e-06,
336
+ "loss": 0.3778,
337
+ "step": 3300
338
+ },
339
+ {
340
+ "epoch": 1.0526427838742767,
341
+ "grad_norm": 45.0,
342
+ "learning_rate": 7.25911306057447e-06,
343
+ "loss": 0.3591,
344
+ "step": 3400
345
+ },
346
+ {
347
+ "epoch": 1.0836091273297335,
348
+ "grad_norm": 61.5,
349
+ "learning_rate": 7.113336167149775e-06,
350
+ "loss": 0.3661,
351
+ "step": 3500
352
+ },
353
+ {
354
+ "epoch": 1.0836091273297335,
355
+ "eval_logits/chosen": NaN,
356
+ "eval_logits/rejected": NaN,
357
+ "eval_logps/chosen": -8532.7568359375,
358
+ "eval_logps/rejected": -6264.232421875,
359
+ "eval_loss": 312.83892822265625,
360
+ "eval_rewards/accuracies": 0.31828033924102783,
361
+ "eval_rewards/chosen": -807.90478515625,
362
+ "eval_rewards/margins": -214.964111328125,
363
+ "eval_rewards/rejected": -592.940673828125,
364
+ "eval_runtime": 1355.1706,
365
+ "eval_samples_per_second": 14.298,
366
+ "eval_steps_per_second": 14.298,
367
+ "step": 3500
368
+ },
369
+ {
370
+ "epoch": 1.1145754707851903,
371
+ "grad_norm": 41.25,
372
+ "learning_rate": 6.965338098937496e-06,
373
+ "loss": 0.3844,
374
+ "step": 3600
375
+ },
376
+ {
377
+ "epoch": 1.1455418142406473,
378
+ "grad_norm": 62.5,
379
+ "learning_rate": 6.815274406002428e-06,
380
+ "loss": 0.3653,
381
+ "step": 3700
382
+ },
383
+ {
384
+ "epoch": 1.176508157696104,
385
+ "grad_norm": 95.0,
386
+ "learning_rate": 6.663302809438097e-06,
387
+ "loss": 0.3915,
388
+ "step": 3800
389
+ },
390
+ {
391
+ "epoch": 1.2074745011515609,
392
+ "grad_norm": 50.25,
393
+ "learning_rate": 6.509583035597538e-06,
394
+ "loss": 0.3743,
395
+ "step": 3900
396
+ },
397
+ {
398
+ "epoch": 1.2384408446070179,
399
+ "grad_norm": 57.75,
400
+ "learning_rate": 6.35427664821648e-06,
401
+ "loss": 0.3638,
402
+ "step": 4000
403
+ },
404
+ {
405
+ "epoch": 1.2384408446070179,
406
+ "eval_logits/chosen": NaN,
407
+ "eval_logits/rejected": NaN,
408
+ "eval_logps/chosen": -8631.439453125,
409
+ "eval_logps/rejected": -6336.66796875,
410
+ "eval_loss": 316.6842346191406,
411
+ "eval_rewards/accuracies": 0.3185900151729584,
412
+ "eval_rewards/chosen": -817.7728271484375,
413
+ "eval_rewards/margins": -217.5886993408203,
414
+ "eval_rewards/rejected": -600.1841430664062,
415
+ "eval_runtime": 1356.2341,
416
+ "eval_samples_per_second": 14.287,
417
+ "eval_steps_per_second": 14.287,
418
+ "step": 4000
419
+ },
420
+ {
421
+ "epoch": 1.2694071880624747,
422
+ "grad_norm": 68.0,
423
+ "learning_rate": 6.197546878605376e-06,
424
+ "loss": 0.4077,
425
+ "step": 4100
426
+ },
427
+ {
428
+ "epoch": 1.3003735315179314,
429
+ "grad_norm": 44.0,
430
+ "learning_rate": 6.039558454088796e-06,
431
+ "loss": 0.3837,
432
+ "step": 4200
433
+ },
434
+ {
435
+ "epoch": 1.3313398749733882,
436
+ "grad_norm": 47.75,
437
+ "learning_rate": 5.88047742487244e-06,
438
+ "loss": 0.3916,
439
+ "step": 4300
440
+ },
441
+ {
442
+ "epoch": 1.362306218428845,
443
+ "grad_norm": 70.5,
444
+ "learning_rate": 5.720470989519773e-06,
445
+ "loss": 0.3604,
446
+ "step": 4400
447
+ },
448
+ {
449
+ "epoch": 1.393272561884302,
450
+ "grad_norm": 42.5,
451
+ "learning_rate": 5.559707319221725e-06,
452
+ "loss": 0.3803,
453
+ "step": 4500
454
+ },
455
+ {
456
+ "epoch": 1.393272561884302,
457
+ "eval_logits/chosen": NaN,
458
+ "eval_logits/rejected": NaN,
459
+ "eval_logps/chosen": -8557.1044921875,
460
+ "eval_logps/rejected": -6282.44775390625,
461
+ "eval_loss": 313.7301940917969,
462
+ "eval_rewards/accuracies": 0.3190028965473175,
463
+ "eval_rewards/chosen": -810.3394165039062,
464
+ "eval_rewards/margins": -215.5771942138672,
465
+ "eval_rewards/rejected": -594.7621459960938,
466
+ "eval_runtime": 1359.1293,
467
+ "eval_samples_per_second": 14.256,
468
+ "eval_steps_per_second": 14.256,
469
+ "step": 4500
470
+ },
471
+ {
472
+ "epoch": 1.4242389053397588,
473
+ "grad_norm": 37.25,
474
+ "learning_rate": 5.3983553810441004e-06,
475
+ "loss": 0.3853,
476
+ "step": 4600
477
+ },
478
+ {
479
+ "epoch": 1.4552052487952156,
480
+ "grad_norm": 49.0,
481
+ "learning_rate": 5.236584760338523e-06,
482
+ "loss": 0.3786,
483
+ "step": 4700
484
+ },
485
+ {
486
+ "epoch": 1.4861715922506726,
487
+ "grad_norm": 38.75,
488
+ "learning_rate": 5.074565482503543e-06,
489
+ "loss": 0.3855,
490
+ "step": 4800
491
+ },
492
+ {
493
+ "epoch": 1.5171379357061294,
494
+ "grad_norm": 36.5,
495
+ "learning_rate": 4.912467834283229e-06,
496
+ "loss": 0.3645,
497
+ "step": 4900
498
+ },
499
+ {
500
+ "epoch": 1.5481042791615862,
501
+ "grad_norm": 49.25,
502
+ "learning_rate": 4.750462184791093e-06,
503
+ "loss": 0.3757,
504
+ "step": 5000
505
+ },
506
+ {
507
+ "epoch": 1.5481042791615862,
508
+ "eval_logits/chosen": NaN,
509
+ "eval_logits/rejected": NaN,
510
+ "eval_logps/chosen": -8567.33203125,
511
+ "eval_logps/rejected": -6289.53173828125,
512
+ "eval_loss": 314.1842346191406,
513
+ "eval_rewards/accuracies": 0.3201383054256439,
514
+ "eval_rewards/chosen": -811.3621215820312,
515
+ "eval_rewards/margins": -215.89157104492188,
516
+ "eval_rewards/rejected": -595.470458984375,
517
+ "eval_runtime": 1365.2519,
518
+ "eval_samples_per_second": 14.192,
519
+ "eval_steps_per_second": 14.192,
520
+ "step": 5000
521
+ },
522
+ {
523
+ "epoch": 1.579070622617043,
524
+ "grad_norm": 82.5,
525
+ "learning_rate": 4.588718806447441e-06,
526
+ "loss": 0.3887,
527
+ "step": 5100
528
+ },
529
+ {
530
+ "epoch": 1.6100369660724998,
531
+ "grad_norm": 46.0,
532
+ "learning_rate": 4.427407696018343e-06,
533
+ "loss": 0.3759,
534
+ "step": 5200
535
+ },
536
+ {
537
+ "epoch": 1.6410033095279568,
538
+ "grad_norm": 39.25,
539
+ "learning_rate": 4.266698395944332e-06,
540
+ "loss": 0.3836,
541
+ "step": 5300
542
+ },
543
+ {
544
+ "epoch": 1.6719696529834136,
545
+ "grad_norm": 42.5,
546
+ "learning_rate": 4.1067598161466e-06,
547
+ "loss": 0.3727,
548
+ "step": 5400
549
+ },
550
+ {
551
+ "epoch": 1.7029359964388706,
552
+ "grad_norm": 64.5,
553
+ "learning_rate": 3.947760056498012e-06,
554
+ "loss": 0.3917,
555
+ "step": 5500
556
+ },
557
+ {
558
+ "epoch": 1.7029359964388706,
559
+ "eval_logits/chosen": NaN,
560
+ "eval_logits/rejected": NaN,
561
+ "eval_logps/chosen": -8585.994140625,
562
+ "eval_logps/rejected": -6303.09765625,
563
+ "eval_loss": 314.93072509765625,
564
+ "eval_rewards/accuracies": 0.31920933723449707,
565
+ "eval_rewards/chosen": -813.2284545898438,
566
+ "eval_rewards/margins": -216.4012451171875,
567
+ "eval_rewards/rejected": -596.8271484375,
568
+ "eval_runtime": 1366.4753,
569
+ "eval_samples_per_second": 14.18,
570
+ "eval_steps_per_second": 14.18,
571
+ "step": 5500
572
+ },
573
+ {
574
+ "epoch": 1.7339023398943274,
575
+ "grad_norm": 52.5,
576
+ "learning_rate": 3.7898662301454724e-06,
577
+ "loss": 0.3941,
578
+ "step": 5600
579
+ },
580
+ {
581
+ "epoch": 1.7648686833497842,
582
+ "grad_norm": 42.75,
583
+ "learning_rate": 3.6332442878693896e-06,
584
+ "loss": 0.3701,
585
+ "step": 5700
586
+ },
587
+ {
588
+ "epoch": 1.795835026805241,
589
+ "grad_norm": 42.75,
590
+ "learning_rate": 3.4780588436648223e-06,
591
+ "loss": 0.3707,
592
+ "step": 5800
593
+ },
594
+ {
595
+ "epoch": 1.8268013702606978,
596
+ "grad_norm": 52.75,
597
+ "learning_rate": 3.3244730017275974e-06,
598
+ "loss": 0.3729,
599
+ "step": 5900
600
+ },
601
+ {
602
+ "epoch": 1.8577677137161548,
603
+ "grad_norm": 49.75,
604
+ "learning_rate": 3.172648185027306e-06,
605
+ "loss": 0.3716,
606
+ "step": 6000
607
+ },
608
+ {
609
+ "epoch": 1.8577677137161548,
610
+ "eval_logits/chosen": NaN,
611
+ "eval_logits/rejected": NaN,
612
+ "eval_logps/chosen": -8575.375,
613
+ "eval_logps/rejected": -6295.24853515625,
614
+ "eval_loss": 314.5245361328125,
615
+ "eval_rewards/accuracies": 0.3183319568634033,
616
+ "eval_rewards/chosen": -812.16650390625,
617
+ "eval_rewards/margins": -216.12428283691406,
618
+ "eval_rewards/rejected": -596.042236328125,
619
+ "eval_runtime": 1368.3845,
620
+ "eval_samples_per_second": 14.16,
621
+ "eval_steps_per_second": 14.16,
622
+ "step": 6000
623
+ },
624
+ {
625
+ "epoch": 1.8887340571716116,
626
+ "grad_norm": 54.25,
627
+ "learning_rate": 3.0227439656472878e-06,
628
+ "loss": 0.3626,
629
+ "step": 6100
630
+ },
631
+ {
632
+ "epoch": 1.9197004006270686,
633
+ "grad_norm": 38.5,
634
+ "learning_rate": 2.87491789706995e-06,
635
+ "loss": 0.393,
636
+ "step": 6200
637
+ },
638
+ {
639
+ "epoch": 1.9506667440825254,
640
+ "grad_norm": 40.5,
641
+ "learning_rate": 2.729325348583711e-06,
642
+ "loss": 0.3619,
643
+ "step": 6300
644
+ },
645
+ {
646
+ "epoch": 1.9816330875379822,
647
+ "grad_norm": 80.0,
648
+ "learning_rate": 2.5861193419855634e-06,
649
+ "loss": 0.4156,
650
+ "step": 6400
651
+ },
652
+ {
653
+ "epoch": 2.0123865373821825,
654
+ "grad_norm": 82.5,
655
+ "learning_rate": 2.4454503907509493e-06,
656
+ "loss": 0.3607,
657
+ "step": 6500
658
+ },
659
+ {
660
+ "epoch": 2.0123865373821825,
661
+ "eval_logits/chosen": NaN,
662
+ "eval_logits/rejected": NaN,
663
+ "eval_logps/chosen": -8613.890625,
664
+ "eval_logps/rejected": -6323.0517578125,
665
+ "eval_loss": 316.09857177734375,
666
+ "eval_rewards/accuracies": 0.3194673955440521,
667
+ "eval_rewards/chosen": -816.01806640625,
668
+ "eval_rewards/margins": -217.19549560546875,
669
+ "eval_rewards/rejected": -598.8226318359375,
670
+ "eval_runtime": 1371.8671,
671
+ "eval_samples_per_second": 14.124,
672
+ "eval_steps_per_second": 14.124,
673
+ "step": 6500
674
+ },
675
+ {
676
+ "epoch": 2.0433528808376398,
677
+ "grad_norm": 43.25,
678
+ "learning_rate": 2.307466341839918e-06,
679
+ "loss": 0.3691,
680
+ "step": 6600
681
+ },
682
+ {
683
+ "epoch": 2.0743192242930966,
684
+ "grad_norm": 51.0,
685
+ "learning_rate": 2.1723122203058867e-06,
686
+ "loss": 0.3404,
687
+ "step": 6700
688
+ },
689
+ {
690
+ "epoch": 2.1052855677485534,
691
+ "grad_norm": 28.5,
692
+ "learning_rate": 2.040130076870296e-06,
693
+ "loss": 0.3692,
694
+ "step": 6800
695
+ },
696
+ {
697
+ "epoch": 2.13625191120401,
698
+ "grad_norm": 66.5,
699
+ "learning_rate": 1.9110588386233686e-06,
700
+ "loss": 0.3788,
701
+ "step": 6900
702
+ },
703
+ {
704
+ "epoch": 2.167218254659467,
705
+ "grad_norm": 83.0,
706
+ "learning_rate": 1.785234163007899e-06,
707
+ "loss": 0.3582,
708
+ "step": 7000
709
+ },
710
+ {
711
+ "epoch": 2.167218254659467,
712
+ "eval_logits/chosen": NaN,
713
+ "eval_logits/rejected": NaN,
714
+ "eval_logps/chosen": -8605.3466796875,
715
+ "eval_logps/rejected": -6316.7080078125,
716
+ "eval_loss": 315.78204345703125,
717
+ "eval_rewards/accuracies": 0.3188480734825134,
718
+ "eval_rewards/chosen": -815.16357421875,
719
+ "eval_rewards/margins": -216.9755096435547,
720
+ "eval_rewards/rejected": -598.1881103515625,
721
+ "eval_runtime": 1363.6925,
722
+ "eval_samples_per_second": 14.208,
723
+ "eval_steps_per_second": 14.208,
724
+ "step": 7000
725
+ },
726
+ {
727
+ "epoch": 2.1981845981149237,
728
+ "grad_norm": 49.5,
729
+ "learning_rate": 1.6627882952395197e-06,
730
+ "loss": 0.3762,
731
+ "step": 7100
732
+ },
733
+ {
734
+ "epoch": 2.2291509415703805,
735
+ "grad_norm": 43.25,
736
+ "learning_rate": 1.543849929313328e-06,
737
+ "loss": 0.368,
738
+ "step": 7200
739
+ },
740
+ {
741
+ "epoch": 2.2601172850258378,
742
+ "grad_norm": 43.0,
743
+ "learning_rate": 1.4285440727429296e-06,
744
+ "loss": 0.3496,
745
+ "step": 7300
746
+ },
747
+ {
748
+ "epoch": 2.2910836284812945,
749
+ "grad_norm": 53.0,
750
+ "learning_rate": 1.3169919151740884e-06,
751
+ "loss": 0.3826,
752
+ "step": 7400
753
+ },
754
+ {
755
+ "epoch": 2.3220499719367513,
756
+ "grad_norm": 42.5,
757
+ "learning_rate": 1.2093107010110516e-06,
758
+ "loss": 0.3704,
759
+ "step": 7500
760
+ },
761
+ {
762
+ "epoch": 2.3220499719367513,
763
+ "eval_logits/chosen": NaN,
764
+ "eval_logits/rejected": NaN,
765
+ "eval_logps/chosen": -8608.7568359375,
766
+ "eval_logps/rejected": -6319.107421875,
767
+ "eval_loss": 315.918701171875,
768
+ "eval_rewards/accuracies": 0.31920933723449707,
769
+ "eval_rewards/chosen": -815.5045166015625,
770
+ "eval_rewards/margins": -217.0764617919922,
771
+ "eval_rewards/rejected": -598.4281616210938,
772
+ "eval_runtime": 1364.6449,
773
+ "eval_samples_per_second": 14.199,
774
+ "eval_steps_per_second": 14.199,
775
+ "step": 7500
776
+ },
777
+ {
778
+ "epoch": 2.353016315392208,
779
+ "grad_norm": 52.75,
780
+ "learning_rate": 1.1056136061894386e-06,
781
+ "loss": 0.3592,
782
+ "step": 7600
783
+ },
784
+ {
785
+ "epoch": 2.383982658847665,
786
+ "grad_norm": 61.5,
787
+ "learning_rate": 1.006009619225199e-06,
788
+ "loss": 0.3523,
789
+ "step": 7700
790
+ },
791
+ {
792
+ "epoch": 2.4149490023031217,
793
+ "grad_norm": 40.75,
794
+ "learning_rate": 9.106034266646735e-07,
795
+ "loss": 0.3726,
796
+ "step": 7800
797
+ },
798
+ {
799
+ "epoch": 2.4459153457585785,
800
+ "grad_norm": 46.75,
801
+ "learning_rate": 8.194953030561226e-07,
802
+ "loss": 0.3816,
803
+ "step": 7900
804
+ },
805
+ {
806
+ "epoch": 2.4768816892140357,
807
+ "grad_norm": 78.0,
808
+ "learning_rate": 7.327810055584211e-07,
809
+ "loss": 0.3597,
810
+ "step": 8000
811
+ },
812
+ {
813
+ "epoch": 2.4768816892140357,
814
+ "eval_logits/chosen": NaN,
815
+ "eval_logits/rejected": NaN,
816
+ "eval_logps/chosen": -8607.6982421875,
817
+ "eval_logps/rejected": -6318.22314453125,
818
+ "eval_loss": 315.8908386230469,
819
+ "eval_rewards/accuracies": 0.31874483823776245,
820
+ "eval_rewards/chosen": -815.398681640625,
821
+ "eval_rewards/margins": -217.05902099609375,
822
+ "eval_rewards/rejected": -598.3396606445312,
823
+ "eval_runtime": 1361.5153,
824
+ "eval_samples_per_second": 14.231,
825
+ "eval_steps_per_second": 14.231,
826
+ "step": 8000
827
+ },
828
+ {
829
+ "epoch": 2.507848032669492,
830
+ "grad_norm": 46.75,
831
+ "learning_rate": 6.505516732976153e-07,
832
+ "loss": 0.3639,
833
+ "step": 8100
834
+ },
835
+ {
836
+ "epoch": 2.5388143761249493,
837
+ "grad_norm": 46.25,
838
+ "learning_rate": 5.728937315771954e-07,
839
+ "loss": 0.3778,
840
+ "step": 8200
841
+ },
842
+ {
843
+ "epoch": 2.569780719580406,
844
+ "grad_norm": 100.5,
845
+ "learning_rate": 4.99888801042701e-07,
846
+ "loss": 0.3645,
847
+ "step": 8300
848
+ },
849
+ {
850
+ "epoch": 2.600747063035863,
851
+ "grad_norm": 51.5,
852
+ "learning_rate": 4.316136118961656e-07,
853
+ "loss": 0.3746,
854
+ "step": 8400
855
+ },
856
+ {
857
+ "epoch": 2.6317134064913197,
858
+ "grad_norm": 67.5,
859
+ "learning_rate": 3.6813992325055504e-07,
860
+ "loss": 0.366,
861
+ "step": 8500
862
+ },
863
+ {
864
+ "epoch": 2.6317134064913197,
865
+ "eval_logits/chosen": NaN,
866
+ "eval_logits/rejected": NaN,
867
+ "eval_logps/chosen": -8610.2001953125,
868
+ "eval_logps/rejected": -6320.1904296875,
869
+ "eval_loss": 315.9734191894531,
870
+ "eval_rewards/accuracies": 0.3197254240512848,
871
+ "eval_rewards/chosen": -815.64892578125,
872
+ "eval_rewards/margins": -217.1125030517578,
873
+ "eval_rewards/rejected": -598.5364379882812,
874
+ "eval_runtime": 1480.2365,
875
+ "eval_samples_per_second": 13.09,
876
+ "eval_steps_per_second": 13.09,
877
+ "step": 8500
878
+ },
879
+ {
880
+ "epoch": 2.6626797499467765,
881
+ "grad_norm": 47.75,
882
+ "learning_rate": 3.095344477089462e-07,
883
+ "loss": 0.3551,
884
+ "step": 8600
885
+ },
886
+ {
887
+ "epoch": 2.6936460934022337,
888
+ "grad_norm": 54.75,
889
+ "learning_rate": 2.5585878124774754e-07,
890
+ "loss": 0.3719,
891
+ "step": 8700
892
+ },
893
+ {
894
+ "epoch": 2.72461243685769,
895
+ "grad_norm": 51.0,
896
+ "learning_rate": 2.0716933847761134e-07,
897
+ "loss": 0.3659,
898
+ "step": 8800
899
+ },
900
+ {
901
+ "epoch": 2.7555787803131473,
902
+ "grad_norm": 47.5,
903
+ "learning_rate": 1.6351729335012334e-07,
904
+ "loss": 0.3829,
905
+ "step": 8900
906
+ },
907
+ {
908
+ "epoch": 2.786545123768604,
909
+ "grad_norm": 63.5,
910
+ "learning_rate": 1.2494852537256296e-07,
911
+ "loss": 0.3503,
912
+ "step": 9000
913
+ },
914
+ {
915
+ "epoch": 2.786545123768604,
916
+ "eval_logits/chosen": NaN,
917
+ "eval_logits/rejected": NaN,
918
+ "eval_logps/chosen": -8609.8154296875,
919
+ "eval_logps/rejected": -6319.88720703125,
920
+ "eval_loss": 315.95770263671875,
921
+ "eval_rewards/accuracies": 0.3191577196121216,
922
+ "eval_rewards/chosen": -815.6104736328125,
923
+ "eval_rewards/margins": -217.10435485839844,
924
+ "eval_rewards/rejected": -598.5061645507812,
925
+ "eval_runtime": 1438.727,
926
+ "eval_samples_per_second": 13.467,
927
+ "eval_steps_per_second": 13.467,
928
+ "step": 9000
929
+ },
930
+ {
931
+ "epoch": 2.817511467224061,
932
+ "grad_norm": 46.5,
933
+ "learning_rate": 9.150357138727028e-08,
934
+ "loss": 0.3738,
935
+ "step": 9100
936
+ },
937
+ {
938
+ "epoch": 2.8484778106795177,
939
+ "grad_norm": 52.5,
940
+ "learning_rate": 6.321758296630398e-08,
941
+ "loss": 0.3754,
942
+ "step": 9200
943
+ },
944
+ {
945
+ "epoch": 2.8794441541349745,
946
+ "grad_norm": 68.5,
947
+ "learning_rate": 4.0120289466166754e-08,
948
+ "loss": 0.3747,
949
+ "step": 9300
950
+ },
951
+ {
952
+ "epoch": 2.9104104975904312,
953
+ "grad_norm": 46.75,
954
+ "learning_rate": 2.2235966781427586e-08,
955
+ "loss": 0.3565,
956
+ "step": 9400
957
+ },
958
+ {
959
+ "epoch": 2.941376841045888,
960
+ "grad_norm": 58.0,
961
+ "learning_rate": 9.583411830087485e-09,
962
+ "loss": 0.4153,
963
+ "step": 9500
964
+ },
965
+ {
966
+ "epoch": 2.941376841045888,
967
+ "eval_logits/chosen": NaN,
968
+ "eval_logits/rejected": NaN,
969
+ "eval_logps/chosen": -8609.7314453125,
970
+ "eval_logps/rejected": -6319.83056640625,
971
+ "eval_loss": 315.9565734863281,
972
+ "eval_rewards/accuracies": 0.3196222186088562,
973
+ "eval_rewards/chosen": -815.6021728515625,
974
+ "eval_rewards/margins": -217.1017303466797,
975
+ "eval_rewards/rejected": -598.50048828125,
976
+ "eval_runtime": 1365.7529,
977
+ "eval_samples_per_second": 14.187,
978
+ "eval_steps_per_second": 14.187,
979
+ "step": 9500
980
+ },
981
+ {
982
+ "epoch": 2.9723431845013453,
983
+ "grad_norm": 59.25,
984
+ "learning_rate": 2.1759227974949006e-09,
985
+ "loss": 0.3534,
986
+ "step": 9600
987
+ }
988
+ ],
989
+ "logging_steps": 100,
990
+ "max_steps": 9690,
991
+ "num_input_tokens_seen": 0,
992
+ "num_train_epochs": 3,
993
+ "save_steps": 500,
994
+ "stateful_callbacks": {
995
+ "TrainerControl": {
996
+ "args": {
997
+ "should_epoch_stop": false,
998
+ "should_evaluate": false,
999
+ "should_log": false,
1000
+ "should_save": true,
1001
+ "should_training_stop": true
1002
+ },
1003
+ "attributes": {}
1004
+ }
1005
+ },
1006
+ "total_flos": 0.0,
1007
+ "train_batch_size": 1,
1008
+ "trial_name": null,
1009
+ "trial_params": null
1010
+ }