Marta102 commited on
Commit
5bf7843
·
verified ·
1 Parent(s): f56e36c

checkpoint 3230 adaptive

Browse files
Files changed (6) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +161 -849
  6. training_args.bin +2 -2
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:728e0719bfc2b88b533e21bdb4f2454d9fd976656cdd246883e6a01c7411a720
3
  size 1192135096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1b3a22004ffbd1e028eaf577427ab4c4c8a0c73990101d3663c2045e2d7cd3
3
  size 1192135096
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b45ae3f39d88743b1662eb6134b02ad750d8cfaf6d404377e8efc8e59b6194d0
3
  size 2384460363
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1c32b2c58d29f175e9bd52ca51e3628722368aa8031bf15bc96ed20689d731a
3
  size 2384460363
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91e62442493ae4f2963ab198bb4e1be4db65ab5cdd6b158248d98219357cb2b0
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5b517d1b8e2b0f837c8b00170b154961d4d989feba4326ac25583df7a55c57a
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4af0f0a49e234227ff12101e0294b33302da01a67e316f5465c9475b2201d4ef
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d948e73f1c14a6b55827418ea52a1af9ca9e02d099d84d8e825e46a843f0ab61
3
  size 1465
trainer_state.json CHANGED
@@ -1,996 +1,308 @@
1
  {
2
- "best_global_step": 500,
3
- "best_metric": 279.8433532714844,
4
- "best_model_checkpoint": "output-dpo-contrastive-new/checkpoint-500",
5
- "epoch": 3.0,
6
- "eval_steps": 500,
7
- "global_step": 9690,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.03096634345545685,
14
- "grad_norm": 1144.0,
15
- "learning_rate": 9.997424714962634e-06,
16
- "loss": 478.5263,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.0619326869109137,
21
- "grad_norm": 249.0,
22
- "learning_rate": 9.989597261051275e-06,
23
- "loss": 5.8995,
24
  "step": 200
25
  },
26
  {
27
  "epoch": 0.09289903036637055,
28
- "grad_norm": 1416.0,
29
- "learning_rate": 9.976525602356642e-06,
30
- "loss": 1.974,
31
  "step": 300
32
  },
33
  {
34
  "epoch": 0.1238653738218274,
35
- "grad_norm": 124.0,
36
- "learning_rate": 9.958223477553715e-06,
37
- "loss": 1.2967,
38
  "step": 400
39
  },
40
  {
41
  "epoch": 0.15483171727728426,
42
- "grad_norm": 185.0,
43
- "learning_rate": 9.93471012268208e-06,
44
- "loss": 0.9707,
45
- "step": 500
46
- },
47
- {
48
- "epoch": 0.15483171727728426,
49
- "eval_logits/chosen": NaN,
50
- "eval_logits/rejected": NaN,
51
- "eval_logps/chosen": -7700.236328125,
52
- "eval_logps/rejected": -5658.39892578125,
53
- "eval_loss": 279.8433532714844,
54
- "eval_rewards/accuracies": 0.3200867176055908,
55
- "eval_rewards/chosen": -724.652587890625,
56
- "eval_rewards/margins": -192.2952880859375,
57
- "eval_rewards/rejected": -532.3572998046875,
58
- "eval_runtime": 1360.3414,
59
- "eval_samples_per_second": 14.243,
60
- "eval_steps_per_second": 14.243,
61
  "step": 500
62
  },
63
  {
64
  "epoch": 0.1857980607327411,
65
- "grad_norm": 136.0,
66
- "learning_rate": 9.906010250928317e-06,
67
- "loss": 0.8311,
68
  "step": 600
69
  },
70
  {
71
  "epoch": 0.21676440418819795,
72
- "grad_norm": 66.5,
73
- "learning_rate": 9.872154026651767e-06,
74
- "loss": 0.6984,
75
  "step": 700
76
  },
77
  {
78
  "epoch": 0.2477307476436548,
79
- "grad_norm": 768.0,
80
- "learning_rate": 9.833177033680945e-06,
81
- "loss": 0.7479,
82
  "step": 800
83
  },
84
  {
85
- "epoch": 0.27869709109911167,
86
- "grad_norm": 73.5,
87
- "learning_rate": 9.789120237913954e-06,
88
- "loss": 0.5771,
89
- "step": 900
 
 
 
 
 
 
 
 
 
90
  },
91
  {
92
- "epoch": 0.3096634345545685,
93
- "grad_norm": 71.5,
94
- "learning_rate": 9.740029944262193e-06,
95
- "loss": 0.5536,
96
- "step": 1000
97
  },
98
  {
99
  "epoch": 0.3096634345545685,
100
- "eval_logits/chosen": NaN,
101
- "eval_logits/rejected": NaN,
102
- "eval_logps/chosen": -8134.36328125,
103
- "eval_logps/rejected": -5972.11669921875,
104
- "eval_loss": 297.4208679199219,
105
- "eval_rewards/accuracies": 0.31869322061538696,
106
- "eval_rewards/chosen": -768.0653686523438,
107
- "eval_rewards/margins": -204.33627319335938,
108
- "eval_rewards/rejected": -563.7291259765625,
109
- "eval_runtime": 1355.184,
110
- "eval_samples_per_second": 14.298,
111
- "eval_steps_per_second": 14.298,
112
  "step": 1000
113
  },
114
  {
115
  "epoch": 0.34062977801002536,
116
- "grad_norm": 53.5,
117
- "learning_rate": 9.685957747982618e-06,
118
- "loss": 0.5381,
119
  "step": 1100
120
  },
121
  {
122
  "epoch": 0.3715961214654822,
123
- "grad_norm": 84.5,
124
- "learning_rate": 9.626960480449692e-06,
125
- "loss": 0.5969,
126
  "step": 1200
127
  },
128
  {
129
  "epoch": 0.40256246492093906,
130
- "grad_norm": 70.5,
131
- "learning_rate": 9.563100149424045e-06,
132
- "loss": 0.4972,
133
  "step": 1300
134
  },
135
  {
136
  "epoch": 0.4335288083763959,
137
- "grad_norm": 85.5,
138
- "learning_rate": 9.4944438738806e-06,
139
- "loss": 0.4799,
140
  "step": 1400
141
  },
142
  {
143
  "epoch": 0.46449515183185275,
144
- "grad_norm": 79.0,
145
- "learning_rate": 9.421063813464661e-06,
146
- "loss": 0.4689,
147
  "step": 1500
148
  },
149
  {
150
- "epoch": 0.46449515183185275,
151
- "eval_logits/chosen": NaN,
152
- "eval_logits/rejected": NaN,
153
- "eval_logps/chosen": -8173.689453125,
154
- "eval_logps/rejected": -5999.046875,
155
- "eval_loss": 298.9976806640625,
156
- "eval_rewards/accuracies": 0.3185900151729584,
157
- "eval_rewards/chosen": -771.9979248046875,
158
- "eval_rewards/margins": -205.575927734375,
159
- "eval_rewards/rejected": -566.4219360351562,
160
- "eval_runtime": 1358.7276,
161
- "eval_samples_per_second": 14.26,
162
- "eval_steps_per_second": 14.26,
163
- "step": 1500
164
  },
165
  {
166
  "epoch": 0.4954614952873096,
167
- "grad_norm": 64.5,
168
- "learning_rate": 9.343037092650156e-06,
169
- "loss": 0.4951,
 
 
 
 
 
 
 
 
 
170
  "step": 1600
171
  },
172
  {
173
  "epoch": 0.5264278387427664,
174
- "grad_norm": 55.25,
175
- "learning_rate": 9.260445719679651e-06,
176
- "loss": 0.4569,
177
  "step": 1700
178
  },
179
  {
180
  "epoch": 0.5573941821982233,
181
- "grad_norm": 71.0,
182
- "learning_rate": 9.173376500371441e-06,
183
- "loss": 0.4647,
184
  "step": 1800
185
  },
186
  {
187
  "epoch": 0.5883605256536801,
188
- "grad_norm": 76.0,
189
- "learning_rate": 9.081920946884217e-06,
190
- "loss": 0.4954,
191
  "step": 1900
192
  },
193
  {
194
  "epoch": 0.619326869109137,
195
- "grad_norm": 48.75,
196
- "learning_rate": 8.986175181535266e-06,
197
- "loss": 0.4217,
198
- "step": 2000
199
- },
200
- {
201
- "epoch": 0.619326869109137,
202
- "eval_logits/chosen": NaN,
203
- "eval_logits/rejected": NaN,
204
- "eval_logps/chosen": -8315.658203125,
205
- "eval_logps/rejected": -6106.26806640625,
206
- "eval_loss": 304.197021484375,
207
- "eval_rewards/accuracies": 0.3195706009864807,
208
- "eval_rewards/chosen": -786.1946411132812,
209
- "eval_rewards/margins": -209.05052185058594,
210
- "eval_rewards/rejected": -577.1441650390625,
211
- "eval_runtime": 1355.5933,
212
- "eval_samples_per_second": 14.293,
213
- "eval_steps_per_second": 14.293,
214
  "step": 2000
215
  },
216
  {
217
  "epoch": 0.6502932125645938,
218
- "grad_norm": 43.75,
219
- "learning_rate": 8.886239835773252e-06,
220
- "loss": 0.4427,
221
  "step": 2100
222
  },
223
  {
224
  "epoch": 0.6812595560200507,
225
- "grad_norm": 47.25,
226
- "learning_rate": 8.782219944411774e-06,
227
- "loss": 0.4756,
228
  "step": 2200
229
  },
230
  {
231
  "epoch": 0.7122258994755075,
232
- "grad_norm": 64.5,
233
- "learning_rate": 8.674224835234879e-06,
234
- "loss": 0.4246,
235
  "step": 2300
236
  },
237
  {
238
  "epoch": 0.7431922429309644,
239
- "grad_norm": 71.5,
240
- "learning_rate": 8.562368014090532e-06,
241
- "loss": 0.4434,
242
  "step": 2400
243
  },
244
  {
245
- "epoch": 0.7741585863864212,
246
- "grad_norm": 77.0,
247
- "learning_rate": 8.446767045592829e-06,
248
- "loss": 0.5978,
249
- "step": 2500
 
 
 
 
 
 
 
 
 
250
  },
251
  {
252
  "epoch": 0.7741585863864212,
253
- "eval_logits/chosen": NaN,
254
- "eval_logits/rejected": NaN,
255
- "eval_logps/chosen": -8511.75,
256
- "eval_logps/rejected": -6250.36181640625,
257
- "eval_loss": 311.8685607910156,
258
- "eval_rewards/accuracies": 0.3185900151729584,
259
- "eval_rewards/chosen": -805.803955078125,
260
- "eval_rewards/margins": -214.25033569335938,
261
- "eval_rewards/rejected": -591.5535888671875,
262
- "eval_runtime": 1359.1867,
263
- "eval_samples_per_second": 14.256,
264
- "eval_steps_per_second": 14.256,
265
  "step": 2500
266
  },
267
  {
268
  "epoch": 0.8051249298418781,
269
- "grad_norm": 49.75,
270
- "learning_rate": 8.327543429558335e-06,
271
- "loss": 0.4466,
272
  "step": 2600
273
  },
274
  {
275
  "epoch": 0.836091273297335,
276
- "grad_norm": 52.25,
277
- "learning_rate": 8.20482247330641e-06,
278
- "loss": 0.4372,
279
  "step": 2700
280
  },
281
  {
282
  "epoch": 0.8670576167527918,
283
- "grad_norm": 62.0,
284
- "learning_rate": 8.07873315995776e-06,
285
- "loss": 0.4154,
286
  "step": 2800
287
  },
288
  {
289
  "epoch": 0.8980239602082487,
290
- "grad_norm": 69.0,
291
- "learning_rate": 7.9494080128696e-06,
292
- "loss": 0.4073,
293
  "step": 2900
294
  },
295
  {
296
  "epoch": 0.9289903036637055,
297
- "grad_norm": 46.75,
298
- "learning_rate": 7.816982956349941e-06,
299
- "loss": 0.399,
300
- "step": 3000
301
- },
302
- {
303
- "epoch": 0.9289903036637055,
304
- "eval_logits/chosen": NaN,
305
- "eval_logits/rejected": NaN,
306
- "eval_logps/chosen": -8489.6416015625,
307
- "eval_logps/rejected": -6233.3359375,
308
- "eval_loss": 311.04168701171875,
309
- "eval_rewards/accuracies": 0.31926095485687256,
310
- "eval_rewards/chosen": -803.5930786132812,
311
- "eval_rewards/margins": -213.7421875,
312
- "eval_rewards/rejected": -589.8509521484375,
313
- "eval_runtime": 1346.8135,
314
- "eval_samples_per_second": 14.387,
315
- "eval_steps_per_second": 14.387,
316
  "step": 3000
317
  },
318
  {
319
  "epoch": 0.9599566471191624,
320
- "grad_norm": 40.75,
321
- "learning_rate": 7.681597172797377e-06,
322
- "loss": 0.4016,
323
  "step": 3100
324
  },
325
  {
326
  "epoch": 0.9909229905746192,
327
- "grad_norm": 61.5,
328
- "learning_rate": 7.543392956416542e-06,
329
- "loss": 0.4017,
330
  "step": 3200
331
  },
332
  {
333
- "epoch": 1.0216764404188199,
334
- "grad_norm": 49.75,
335
- "learning_rate": 7.4025155636629546e-06,
336
- "loss": 0.3778,
337
- "step": 3300
338
- },
339
- {
340
- "epoch": 1.0526427838742767,
341
- "grad_norm": 45.0,
342
- "learning_rate": 7.25911306057447e-06,
343
- "loss": 0.3591,
344
- "step": 3400
345
- },
346
- {
347
- "epoch": 1.0836091273297335,
348
- "grad_norm": 61.5,
349
- "learning_rate": 7.113336167149775e-06,
350
- "loss": 0.3661,
351
- "step": 3500
352
- },
353
- {
354
- "epoch": 1.0836091273297335,
355
- "eval_logits/chosen": NaN,
356
- "eval_logits/rejected": NaN,
357
- "eval_logps/chosen": -8532.7568359375,
358
- "eval_logps/rejected": -6264.232421875,
359
- "eval_loss": 312.83892822265625,
360
- "eval_rewards/accuracies": 0.31828033924102783,
361
- "eval_rewards/chosen": -807.90478515625,
362
- "eval_rewards/margins": -214.964111328125,
363
- "eval_rewards/rejected": -592.940673828125,
364
- "eval_runtime": 1355.1706,
365
- "eval_samples_per_second": 14.298,
366
- "eval_steps_per_second": 14.298,
367
- "step": 3500
368
- },
369
- {
370
- "epoch": 1.1145754707851903,
371
- "grad_norm": 41.25,
372
- "learning_rate": 6.965338098937496e-06,
373
- "loss": 0.3844,
374
- "step": 3600
375
- },
376
- {
377
- "epoch": 1.1455418142406473,
378
- "grad_norm": 62.5,
379
- "learning_rate": 6.815274406002428e-06,
380
- "loss": 0.3653,
381
- "step": 3700
382
- },
383
- {
384
- "epoch": 1.176508157696104,
385
- "grad_norm": 95.0,
386
- "learning_rate": 6.663302809438097e-06,
387
- "loss": 0.3915,
388
- "step": 3800
389
- },
390
- {
391
- "epoch": 1.2074745011515609,
392
- "grad_norm": 50.25,
393
- "learning_rate": 6.509583035597538e-06,
394
- "loss": 0.3743,
395
- "step": 3900
396
- },
397
- {
398
- "epoch": 1.2384408446070179,
399
- "grad_norm": 57.75,
400
- "learning_rate": 6.35427664821648e-06,
401
- "loss": 0.3638,
402
- "step": 4000
403
- },
404
- {
405
- "epoch": 1.2384408446070179,
406
- "eval_logits/chosen": NaN,
407
- "eval_logits/rejected": NaN,
408
- "eval_logps/chosen": -8631.439453125,
409
- "eval_logps/rejected": -6336.66796875,
410
- "eval_loss": 316.6842346191406,
411
- "eval_rewards/accuracies": 0.3185900151729584,
412
- "eval_rewards/chosen": -817.7728271484375,
413
- "eval_rewards/margins": -217.5886993408203,
414
- "eval_rewards/rejected": -600.1841430664062,
415
- "eval_runtime": 1356.2341,
416
- "eval_samples_per_second": 14.287,
417
- "eval_steps_per_second": 14.287,
418
- "step": 4000
419
- },
420
- {
421
- "epoch": 1.2694071880624747,
422
- "grad_norm": 68.0,
423
- "learning_rate": 6.197546878605376e-06,
424
- "loss": 0.4077,
425
- "step": 4100
426
- },
427
- {
428
- "epoch": 1.3003735315179314,
429
- "grad_norm": 44.0,
430
- "learning_rate": 6.039558454088796e-06,
431
- "loss": 0.3837,
432
- "step": 4200
433
- },
434
- {
435
- "epoch": 1.3313398749733882,
436
- "grad_norm": 47.75,
437
- "learning_rate": 5.88047742487244e-06,
438
- "loss": 0.3916,
439
- "step": 4300
440
- },
441
- {
442
- "epoch": 1.362306218428845,
443
- "grad_norm": 70.5,
444
- "learning_rate": 5.720470989519773e-06,
445
- "loss": 0.3604,
446
- "step": 4400
447
- },
448
- {
449
- "epoch": 1.393272561884302,
450
- "grad_norm": 42.5,
451
- "learning_rate": 5.559707319221725e-06,
452
- "loss": 0.3803,
453
- "step": 4500
454
- },
455
- {
456
- "epoch": 1.393272561884302,
457
- "eval_logits/chosen": NaN,
458
- "eval_logits/rejected": NaN,
459
- "eval_logps/chosen": -8557.1044921875,
460
- "eval_logps/rejected": -6282.44775390625,
461
- "eval_loss": 313.7301940917969,
462
- "eval_rewards/accuracies": 0.3190028965473175,
463
- "eval_rewards/chosen": -810.3394165039062,
464
- "eval_rewards/margins": -215.5771942138672,
465
- "eval_rewards/rejected": -594.7621459960938,
466
- "eval_runtime": 1359.1293,
467
- "eval_samples_per_second": 14.256,
468
- "eval_steps_per_second": 14.256,
469
- "step": 4500
470
- },
471
- {
472
- "epoch": 1.4242389053397588,
473
- "grad_norm": 37.25,
474
- "learning_rate": 5.3983553810441004e-06,
475
- "loss": 0.3853,
476
- "step": 4600
477
- },
478
- {
479
- "epoch": 1.4552052487952156,
480
- "grad_norm": 49.0,
481
- "learning_rate": 5.236584760338523e-06,
482
- "loss": 0.3786,
483
- "step": 4700
484
- },
485
- {
486
- "epoch": 1.4861715922506726,
487
- "grad_norm": 38.75,
488
- "learning_rate": 5.074565482503543e-06,
489
- "loss": 0.3855,
490
- "step": 4800
491
- },
492
- {
493
- "epoch": 1.5171379357061294,
494
- "grad_norm": 36.5,
495
- "learning_rate": 4.912467834283229e-06,
496
- "loss": 0.3645,
497
- "step": 4900
498
- },
499
- {
500
- "epoch": 1.5481042791615862,
501
- "grad_norm": 49.25,
502
- "learning_rate": 4.750462184791093e-06,
503
- "loss": 0.3757,
504
- "step": 5000
505
- },
506
- {
507
- "epoch": 1.5481042791615862,
508
- "eval_logits/chosen": NaN,
509
- "eval_logits/rejected": NaN,
510
- "eval_logps/chosen": -8567.33203125,
511
- "eval_logps/rejected": -6289.53173828125,
512
- "eval_loss": 314.1842346191406,
513
- "eval_rewards/accuracies": 0.3201383054256439,
514
- "eval_rewards/chosen": -811.3621215820312,
515
- "eval_rewards/margins": -215.89157104492188,
516
- "eval_rewards/rejected": -595.470458984375,
517
- "eval_runtime": 1365.2519,
518
- "eval_samples_per_second": 14.192,
519
- "eval_steps_per_second": 14.192,
520
- "step": 5000
521
- },
522
- {
523
- "epoch": 1.579070622617043,
524
- "grad_norm": 82.5,
525
- "learning_rate": 4.588718806447441e-06,
526
- "loss": 0.3887,
527
- "step": 5100
528
- },
529
- {
530
- "epoch": 1.6100369660724998,
531
- "grad_norm": 46.0,
532
- "learning_rate": 4.427407696018343e-06,
533
- "loss": 0.3759,
534
- "step": 5200
535
- },
536
- {
537
- "epoch": 1.6410033095279568,
538
- "grad_norm": 39.25,
539
- "learning_rate": 4.266698395944332e-06,
540
- "loss": 0.3836,
541
- "step": 5300
542
- },
543
- {
544
- "epoch": 1.6719696529834136,
545
- "grad_norm": 42.5,
546
- "learning_rate": 4.1067598161466e-06,
547
- "loss": 0.3727,
548
- "step": 5400
549
- },
550
- {
551
- "epoch": 1.7029359964388706,
552
- "grad_norm": 64.5,
553
- "learning_rate": 3.947760056498012e-06,
554
- "loss": 0.3917,
555
- "step": 5500
556
- },
557
- {
558
- "epoch": 1.7029359964388706,
559
- "eval_logits/chosen": NaN,
560
- "eval_logits/rejected": NaN,
561
- "eval_logps/chosen": -8585.994140625,
562
- "eval_logps/rejected": -6303.09765625,
563
- "eval_loss": 314.93072509765625,
564
- "eval_rewards/accuracies": 0.31920933723449707,
565
- "eval_rewards/chosen": -813.2284545898438,
566
- "eval_rewards/margins": -216.4012451171875,
567
- "eval_rewards/rejected": -596.8271484375,
568
- "eval_runtime": 1366.4753,
569
- "eval_samples_per_second": 14.18,
570
- "eval_steps_per_second": 14.18,
571
- "step": 5500
572
- },
573
- {
574
- "epoch": 1.7339023398943274,
575
- "grad_norm": 52.5,
576
- "learning_rate": 3.7898662301454724e-06,
577
- "loss": 0.3941,
578
- "step": 5600
579
- },
580
- {
581
- "epoch": 1.7648686833497842,
582
- "grad_norm": 42.75,
583
- "learning_rate": 3.6332442878693896e-06,
584
- "loss": 0.3701,
585
- "step": 5700
586
- },
587
- {
588
- "epoch": 1.795835026805241,
589
- "grad_norm": 42.75,
590
- "learning_rate": 3.4780588436648223e-06,
591
- "loss": 0.3707,
592
- "step": 5800
593
- },
594
- {
595
- "epoch": 1.8268013702606978,
596
- "grad_norm": 52.75,
597
- "learning_rate": 3.3244730017275974e-06,
598
- "loss": 0.3729,
599
- "step": 5900
600
- },
601
- {
602
- "epoch": 1.8577677137161548,
603
- "grad_norm": 49.75,
604
- "learning_rate": 3.172648185027306e-06,
605
- "loss": 0.3716,
606
- "step": 6000
607
- },
608
- {
609
- "epoch": 1.8577677137161548,
610
- "eval_logits/chosen": NaN,
611
- "eval_logits/rejected": NaN,
612
- "eval_logps/chosen": -8575.375,
613
- "eval_logps/rejected": -6295.24853515625,
614
- "eval_loss": 314.5245361328125,
615
- "eval_rewards/accuracies": 0.3183319568634033,
616
- "eval_rewards/chosen": -812.16650390625,
617
- "eval_rewards/margins": -216.12428283691406,
618
- "eval_rewards/rejected": -596.042236328125,
619
- "eval_runtime": 1368.3845,
620
- "eval_samples_per_second": 14.16,
621
- "eval_steps_per_second": 14.16,
622
- "step": 6000
623
- },
624
- {
625
- "epoch": 1.8887340571716116,
626
- "grad_norm": 54.25,
627
- "learning_rate": 3.0227439656472878e-06,
628
- "loss": 0.3626,
629
- "step": 6100
630
- },
631
- {
632
- "epoch": 1.9197004006270686,
633
- "grad_norm": 38.5,
634
- "learning_rate": 2.87491789706995e-06,
635
- "loss": 0.393,
636
- "step": 6200
637
- },
638
- {
639
- "epoch": 1.9506667440825254,
640
- "grad_norm": 40.5,
641
- "learning_rate": 2.729325348583711e-06,
642
- "loss": 0.3619,
643
- "step": 6300
644
- },
645
- {
646
- "epoch": 1.9816330875379822,
647
- "grad_norm": 80.0,
648
- "learning_rate": 2.5861193419855634e-06,
649
- "loss": 0.4156,
650
- "step": 6400
651
- },
652
- {
653
- "epoch": 2.0123865373821825,
654
- "grad_norm": 82.5,
655
- "learning_rate": 2.4454503907509493e-06,
656
- "loss": 0.3607,
657
- "step": 6500
658
- },
659
- {
660
- "epoch": 2.0123865373821825,
661
- "eval_logits/chosen": NaN,
662
- "eval_logits/rejected": NaN,
663
- "eval_logps/chosen": -8613.890625,
664
- "eval_logps/rejected": -6323.0517578125,
665
- "eval_loss": 316.09857177734375,
666
- "eval_rewards/accuracies": 0.3194673955440521,
667
- "eval_rewards/chosen": -816.01806640625,
668
- "eval_rewards/margins": -217.19549560546875,
669
- "eval_rewards/rejected": -598.8226318359375,
670
- "eval_runtime": 1371.8671,
671
- "eval_samples_per_second": 14.124,
672
- "eval_steps_per_second": 14.124,
673
- "step": 6500
674
- },
675
- {
676
- "epoch": 2.0433528808376398,
677
- "grad_norm": 43.25,
678
- "learning_rate": 2.307466341839918e-06,
679
- "loss": 0.3691,
680
- "step": 6600
681
- },
682
- {
683
- "epoch": 2.0743192242930966,
684
- "grad_norm": 51.0,
685
- "learning_rate": 2.1723122203058867e-06,
686
- "loss": 0.3404,
687
- "step": 6700
688
- },
689
- {
690
- "epoch": 2.1052855677485534,
691
- "grad_norm": 28.5,
692
- "learning_rate": 2.040130076870296e-06,
693
- "loss": 0.3692,
694
- "step": 6800
695
- },
696
- {
697
- "epoch": 2.13625191120401,
698
- "grad_norm": 66.5,
699
- "learning_rate": 1.9110588386233686e-06,
700
- "loss": 0.3788,
701
- "step": 6900
702
- },
703
- {
704
- "epoch": 2.167218254659467,
705
- "grad_norm": 83.0,
706
- "learning_rate": 1.785234163007899e-06,
707
- "loss": 0.3582,
708
- "step": 7000
709
- },
710
- {
711
- "epoch": 2.167218254659467,
712
- "eval_logits/chosen": NaN,
713
- "eval_logits/rejected": NaN,
714
- "eval_logps/chosen": -8605.3466796875,
715
- "eval_logps/rejected": -6316.7080078125,
716
- "eval_loss": 315.78204345703125,
717
- "eval_rewards/accuracies": 0.3188480734825134,
718
- "eval_rewards/chosen": -815.16357421875,
719
- "eval_rewards/margins": -216.9755096435547,
720
- "eval_rewards/rejected": -598.1881103515625,
721
- "eval_runtime": 1363.6925,
722
- "eval_samples_per_second": 14.208,
723
- "eval_steps_per_second": 14.208,
724
- "step": 7000
725
- },
726
- {
727
- "epoch": 2.1981845981149237,
728
- "grad_norm": 49.5,
729
- "learning_rate": 1.6627882952395197e-06,
730
- "loss": 0.3762,
731
- "step": 7100
732
- },
733
- {
734
- "epoch": 2.2291509415703805,
735
- "grad_norm": 43.25,
736
- "learning_rate": 1.543849929313328e-06,
737
- "loss": 0.368,
738
- "step": 7200
739
- },
740
- {
741
- "epoch": 2.2601172850258378,
742
- "grad_norm": 43.0,
743
- "learning_rate": 1.4285440727429296e-06,
744
- "loss": 0.3496,
745
- "step": 7300
746
- },
747
- {
748
- "epoch": 2.2910836284812945,
749
- "grad_norm": 53.0,
750
- "learning_rate": 1.3169919151740884e-06,
751
- "loss": 0.3826,
752
- "step": 7400
753
- },
754
- {
755
- "epoch": 2.3220499719367513,
756
- "grad_norm": 42.5,
757
- "learning_rate": 1.2093107010110516e-06,
758
- "loss": 0.3704,
759
- "step": 7500
760
- },
761
- {
762
- "epoch": 2.3220499719367513,
763
- "eval_logits/chosen": NaN,
764
- "eval_logits/rejected": NaN,
765
- "eval_logps/chosen": -8608.7568359375,
766
- "eval_logps/rejected": -6319.107421875,
767
- "eval_loss": 315.918701171875,
768
- "eval_rewards/accuracies": 0.31920933723449707,
769
- "eval_rewards/chosen": -815.5045166015625,
770
- "eval_rewards/margins": -217.0764617919922,
771
- "eval_rewards/rejected": -598.4281616210938,
772
- "eval_runtime": 1364.6449,
773
- "eval_samples_per_second": 14.199,
774
- "eval_steps_per_second": 14.199,
775
- "step": 7500
776
- },
777
- {
778
- "epoch": 2.353016315392208,
779
- "grad_norm": 52.75,
780
- "learning_rate": 1.1056136061894386e-06,
781
- "loss": 0.3592,
782
- "step": 7600
783
- },
784
- {
785
- "epoch": 2.383982658847665,
786
- "grad_norm": 61.5,
787
- "learning_rate": 1.006009619225199e-06,
788
- "loss": 0.3523,
789
- "step": 7700
790
- },
791
- {
792
- "epoch": 2.4149490023031217,
793
- "grad_norm": 40.75,
794
- "learning_rate": 9.106034266646735e-07,
795
- "loss": 0.3726,
796
- "step": 7800
797
- },
798
- {
799
- "epoch": 2.4459153457585785,
800
- "grad_norm": 46.75,
801
- "learning_rate": 8.194953030561226e-07,
802
- "loss": 0.3816,
803
- "step": 7900
804
- },
805
- {
806
- "epoch": 2.4768816892140357,
807
- "grad_norm": 78.0,
808
- "learning_rate": 7.327810055584211e-07,
809
- "loss": 0.3597,
810
- "step": 8000
811
- },
812
- {
813
- "epoch": 2.4768816892140357,
814
- "eval_logits/chosen": NaN,
815
- "eval_logits/rejected": NaN,
816
- "eval_logps/chosen": -8607.6982421875,
817
- "eval_logps/rejected": -6318.22314453125,
818
- "eval_loss": 315.8908386230469,
819
- "eval_rewards/accuracies": 0.31874483823776245,
820
- "eval_rewards/chosen": -815.398681640625,
821
- "eval_rewards/margins": -217.05902099609375,
822
- "eval_rewards/rejected": -598.3396606445312,
823
- "eval_runtime": 1361.5153,
824
- "eval_samples_per_second": 14.231,
825
- "eval_steps_per_second": 14.231,
826
- "step": 8000
827
- },
828
- {
829
- "epoch": 2.507848032669492,
830
- "grad_norm": 46.75,
831
- "learning_rate": 6.505516732976153e-07,
832
- "loss": 0.3639,
833
- "step": 8100
834
- },
835
- {
836
- "epoch": 2.5388143761249493,
837
- "grad_norm": 46.25,
838
- "learning_rate": 5.728937315771954e-07,
839
- "loss": 0.3778,
840
- "step": 8200
841
- },
842
- {
843
- "epoch": 2.569780719580406,
844
- "grad_norm": 100.5,
845
- "learning_rate": 4.99888801042701e-07,
846
- "loss": 0.3645,
847
- "step": 8300
848
- },
849
- {
850
- "epoch": 2.600747063035863,
851
- "grad_norm": 51.5,
852
- "learning_rate": 4.316136118961656e-07,
853
- "loss": 0.3746,
854
- "step": 8400
855
- },
856
- {
857
- "epoch": 2.6317134064913197,
858
- "grad_norm": 67.5,
859
- "learning_rate": 3.6813992325055504e-07,
860
- "loss": 0.366,
861
- "step": 8500
862
- },
863
- {
864
- "epoch": 2.6317134064913197,
865
- "eval_logits/chosen": NaN,
866
- "eval_logits/rejected": NaN,
867
- "eval_logps/chosen": -8610.2001953125,
868
- "eval_logps/rejected": -6320.1904296875,
869
- "eval_loss": 315.9734191894531,
870
- "eval_rewards/accuracies": 0.3197254240512848,
871
- "eval_rewards/chosen": -815.64892578125,
872
- "eval_rewards/margins": -217.1125030517578,
873
- "eval_rewards/rejected": -598.5364379882812,
874
- "eval_runtime": 1480.2365,
875
- "eval_samples_per_second": 13.09,
876
- "eval_steps_per_second": 13.09,
877
- "step": 8500
878
- },
879
- {
880
- "epoch": 2.6626797499467765,
881
- "grad_norm": 47.75,
882
- "learning_rate": 3.095344477089462e-07,
883
- "loss": 0.3551,
884
- "step": 8600
885
- },
886
- {
887
- "epoch": 2.6936460934022337,
888
- "grad_norm": 54.75,
889
- "learning_rate": 2.5585878124774754e-07,
890
- "loss": 0.3719,
891
- "step": 8700
892
- },
893
- {
894
- "epoch": 2.72461243685769,
895
- "grad_norm": 51.0,
896
- "learning_rate": 2.0716933847761134e-07,
897
- "loss": 0.3659,
898
- "step": 8800
899
- },
900
- {
901
- "epoch": 2.7555787803131473,
902
- "grad_norm": 47.5,
903
- "learning_rate": 1.6351729335012334e-07,
904
- "loss": 0.3829,
905
- "step": 8900
906
- },
907
- {
908
- "epoch": 2.786545123768604,
909
- "grad_norm": 63.5,
910
- "learning_rate": 1.2494852537256296e-07,
911
- "loss": 0.3503,
912
- "step": 9000
913
- },
914
- {
915
- "epoch": 2.786545123768604,
916
- "eval_logits/chosen": NaN,
917
- "eval_logits/rejected": NaN,
918
- "eval_logps/chosen": -8609.8154296875,
919
- "eval_logps/rejected": -6319.88720703125,
920
- "eval_loss": 315.95770263671875,
921
- "eval_rewards/accuracies": 0.3191577196121216,
922
- "eval_rewards/chosen": -815.6104736328125,
923
- "eval_rewards/margins": -217.10435485839844,
924
- "eval_rewards/rejected": -598.5061645507812,
925
- "eval_runtime": 1438.727,
926
- "eval_samples_per_second": 13.467,
927
- "eval_steps_per_second": 13.467,
928
- "step": 9000
929
- },
930
- {
931
- "epoch": 2.817511467224061,
932
- "grad_norm": 46.5,
933
- "learning_rate": 9.150357138727028e-08,
934
- "loss": 0.3738,
935
- "step": 9100
936
- },
937
- {
938
- "epoch": 2.8484778106795177,
939
- "grad_norm": 52.5,
940
- "learning_rate": 6.321758296630398e-08,
941
- "loss": 0.3754,
942
- "step": 9200
943
- },
944
- {
945
- "epoch": 2.8794441541349745,
946
- "grad_norm": 68.5,
947
- "learning_rate": 4.0120289466166754e-08,
948
- "loss": 0.3747,
949
- "step": 9300
950
- },
951
- {
952
- "epoch": 2.9104104975904312,
953
- "grad_norm": 46.75,
954
- "learning_rate": 2.2235966781427586e-08,
955
- "loss": 0.3565,
956
- "step": 9400
957
- },
958
- {
959
- "epoch": 2.941376841045888,
960
- "grad_norm": 58.0,
961
- "learning_rate": 9.583411830087485e-09,
962
- "loss": 0.4153,
963
- "step": 9500
964
- },
965
- {
966
- "epoch": 2.941376841045888,
967
  "eval_logits/chosen": NaN,
968
  "eval_logits/rejected": NaN,
969
- "eval_logps/chosen": -8609.7314453125,
970
- "eval_logps/rejected": -6319.83056640625,
971
- "eval_loss": 315.9565734863281,
972
- "eval_rewards/accuracies": 0.3196222186088562,
973
- "eval_rewards/chosen": -815.6021728515625,
974
- "eval_rewards/margins": -217.1017303466797,
975
- "eval_rewards/rejected": -598.50048828125,
976
- "eval_runtime": 1365.7529,
977
- "eval_samples_per_second": 14.187,
978
- "eval_steps_per_second": 14.187,
979
- "step": 9500
980
- },
981
- {
982
- "epoch": 2.9723431845013453,
983
- "grad_norm": 59.25,
984
- "learning_rate": 2.1759227974949006e-09,
985
- "loss": 0.3534,
986
- "step": 9600
987
  }
988
  ],
989
  "logging_steps": 100,
990
- "max_steps": 9690,
991
  "num_input_tokens_seen": 0,
992
- "num_train_epochs": 3,
993
- "save_steps": 500,
994
  "stateful_callbacks": {
995
  "TrainerControl": {
996
  "args": {
 
1
  {
2
+ "best_global_step": 800,
3
+ "best_metric": 209.9661102294922,
4
+ "best_model_checkpoint": "final-model-dpo-ad-1ep/checkpoint-800",
5
+ "epoch": 1.0,
6
+ "eval_steps": 800,
7
+ "global_step": 3230,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.03096634345545685,
14
+ "grad_norm": 4.53125,
15
+ "learning_rate": 9.976838348954221e-06,
16
+ "loss": 5.5573,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.0619326869109137,
21
+ "grad_norm": 4.15625,
22
+ "learning_rate": 9.906634890087323e-06,
23
+ "loss": 0.9191,
24
  "step": 200
25
  },
26
  {
27
  "epoch": 0.09289903036637055,
28
+ "grad_norm": 4.96875,
29
+ "learning_rate": 9.790050865156384e-06,
30
+ "loss": 1.0287,
31
  "step": 300
32
  },
33
  {
34
  "epoch": 0.1238653738218274,
35
+ "grad_norm": 11.625,
36
+ "learning_rate": 9.628188298907782e-06,
37
+ "loss": 1.0742,
38
  "step": 400
39
  },
40
  {
41
  "epoch": 0.15483171727728426,
42
+ "grad_norm": 8.6875,
43
+ "learning_rate": 9.422577217034351e-06,
44
+ "loss": 1.2919,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  "step": 500
46
  },
47
  {
48
  "epoch": 0.1857980607327411,
49
+ "grad_norm": 16.75,
50
+ "learning_rate": 9.175161183420499e-06,
51
+ "loss": 0.9978,
52
  "step": 600
53
  },
54
  {
55
  "epoch": 0.21676440418819795,
56
+ "grad_norm": 5.1875,
57
+ "learning_rate": 8.888278928367003e-06,
58
+ "loss": 1.0586,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 0.2477307476436548,
63
+ "grad_norm": 15.4375,
64
+ "learning_rate": 8.564642241456986e-06,
65
+ "loss": 1.4034,
66
  "step": 800
67
  },
68
  {
69
+ "epoch": 0.2477307476436548,
70
+ "eval_logits/chosen": NaN,
71
+ "eval_logits/rejected": NaN,
72
+ "eval_logps/chosen": -6010.39697265625,
73
+ "eval_logps/rejected": -4464.38623046875,
74
+ "eval_loss": 209.9661102294922,
75
+ "eval_rewards/accuracies": 0.3250929117202759,
76
+ "eval_rewards/chosen": -555.6686401367188,
77
+ "eval_rewards/margins": -142.71261596679688,
78
+ "eval_rewards/rejected": -412.95599365234375,
79
+ "eval_runtime": 1394.4098,
80
+ "eval_samples_per_second": 13.895,
81
+ "eval_steps_per_second": 13.895,
82
+ "step": 800
83
  },
84
  {
85
+ "epoch": 0.27869709109911167,
86
+ "grad_norm": 13.875,
87
+ "learning_rate": 8.207310338033391e-06,
88
+ "loss": 1.5456,
89
+ "step": 900
90
  },
91
  {
92
  "epoch": 0.3096634345545685,
93
+ "grad_norm": 40.25,
94
+ "learning_rate": 7.819660941592014e-06,
95
+ "loss": 1.1894,
 
 
 
 
 
 
 
 
 
96
  "step": 1000
97
  },
98
  {
99
  "epoch": 0.34062977801002536,
100
+ "grad_norm": 40.25,
101
+ "learning_rate": 7.405358355437272e-06,
102
+ "loss": 1.4514,
103
  "step": 1100
104
  },
105
  {
106
  "epoch": 0.3715961214654822,
107
+ "grad_norm": 10.375,
108
+ "learning_rate": 6.968318825407323e-06,
109
+ "loss": 1.1933,
110
  "step": 1200
111
  },
112
  {
113
  "epoch": 0.40256246492093906,
114
+ "grad_norm": 28.125,
115
+ "learning_rate": 6.512673521081566e-06,
116
+ "loss": 1.3781,
117
  "step": 1300
118
  },
119
  {
120
  "epoch": 0.4335288083763959,
121
+ "grad_norm": 10.75,
122
+ "learning_rate": 6.042729485395221e-06,
123
+ "loss": 1.219,
124
  "step": 1400
125
  },
126
  {
127
  "epoch": 0.46449515183185275,
128
+ "grad_norm": 13.4375,
129
+ "learning_rate": 5.562928921789507e-06,
130
+ "loss": 1.3677,
131
  "step": 1500
132
  },
133
  {
134
+ "epoch": 0.4954614952873096,
135
+ "grad_norm": 20.5,
136
+ "learning_rate": 5.077807203740619e-06,
137
+ "loss": 2.2353,
138
+ "step": 1600
 
 
 
 
 
 
 
 
 
139
  },
140
  {
141
  "epoch": 0.4954614952873096,
142
+ "eval_logits/chosen": NaN,
143
+ "eval_logits/rejected": NaN,
144
+ "eval_logps/chosen": -6157.478515625,
145
+ "eval_logps/rejected": -4588.53955078125,
146
+ "eval_loss": 214.6779022216797,
147
+ "eval_rewards/accuracies": 0.3266928195953369,
148
+ "eval_rewards/chosen": -570.3768310546875,
149
+ "eval_rewards/margins": -145.00547790527344,
150
+ "eval_rewards/rejected": -425.37127685546875,
151
+ "eval_runtime": 2581.7706,
152
+ "eval_samples_per_second": 7.505,
153
+ "eval_steps_per_second": 7.505,
154
  "step": 1600
155
  },
156
  {
157
  "epoch": 0.5264278387427664,
158
+ "grad_norm": 7.25,
159
+ "learning_rate": 4.591950003587562e-06,
160
+ "loss": 1.754,
161
  "step": 1700
162
  },
163
  {
164
  "epoch": 0.5573941821982233,
165
+ "grad_norm": 10.6875,
166
+ "learning_rate": 4.109949945903833e-06,
167
+ "loss": 1.6524,
168
  "step": 1800
169
  },
170
  {
171
  "epoch": 0.5883605256536801,
172
+ "grad_norm": 12.9375,
173
+ "learning_rate": 3.636363195152255e-06,
174
+ "loss": 1.2557,
175
  "step": 1900
176
  },
177
  {
178
  "epoch": 0.619326869109137,
179
+ "grad_norm": 27.5,
180
+ "learning_rate": 3.1756663879834735e-06,
181
+ "loss": 1.2763,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  "step": 2000
183
  },
184
  {
185
  "epoch": 0.6502932125645938,
186
+ "grad_norm": 18.25,
187
+ "learning_rate": 2.732214317280802e-06,
188
+ "loss": 1.7662,
189
  "step": 2100
190
  },
191
  {
192
  "epoch": 0.6812595560200507,
193
+ "grad_norm": 24.25,
194
+ "learning_rate": 2.3101987679481918e-06,
195
+ "loss": 1.7935,
196
  "step": 2200
197
  },
198
  {
199
  "epoch": 0.7122258994755075,
200
+ "grad_norm": 22.375,
201
+ "learning_rate": 1.913608893551036e-06,
202
+ "loss": 1.6459,
203
  "step": 2300
204
  },
205
  {
206
  "epoch": 0.7431922429309644,
207
+ "grad_norm": 32.75,
208
+ "learning_rate": 1.5461935083544755e-06,
209
+ "loss": 1.4079,
210
  "step": 2400
211
  },
212
  {
213
+ "epoch": 0.7431922429309644,
214
+ "eval_logits/chosen": NaN,
215
+ "eval_logits/rejected": NaN,
216
+ "eval_logps/chosen": -6187.34716796875,
217
+ "eval_logps/rejected": -4600.21826171875,
218
+ "eval_loss": 216.61477661132812,
219
+ "eval_rewards/accuracies": 0.3254541754722595,
220
+ "eval_rewards/chosen": -573.3637084960938,
221
+ "eval_rewards/margins": -146.82444763183594,
222
+ "eval_rewards/rejected": -426.5391845703125,
223
+ "eval_runtime": 2856.8785,
224
+ "eval_samples_per_second": 6.782,
225
+ "eval_steps_per_second": 6.782,
226
+ "step": 2400
227
  },
228
  {
229
  "epoch": 0.7741585863864212,
230
+ "grad_norm": 16.375,
231
+ "learning_rate": 1.2114256511983274e-06,
232
+ "loss": 1.2724,
 
 
 
 
 
 
 
 
 
233
  "step": 2500
234
  },
235
  {
236
  "epoch": 0.8051249298418781,
237
+ "grad_norm": 12.6875,
238
+ "learning_rate": 9.124697561729073e-07,
239
+ "loss": 1.5263,
240
  "step": 2600
241
  },
242
  {
243
  "epoch": 0.836091273297335,
244
+ "grad_norm": 12.75,
245
+ "learning_rate": 6.521517404190009e-07,
246
+ "loss": 1.6869,
247
  "step": 2700
248
  },
249
  {
250
  "epoch": 0.8670576167527918,
251
+ "grad_norm": 11.625,
252
+ "learning_rate": 4.3293229180065233e-07,
253
+ "loss": 1.4792,
254
  "step": 2800
255
  },
256
  {
257
  "epoch": 0.8980239602082487,
258
+ "grad_norm": 9.0,
259
+ "learning_rate": 2.5688360895234796e-07,
260
+ "loss": 1.3264,
261
  "step": 2900
262
  },
263
  {
264
  "epoch": 0.9289903036637055,
265
+ "grad_norm": 25.625,
266
+ "learning_rate": 1.256698135681289e-07,
267
+ "loss": 1.3937,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  "step": 3000
269
  },
270
  {
271
  "epoch": 0.9599566471191624,
272
+ "grad_norm": 21.25,
273
+ "learning_rate": 4.05312200878627e-08,
274
+ "loss": 1.6048,
275
  "step": 3100
276
  },
277
  {
278
  "epoch": 0.9909229905746192,
279
+ "grad_norm": 36.0,
280
+ "learning_rate": 2.272611473388975e-09,
281
+ "loss": 1.2954,
282
  "step": 3200
283
  },
284
  {
285
+ "epoch": 0.9909229905746192,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  "eval_logits/chosen": NaN,
287
  "eval_logits/rejected": NaN,
288
+ "eval_logps/chosen": -6186.20751953125,
289
+ "eval_logps/rejected": -4598.9970703125,
290
+ "eval_loss": 216.60252380371094,
291
+ "eval_rewards/accuracies": 0.3249380588531494,
292
+ "eval_rewards/chosen": -573.2498168945312,
293
+ "eval_rewards/margins": -146.83267211914062,
294
+ "eval_rewards/rejected": -426.41705322265625,
295
+ "eval_runtime": 1367.7925,
296
+ "eval_samples_per_second": 14.166,
297
+ "eval_steps_per_second": 14.166,
298
+ "step": 3200
 
 
 
 
 
 
 
299
  }
300
  ],
301
  "logging_steps": 100,
302
+ "max_steps": 3230,
303
  "num_input_tokens_seen": 0,
304
+ "num_train_epochs": 1,
305
+ "save_steps": 800,
306
  "stateful_callbacks": {
307
  "TrainerControl": {
308
  "args": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34b488da56bb162b28a10a08d84bea316241bb1080116bef2e50ef4a4c3ea7f4
3
- size 6609
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03c1fc65975d3429b9d2a5590beb102c89b663c463a8376f5d3653c19966cdbc
3
+ size 6545