File size: 13,248 Bytes
c050228 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 | {
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 33,
"global_step": 167,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.059880239520958084,
"grad_norm": 12.36841869354248,
"learning_rate": 9.936708860759493e-05,
"logits/chosen": -0.735156238079071,
"logits/rejected": -0.6986328363418579,
"logps/chosen": -19.024999618530273,
"logps/rejected": -28.543750762939453,
"loss": 0.5971,
"nll_loss": 0.20131835341453552,
"rewards/accuracies": 0.4937500059604645,
"rewards/chosen": 0.01967773400247097,
"rewards/margins": 0.14492186903953552,
"rewards/rejected": -0.12529297173023224,
"step": 10
},
{
"epoch": 0.11976047904191617,
"grad_norm": 8.02683162689209,
"learning_rate": 9.303797468354431e-05,
"logits/chosen": -0.865039050579071,
"logits/rejected": -0.849804699420929,
"logps/chosen": -17.296875,
"logps/rejected": -28.184375762939453,
"loss": 0.547,
"nll_loss": 0.18623046576976776,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.09682617336511612,
"rewards/margins": 0.814404308795929,
"rewards/rejected": -0.91064453125,
"step": 20
},
{
"epoch": 0.17964071856287425,
"grad_norm": 10.350639343261719,
"learning_rate": 8.670886075949367e-05,
"logits/chosen": -0.9947265386581421,
"logits/rejected": -0.942187488079071,
"logps/chosen": -18.146875381469727,
"logps/rejected": -30.168750762939453,
"loss": 0.5183,
"nll_loss": 0.19204100966453552,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.14462891221046448,
"rewards/margins": 1.4348633289337158,
"rewards/rejected": -1.5797851085662842,
"step": 30
},
{
"epoch": 0.19760479041916168,
"eval_logits/chosen": -1.0199424028396606,
"eval_logits/rejected": -0.9895148277282715,
"eval_logps/chosen": -18.636512756347656,
"eval_logps/rejected": -31.595394134521484,
"eval_loss": 0.5152081847190857,
"eval_nll_loss": 0.19370631873607635,
"eval_rewards/accuracies": 0.6973684430122375,
"eval_rewards/chosen": -0.248046875,
"eval_rewards/margins": 1.5133634805679321,
"eval_rewards/rejected": -1.7625411748886108,
"eval_runtime": 37.7687,
"eval_samples_per_second": 7.89,
"eval_steps_per_second": 1.006,
"step": 33
},
{
"epoch": 0.23952095808383234,
"grad_norm": 12.673577308654785,
"learning_rate": 8.037974683544304e-05,
"logits/chosen": -1.006250023841858,
"logits/rejected": -0.9486328363418579,
"logps/chosen": -18.325000762939453,
"logps/rejected": -30.96875,
"loss": 0.5276,
"nll_loss": 0.19785156846046448,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.36396485567092896,
"rewards/margins": 1.570214867591858,
"rewards/rejected": -1.9357421398162842,
"step": 40
},
{
"epoch": 0.2994011976047904,
"grad_norm": 14.858013153076172,
"learning_rate": 7.40506329113924e-05,
"logits/chosen": -0.971484363079071,
"logits/rejected": -0.9488281011581421,
"logps/chosen": -19.975000381469727,
"logps/rejected": -30.5,
"loss": 0.5554,
"nll_loss": 0.21669921278953552,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.32304686307907104,
"rewards/margins": 1.359960913658142,
"rewards/rejected": -1.684472680091858,
"step": 50
},
{
"epoch": 0.3592814371257485,
"grad_norm": 13.94198226928711,
"learning_rate": 6.772151898734177e-05,
"logits/chosen": -0.9164062738418579,
"logits/rejected": -0.895703136920929,
"logps/chosen": -19.456249237060547,
"logps/rejected": -31.3125,
"loss": 0.5578,
"nll_loss": 0.20249024033546448,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.30244141817092896,
"rewards/margins": 1.196874976158142,
"rewards/rejected": -1.50048828125,
"step": 60
},
{
"epoch": 0.39520958083832336,
"eval_logits/chosen": -0.8888774514198303,
"eval_logits/rejected": -0.8612253069877625,
"eval_logps/chosen": -18.394737243652344,
"eval_logps/rejected": -30.789474487304688,
"eval_loss": 0.46207094192504883,
"eval_nll_loss": 0.1904296875,
"eval_rewards/accuracies": 0.7532894611358643,
"eval_rewards/chosen": -0.1289319545030594,
"eval_rewards/margins": 1.2322677373886108,
"eval_rewards/rejected": -1.3603515625,
"eval_runtime": 37.6185,
"eval_samples_per_second": 7.922,
"eval_steps_per_second": 1.01,
"step": 66
},
{
"epoch": 0.41916167664670656,
"grad_norm": 10.616917610168457,
"learning_rate": 6.139240506329115e-05,
"logits/chosen": -0.826953113079071,
"logits/rejected": -0.830273449420929,
"logps/chosen": -20.596874237060547,
"logps/rejected": -29.740625381469727,
"loss": 0.5122,
"nll_loss": 0.20893554389476776,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.11264648288488388,
"rewards/margins": 1.2311522960662842,
"rewards/rejected": -1.3449218273162842,
"step": 70
},
{
"epoch": 0.47904191616766467,
"grad_norm": 11.696365356445312,
"learning_rate": 5.5063291139240514e-05,
"logits/chosen": -0.82421875,
"logits/rejected": -0.8033202886581421,
"logps/chosen": -19.581249237060547,
"logps/rejected": -32.271873474121094,
"loss": 0.5047,
"nll_loss": 0.19770507514476776,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.20634765923023224,
"rewards/margins": 1.2736327648162842,
"rewards/rejected": -1.4802734851837158,
"step": 80
},
{
"epoch": 0.5389221556886228,
"grad_norm": 11.490265846252441,
"learning_rate": 4.8734177215189874e-05,
"logits/chosen": -0.863476574420929,
"logits/rejected": -0.841015636920929,
"logps/chosen": -19.484375,
"logps/rejected": -33.95624923706055,
"loss": 0.4647,
"nll_loss": 0.20292969048023224,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.24423828721046448,
"rewards/margins": 1.6508300304412842,
"rewards/rejected": -1.8943359851837158,
"step": 90
},
{
"epoch": 0.592814371257485,
"eval_logits/chosen": -0.9116981625556946,
"eval_logits/rejected": -0.8838404417037964,
"eval_logps/chosen": -18.429275512695312,
"eval_logps/rejected": -31.63157844543457,
"eval_loss": 0.43091967701911926,
"eval_nll_loss": 0.19037829339504242,
"eval_rewards/accuracies": 0.7532894611358643,
"eval_rewards/chosen": -0.15003083646297455,
"eval_rewards/margins": 1.6260793209075928,
"eval_rewards/rejected": -1.7764699459075928,
"eval_runtime": 37.7297,
"eval_samples_per_second": 7.898,
"eval_steps_per_second": 1.007,
"step": 99
},
{
"epoch": 0.5988023952095808,
"grad_norm": 8.535696983337402,
"learning_rate": 4.240506329113924e-05,
"logits/chosen": -0.848828136920929,
"logits/rejected": -0.8291015625,
"logps/chosen": -18.943750381469727,
"logps/rejected": -29.356250762939453,
"loss": 0.4623,
"nll_loss": 0.19814452528953552,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.10761718451976776,
"rewards/margins": 1.6233398914337158,
"rewards/rejected": -1.7314453125,
"step": 100
},
{
"epoch": 0.6586826347305389,
"grad_norm": 5.726072311401367,
"learning_rate": 3.607594936708861e-05,
"logits/chosen": -0.8636718988418579,
"logits/rejected": -0.866992175579071,
"logps/chosen": -20.371875762939453,
"logps/rejected": -31.131250381469727,
"loss": 0.477,
"nll_loss": 0.212890625,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.22114257514476776,
"rewards/margins": 1.5869140625,
"rewards/rejected": -1.808203101158142,
"step": 110
},
{
"epoch": 0.718562874251497,
"grad_norm": 6.890474796295166,
"learning_rate": 2.9746835443037974e-05,
"logits/chosen": -0.9125000238418579,
"logits/rejected": -0.9126952886581421,
"logps/chosen": -17.600000381469727,
"logps/rejected": -33.837501525878906,
"loss": 0.4372,
"nll_loss": 0.185546875,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.20854492485523224,
"rewards/margins": 2.422656297683716,
"rewards/rejected": -2.6328125,
"step": 120
},
{
"epoch": 0.7784431137724551,
"grad_norm": 7.191044807434082,
"learning_rate": 2.341772151898734e-05,
"logits/chosen": -0.93359375,
"logits/rejected": -0.8980468511581421,
"logps/chosen": -16.762500762939453,
"logps/rejected": -30.174999237060547,
"loss": 0.4398,
"nll_loss": 0.18398436903953552,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.18193359673023224,
"rewards/margins": 1.81787109375,
"rewards/rejected": -2.0015625953674316,
"step": 130
},
{
"epoch": 0.7904191616766467,
"eval_logits/chosen": -0.9958881735801697,
"eval_logits/rejected": -0.9672080874443054,
"eval_logps/chosen": -18.569900512695312,
"eval_logps/rejected": -32.644737243652344,
"eval_loss": 0.41646456718444824,
"eval_nll_loss": 0.19179172813892365,
"eval_rewards/accuracies": 0.75,
"eval_rewards/chosen": -0.2185187041759491,
"eval_rewards/margins": 2.0729339122772217,
"eval_rewards/rejected": -2.2922492027282715,
"eval_runtime": 37.9044,
"eval_samples_per_second": 7.862,
"eval_steps_per_second": 1.003,
"step": 132
},
{
"epoch": 0.8383233532934131,
"grad_norm": 10.231584548950195,
"learning_rate": 1.7088607594936708e-05,
"logits/chosen": -0.9898437261581421,
"logits/rejected": -0.976757824420929,
"logps/chosen": -17.987499237060547,
"logps/rejected": -31.737499237060547,
"loss": 0.3967,
"nll_loss": 0.19194336235523224,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.28300780057907104,
"rewards/margins": 1.9914062023162842,
"rewards/rejected": -2.274218797683716,
"step": 140
},
{
"epoch": 0.8982035928143712,
"grad_norm": 8.393524169921875,
"learning_rate": 1.0759493670886076e-05,
"logits/chosen": -0.975390613079071,
"logits/rejected": -0.953906238079071,
"logps/chosen": -19.209375381469727,
"logps/rejected": -33.400001525878906,
"loss": 0.5022,
"nll_loss": 0.20380859076976776,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.4892578125,
"rewards/margins": 1.9375,
"rewards/rejected": -2.4291014671325684,
"step": 150
},
{
"epoch": 0.9580838323353293,
"grad_norm": 10.486552238464355,
"learning_rate": 4.430379746835443e-06,
"logits/chosen": -0.991992175579071,
"logits/rejected": -0.948046863079071,
"logps/chosen": -19.268749237060547,
"logps/rejected": -33.556251525878906,
"loss": 0.5564,
"nll_loss": 0.206787109375,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.40800780057907104,
"rewards/margins": 1.840429663658142,
"rewards/rejected": -2.244873046875,
"step": 160
},
{
"epoch": 0.9880239520958084,
"eval_logits/chosen": -1.0091488361358643,
"eval_logits/rejected": -0.98046875,
"eval_logps/chosen": -18.596216201782227,
"eval_logps/rejected": -32.71381759643555,
"eval_loss": 0.41747432947158813,
"eval_nll_loss": 0.19197162985801697,
"eval_rewards/accuracies": 0.7730262875556946,
"eval_rewards/chosen": -0.22913239896297455,
"eval_rewards/margins": 2.0992496013641357,
"eval_rewards/rejected": -2.329050064086914,
"eval_runtime": 37.8726,
"eval_samples_per_second": 7.868,
"eval_steps_per_second": 1.003,
"step": 165
},
{
"epoch": 1.0,
"step": 167,
"total_flos": 0.0,
"train_loss": 0.5023956755678097,
"train_runtime": 947.2222,
"train_samples_per_second": 2.816,
"train_steps_per_second": 0.176
}
],
"logging_steps": 10,
"max_steps": 167,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 33,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
|