M3_model_contra / trainer_state.json
Marta102's picture
checkpoint 3230 adaptive
5bf7843 verified
Invalid JSON:Unexpected token 'N', ..."/chosen": NaN, "... is not valid JSON
{
"best_global_step": 800,
"best_metric": 209.9661102294922,
"best_model_checkpoint": "final-model-dpo-ad-1ep/checkpoint-800",
"epoch": 1.0,
"eval_steps": 800,
"global_step": 3230,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03096634345545685,
"grad_norm": 4.53125,
"learning_rate": 9.976838348954221e-06,
"loss": 5.5573,
"step": 100
},
{
"epoch": 0.0619326869109137,
"grad_norm": 4.15625,
"learning_rate": 9.906634890087323e-06,
"loss": 0.9191,
"step": 200
},
{
"epoch": 0.09289903036637055,
"grad_norm": 4.96875,
"learning_rate": 9.790050865156384e-06,
"loss": 1.0287,
"step": 300
},
{
"epoch": 0.1238653738218274,
"grad_norm": 11.625,
"learning_rate": 9.628188298907782e-06,
"loss": 1.0742,
"step": 400
},
{
"epoch": 0.15483171727728426,
"grad_norm": 8.6875,
"learning_rate": 9.422577217034351e-06,
"loss": 1.2919,
"step": 500
},
{
"epoch": 0.1857980607327411,
"grad_norm": 16.75,
"learning_rate": 9.175161183420499e-06,
"loss": 0.9978,
"step": 600
},
{
"epoch": 0.21676440418819795,
"grad_norm": 5.1875,
"learning_rate": 8.888278928367003e-06,
"loss": 1.0586,
"step": 700
},
{
"epoch": 0.2477307476436548,
"grad_norm": 15.4375,
"learning_rate": 8.564642241456986e-06,
"loss": 1.4034,
"step": 800
},
{
"epoch": 0.2477307476436548,
"eval_logits/chosen": null,
"eval_logits/rejected": null,
"eval_logps/chosen": -6010.39697265625,
"eval_logps/rejected": -4464.38623046875,
"eval_loss": 209.9661102294922,
"eval_rewards/accuracies": 0.3250929117202759,
"eval_rewards/chosen": -555.6686401367188,
"eval_rewards/margins": -142.71261596679688,
"eval_rewards/rejected": -412.95599365234375,
"eval_runtime": 1394.4098,
"eval_samples_per_second": 13.895,
"eval_steps_per_second": 13.895,
"step": 800
},
{
"epoch": 0.27869709109911167,
"grad_norm": 13.875,
"learning_rate": 8.207310338033391e-06,
"loss": 1.5456,
"step": 900
},
{
"epoch": 0.3096634345545685,
"grad_norm": 40.25,
"learning_rate": 7.819660941592014e-06,
"loss": 1.1894,
"step": 1000
},
{
"epoch": 0.34062977801002536,
"grad_norm": 40.25,
"learning_rate": 7.405358355437272e-06,
"loss": 1.4514,
"step": 1100
},
{
"epoch": 0.3715961214654822,
"grad_norm": 10.375,
"learning_rate": 6.968318825407323e-06,
"loss": 1.1933,
"step": 1200
},
{
"epoch": 0.40256246492093906,
"grad_norm": 28.125,
"learning_rate": 6.512673521081566e-06,
"loss": 1.3781,
"step": 1300
},
{
"epoch": 0.4335288083763959,
"grad_norm": 10.75,
"learning_rate": 6.042729485395221e-06,
"loss": 1.219,
"step": 1400
},
{
"epoch": 0.46449515183185275,
"grad_norm": 13.4375,
"learning_rate": 5.562928921789507e-06,
"loss": 1.3677,
"step": 1500
},
{
"epoch": 0.4954614952873096,
"grad_norm": 20.5,
"learning_rate": 5.077807203740619e-06,
"loss": 2.2353,
"step": 1600
},
{
"epoch": 0.4954614952873096,
"eval_logits/chosen": null,
"eval_logits/rejected": null,
"eval_logps/chosen": -6157.478515625,
"eval_logps/rejected": -4588.53955078125,
"eval_loss": 214.6779022216797,
"eval_rewards/accuracies": 0.3266928195953369,
"eval_rewards/chosen": -570.3768310546875,
"eval_rewards/margins": -145.00547790527344,
"eval_rewards/rejected": -425.37127685546875,
"eval_runtime": 2581.7706,
"eval_samples_per_second": 7.505,
"eval_steps_per_second": 7.505,
"step": 1600
},
{
"epoch": 0.5264278387427664,
"grad_norm": 7.25,
"learning_rate": 4.591950003587562e-06,
"loss": 1.754,
"step": 1700
},
{
"epoch": 0.5573941821982233,
"grad_norm": 10.6875,
"learning_rate": 4.109949945903833e-06,
"loss": 1.6524,
"step": 1800
},
{
"epoch": 0.5883605256536801,
"grad_norm": 12.9375,
"learning_rate": 3.636363195152255e-06,
"loss": 1.2557,
"step": 1900
},
{
"epoch": 0.619326869109137,
"grad_norm": 27.5,
"learning_rate": 3.1756663879834735e-06,
"loss": 1.2763,
"step": 2000
},
{
"epoch": 0.6502932125645938,
"grad_norm": 18.25,
"learning_rate": 2.732214317280802e-06,
"loss": 1.7662,
"step": 2100
},
{
"epoch": 0.6812595560200507,
"grad_norm": 24.25,
"learning_rate": 2.3101987679481918e-06,
"loss": 1.7935,
"step": 2200
},
{
"epoch": 0.7122258994755075,
"grad_norm": 22.375,
"learning_rate": 1.913608893551036e-06,
"loss": 1.6459,
"step": 2300
},
{
"epoch": 0.7431922429309644,
"grad_norm": 32.75,
"learning_rate": 1.5461935083544755e-06,
"loss": 1.4079,
"step": 2400
},
{
"epoch": 0.7431922429309644,
"eval_logits/chosen": null,
"eval_logits/rejected": null,
"eval_logps/chosen": -6187.34716796875,
"eval_logps/rejected": -4600.21826171875,
"eval_loss": 216.61477661132812,
"eval_rewards/accuracies": 0.3254541754722595,
"eval_rewards/chosen": -573.3637084960938,
"eval_rewards/margins": -146.82444763183594,
"eval_rewards/rejected": -426.5391845703125,
"eval_runtime": 2856.8785,
"eval_samples_per_second": 6.782,
"eval_steps_per_second": 6.782,
"step": 2400
},
{
"epoch": 0.7741585863864212,
"grad_norm": 16.375,
"learning_rate": 1.2114256511983274e-06,
"loss": 1.2724,
"step": 2500
},
{
"epoch": 0.8051249298418781,
"grad_norm": 12.6875,
"learning_rate": 9.124697561729073e-07,
"loss": 1.5263,
"step": 2600
},
{
"epoch": 0.836091273297335,
"grad_norm": 12.75,
"learning_rate": 6.521517404190009e-07,
"loss": 1.6869,
"step": 2700
},
{
"epoch": 0.8670576167527918,
"grad_norm": 11.625,
"learning_rate": 4.3293229180065233e-07,
"loss": 1.4792,
"step": 2800
},
{
"epoch": 0.8980239602082487,
"grad_norm": 9.0,
"learning_rate": 2.5688360895234796e-07,
"loss": 1.3264,
"step": 2900
},
{
"epoch": 0.9289903036637055,
"grad_norm": 25.625,
"learning_rate": 1.256698135681289e-07,
"loss": 1.3937,
"step": 3000
},
{
"epoch": 0.9599566471191624,
"grad_norm": 21.25,
"learning_rate": 4.05312200878627e-08,
"loss": 1.6048,
"step": 3100
},
{
"epoch": 0.9909229905746192,
"grad_norm": 36.0,
"learning_rate": 2.272611473388975e-09,
"loss": 1.2954,
"step": 3200
},
{
"epoch": 0.9909229905746192,
"eval_logits/chosen": null,
"eval_logits/rejected": null,
"eval_logps/chosen": -6186.20751953125,
"eval_logps/rejected": -4598.9970703125,
"eval_loss": 216.60252380371094,
"eval_rewards/accuracies": 0.3249380588531494,
"eval_rewards/chosen": -573.2498168945312,
"eval_rewards/margins": -146.83267211914062,
"eval_rewards/rejected": -426.41705322265625,
"eval_runtime": 1367.7925,
"eval_samples_per_second": 14.166,
"eval_steps_per_second": 14.166,
"step": 3200
}
],
"logging_steps": 100,
"max_steps": 3230,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 800,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}