{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 33, "global_step": 167, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.059880239520958084, "grad_norm": 12.36841869354248, "learning_rate": 9.936708860759493e-05, "logits/chosen": -0.735156238079071, "logits/rejected": -0.6986328363418579, "logps/chosen": -19.024999618530273, "logps/rejected": -28.543750762939453, "loss": 0.5971, "nll_loss": 0.20131835341453552, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": 0.01967773400247097, "rewards/margins": 0.14492186903953552, "rewards/rejected": -0.12529297173023224, "step": 10 }, { "epoch": 0.11976047904191617, "grad_norm": 8.02683162689209, "learning_rate": 9.303797468354431e-05, "logits/chosen": -0.865039050579071, "logits/rejected": -0.849804699420929, "logps/chosen": -17.296875, "logps/rejected": -28.184375762939453, "loss": 0.547, "nll_loss": 0.18623046576976776, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.09682617336511612, "rewards/margins": 0.814404308795929, "rewards/rejected": -0.91064453125, "step": 20 }, { "epoch": 0.17964071856287425, "grad_norm": 10.350639343261719, "learning_rate": 8.670886075949367e-05, "logits/chosen": -0.9947265386581421, "logits/rejected": -0.942187488079071, "logps/chosen": -18.146875381469727, "logps/rejected": -30.168750762939453, "loss": 0.5183, "nll_loss": 0.19204100966453552, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.14462891221046448, "rewards/margins": 1.4348633289337158, "rewards/rejected": -1.5797851085662842, "step": 30 }, { "epoch": 0.19760479041916168, "eval_logits/chosen": -1.0199424028396606, "eval_logits/rejected": -0.9895148277282715, "eval_logps/chosen": -18.636512756347656, "eval_logps/rejected": -31.595394134521484, "eval_loss": 0.5152081847190857, "eval_nll_loss": 0.19370631873607635, "eval_rewards/accuracies": 0.6973684430122375, "eval_rewards/chosen": -0.248046875, "eval_rewards/margins": 1.5133634805679321, "eval_rewards/rejected": -1.7625411748886108, "eval_runtime": 37.7687, "eval_samples_per_second": 7.89, "eval_steps_per_second": 1.006, "step": 33 }, { "epoch": 0.23952095808383234, "grad_norm": 12.673577308654785, "learning_rate": 8.037974683544304e-05, "logits/chosen": -1.006250023841858, "logits/rejected": -0.9486328363418579, "logps/chosen": -18.325000762939453, "logps/rejected": -30.96875, "loss": 0.5276, "nll_loss": 0.19785156846046448, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36396485567092896, "rewards/margins": 1.570214867591858, "rewards/rejected": -1.9357421398162842, "step": 40 }, { "epoch": 0.2994011976047904, "grad_norm": 14.858013153076172, "learning_rate": 7.40506329113924e-05, "logits/chosen": -0.971484363079071, "logits/rejected": -0.9488281011581421, "logps/chosen": -19.975000381469727, "logps/rejected": -30.5, "loss": 0.5554, "nll_loss": 0.21669921278953552, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.32304686307907104, "rewards/margins": 1.359960913658142, "rewards/rejected": -1.684472680091858, "step": 50 }, { "epoch": 0.3592814371257485, "grad_norm": 13.94198226928711, "learning_rate": 6.772151898734177e-05, "logits/chosen": -0.9164062738418579, "logits/rejected": -0.895703136920929, "logps/chosen": -19.456249237060547, "logps/rejected": -31.3125, "loss": 0.5578, "nll_loss": 0.20249024033546448, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.30244141817092896, "rewards/margins": 1.196874976158142, "rewards/rejected": -1.50048828125, "step": 60 }, { "epoch": 0.39520958083832336, "eval_logits/chosen": -0.8888774514198303, "eval_logits/rejected": -0.8612253069877625, "eval_logps/chosen": -18.394737243652344, "eval_logps/rejected": -30.789474487304688, "eval_loss": 0.46207094192504883, "eval_nll_loss": 0.1904296875, "eval_rewards/accuracies": 0.7532894611358643, "eval_rewards/chosen": -0.1289319545030594, "eval_rewards/margins": 1.2322677373886108, "eval_rewards/rejected": -1.3603515625, "eval_runtime": 37.6185, "eval_samples_per_second": 7.922, "eval_steps_per_second": 1.01, "step": 66 }, { "epoch": 0.41916167664670656, "grad_norm": 10.616917610168457, "learning_rate": 6.139240506329115e-05, "logits/chosen": -0.826953113079071, "logits/rejected": -0.830273449420929, "logps/chosen": -20.596874237060547, "logps/rejected": -29.740625381469727, "loss": 0.5122, "nll_loss": 0.20893554389476776, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.11264648288488388, "rewards/margins": 1.2311522960662842, "rewards/rejected": -1.3449218273162842, "step": 70 }, { "epoch": 0.47904191616766467, "grad_norm": 11.696365356445312, "learning_rate": 5.5063291139240514e-05, "logits/chosen": -0.82421875, "logits/rejected": -0.8033202886581421, "logps/chosen": -19.581249237060547, "logps/rejected": -32.271873474121094, "loss": 0.5047, "nll_loss": 0.19770507514476776, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20634765923023224, "rewards/margins": 1.2736327648162842, "rewards/rejected": -1.4802734851837158, "step": 80 }, { "epoch": 0.5389221556886228, "grad_norm": 11.490265846252441, "learning_rate": 4.8734177215189874e-05, "logits/chosen": -0.863476574420929, "logits/rejected": -0.841015636920929, "logps/chosen": -19.484375, "logps/rejected": -33.95624923706055, "loss": 0.4647, "nll_loss": 0.20292969048023224, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.24423828721046448, "rewards/margins": 1.6508300304412842, "rewards/rejected": -1.8943359851837158, "step": 90 }, { "epoch": 0.592814371257485, "eval_logits/chosen": -0.9116981625556946, "eval_logits/rejected": -0.8838404417037964, "eval_logps/chosen": -18.429275512695312, "eval_logps/rejected": -31.63157844543457, "eval_loss": 0.43091967701911926, "eval_nll_loss": 0.19037829339504242, "eval_rewards/accuracies": 0.7532894611358643, "eval_rewards/chosen": -0.15003083646297455, "eval_rewards/margins": 1.6260793209075928, "eval_rewards/rejected": -1.7764699459075928, "eval_runtime": 37.7297, "eval_samples_per_second": 7.898, "eval_steps_per_second": 1.007, "step": 99 }, { "epoch": 0.5988023952095808, "grad_norm": 8.535696983337402, "learning_rate": 4.240506329113924e-05, "logits/chosen": -0.848828136920929, "logits/rejected": -0.8291015625, "logps/chosen": -18.943750381469727, "logps/rejected": -29.356250762939453, "loss": 0.4623, "nll_loss": 0.19814452528953552, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10761718451976776, "rewards/margins": 1.6233398914337158, "rewards/rejected": -1.7314453125, "step": 100 }, { "epoch": 0.6586826347305389, "grad_norm": 5.726072311401367, "learning_rate": 3.607594936708861e-05, "logits/chosen": -0.8636718988418579, "logits/rejected": -0.866992175579071, "logps/chosen": -20.371875762939453, "logps/rejected": -31.131250381469727, "loss": 0.477, "nll_loss": 0.212890625, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.22114257514476776, "rewards/margins": 1.5869140625, "rewards/rejected": -1.808203101158142, "step": 110 }, { "epoch": 0.718562874251497, "grad_norm": 6.890474796295166, "learning_rate": 2.9746835443037974e-05, "logits/chosen": -0.9125000238418579, "logits/rejected": -0.9126952886581421, "logps/chosen": -17.600000381469727, "logps/rejected": -33.837501525878906, "loss": 0.4372, "nll_loss": 0.185546875, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.20854492485523224, "rewards/margins": 2.422656297683716, "rewards/rejected": -2.6328125, "step": 120 }, { "epoch": 0.7784431137724551, "grad_norm": 7.191044807434082, "learning_rate": 2.341772151898734e-05, "logits/chosen": -0.93359375, "logits/rejected": -0.8980468511581421, "logps/chosen": -16.762500762939453, "logps/rejected": -30.174999237060547, "loss": 0.4398, "nll_loss": 0.18398436903953552, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.18193359673023224, "rewards/margins": 1.81787109375, "rewards/rejected": -2.0015625953674316, "step": 130 }, { "epoch": 0.7904191616766467, "eval_logits/chosen": -0.9958881735801697, "eval_logits/rejected": -0.9672080874443054, "eval_logps/chosen": -18.569900512695312, "eval_logps/rejected": -32.644737243652344, "eval_loss": 0.41646456718444824, "eval_nll_loss": 0.19179172813892365, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -0.2185187041759491, "eval_rewards/margins": 2.0729339122772217, "eval_rewards/rejected": -2.2922492027282715, "eval_runtime": 37.9044, "eval_samples_per_second": 7.862, "eval_steps_per_second": 1.003, "step": 132 }, { "epoch": 0.8383233532934131, "grad_norm": 10.231584548950195, "learning_rate": 1.7088607594936708e-05, "logits/chosen": -0.9898437261581421, "logits/rejected": -0.976757824420929, "logps/chosen": -17.987499237060547, "logps/rejected": -31.737499237060547, "loss": 0.3967, "nll_loss": 0.19194336235523224, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.28300780057907104, "rewards/margins": 1.9914062023162842, "rewards/rejected": -2.274218797683716, "step": 140 }, { "epoch": 0.8982035928143712, "grad_norm": 8.393524169921875, "learning_rate": 1.0759493670886076e-05, "logits/chosen": -0.975390613079071, "logits/rejected": -0.953906238079071, "logps/chosen": -19.209375381469727, "logps/rejected": -33.400001525878906, "loss": 0.5022, "nll_loss": 0.20380859076976776, "rewards/accuracies": 0.71875, "rewards/chosen": -0.4892578125, "rewards/margins": 1.9375, "rewards/rejected": -2.4291014671325684, "step": 150 }, { "epoch": 0.9580838323353293, "grad_norm": 10.486552238464355, "learning_rate": 4.430379746835443e-06, "logits/chosen": -0.991992175579071, "logits/rejected": -0.948046863079071, "logps/chosen": -19.268749237060547, "logps/rejected": -33.556251525878906, "loss": 0.5564, "nll_loss": 0.206787109375, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.40800780057907104, "rewards/margins": 1.840429663658142, "rewards/rejected": -2.244873046875, "step": 160 }, { "epoch": 0.9880239520958084, "eval_logits/chosen": -1.0091488361358643, "eval_logits/rejected": -0.98046875, "eval_logps/chosen": -18.596216201782227, "eval_logps/rejected": -32.71381759643555, "eval_loss": 0.41747432947158813, "eval_nll_loss": 0.19197162985801697, "eval_rewards/accuracies": 0.7730262875556946, "eval_rewards/chosen": -0.22913239896297455, "eval_rewards/margins": 2.0992496013641357, "eval_rewards/rejected": -2.329050064086914, "eval_runtime": 37.8726, "eval_samples_per_second": 7.868, "eval_steps_per_second": 1.003, "step": 165 }, { "epoch": 1.0, "step": 167, "total_flos": 0.0, "train_loss": 0.5023956755678097, "train_runtime": 947.2222, "train_samples_per_second": 2.816, "train_steps_per_second": 0.176 } ], "logging_steps": 10, "max_steps": 167, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 33, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }