| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 33, |
| "global_step": 167, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.059880239520958084, |
| "grad_norm": 12.36841869354248, |
| "learning_rate": 9.936708860759493e-05, |
| "logits/chosen": -0.735156238079071, |
| "logits/rejected": -0.6986328363418579, |
| "logps/chosen": -19.024999618530273, |
| "logps/rejected": -28.543750762939453, |
| "loss": 0.5971, |
| "nll_loss": 0.20131835341453552, |
| "rewards/accuracies": 0.4937500059604645, |
| "rewards/chosen": 0.01967773400247097, |
| "rewards/margins": 0.14492186903953552, |
| "rewards/rejected": -0.12529297173023224, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.11976047904191617, |
| "grad_norm": 8.02683162689209, |
| "learning_rate": 9.303797468354431e-05, |
| "logits/chosen": -0.865039050579071, |
| "logits/rejected": -0.849804699420929, |
| "logps/chosen": -17.296875, |
| "logps/rejected": -28.184375762939453, |
| "loss": 0.547, |
| "nll_loss": 0.18623046576976776, |
| "rewards/accuracies": 0.6187499761581421, |
| "rewards/chosen": -0.09682617336511612, |
| "rewards/margins": 0.814404308795929, |
| "rewards/rejected": -0.91064453125, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.17964071856287425, |
| "grad_norm": 10.350639343261719, |
| "learning_rate": 8.670886075949367e-05, |
| "logits/chosen": -0.9947265386581421, |
| "logits/rejected": -0.942187488079071, |
| "logps/chosen": -18.146875381469727, |
| "logps/rejected": -30.168750762939453, |
| "loss": 0.5183, |
| "nll_loss": 0.19204100966453552, |
| "rewards/accuracies": 0.6812499761581421, |
| "rewards/chosen": -0.14462891221046448, |
| "rewards/margins": 1.4348633289337158, |
| "rewards/rejected": -1.5797851085662842, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.19760479041916168, |
| "eval_logits/chosen": -1.0199424028396606, |
| "eval_logits/rejected": -0.9895148277282715, |
| "eval_logps/chosen": -18.636512756347656, |
| "eval_logps/rejected": -31.595394134521484, |
| "eval_loss": 0.5152081847190857, |
| "eval_nll_loss": 0.19370631873607635, |
| "eval_rewards/accuracies": 0.6973684430122375, |
| "eval_rewards/chosen": -0.248046875, |
| "eval_rewards/margins": 1.5133634805679321, |
| "eval_rewards/rejected": -1.7625411748886108, |
| "eval_runtime": 37.7687, |
| "eval_samples_per_second": 7.89, |
| "eval_steps_per_second": 1.006, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.23952095808383234, |
| "grad_norm": 12.673577308654785, |
| "learning_rate": 8.037974683544304e-05, |
| "logits/chosen": -1.006250023841858, |
| "logits/rejected": -0.9486328363418579, |
| "logps/chosen": -18.325000762939453, |
| "logps/rejected": -30.96875, |
| "loss": 0.5276, |
| "nll_loss": 0.19785156846046448, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.36396485567092896, |
| "rewards/margins": 1.570214867591858, |
| "rewards/rejected": -1.9357421398162842, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2994011976047904, |
| "grad_norm": 14.858013153076172, |
| "learning_rate": 7.40506329113924e-05, |
| "logits/chosen": -0.971484363079071, |
| "logits/rejected": -0.9488281011581421, |
| "logps/chosen": -19.975000381469727, |
| "logps/rejected": -30.5, |
| "loss": 0.5554, |
| "nll_loss": 0.21669921278953552, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.32304686307907104, |
| "rewards/margins": 1.359960913658142, |
| "rewards/rejected": -1.684472680091858, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3592814371257485, |
| "grad_norm": 13.94198226928711, |
| "learning_rate": 6.772151898734177e-05, |
| "logits/chosen": -0.9164062738418579, |
| "logits/rejected": -0.895703136920929, |
| "logps/chosen": -19.456249237060547, |
| "logps/rejected": -31.3125, |
| "loss": 0.5578, |
| "nll_loss": 0.20249024033546448, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.30244141817092896, |
| "rewards/margins": 1.196874976158142, |
| "rewards/rejected": -1.50048828125, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.39520958083832336, |
| "eval_logits/chosen": -0.8888774514198303, |
| "eval_logits/rejected": -0.8612253069877625, |
| "eval_logps/chosen": -18.394737243652344, |
| "eval_logps/rejected": -30.789474487304688, |
| "eval_loss": 0.46207094192504883, |
| "eval_nll_loss": 0.1904296875, |
| "eval_rewards/accuracies": 0.7532894611358643, |
| "eval_rewards/chosen": -0.1289319545030594, |
| "eval_rewards/margins": 1.2322677373886108, |
| "eval_rewards/rejected": -1.3603515625, |
| "eval_runtime": 37.6185, |
| "eval_samples_per_second": 7.922, |
| "eval_steps_per_second": 1.01, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.41916167664670656, |
| "grad_norm": 10.616917610168457, |
| "learning_rate": 6.139240506329115e-05, |
| "logits/chosen": -0.826953113079071, |
| "logits/rejected": -0.830273449420929, |
| "logps/chosen": -20.596874237060547, |
| "logps/rejected": -29.740625381469727, |
| "loss": 0.5122, |
| "nll_loss": 0.20893554389476776, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.11264648288488388, |
| "rewards/margins": 1.2311522960662842, |
| "rewards/rejected": -1.3449218273162842, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.47904191616766467, |
| "grad_norm": 11.696365356445312, |
| "learning_rate": 5.5063291139240514e-05, |
| "logits/chosen": -0.82421875, |
| "logits/rejected": -0.8033202886581421, |
| "logps/chosen": -19.581249237060547, |
| "logps/rejected": -32.271873474121094, |
| "loss": 0.5047, |
| "nll_loss": 0.19770507514476776, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.20634765923023224, |
| "rewards/margins": 1.2736327648162842, |
| "rewards/rejected": -1.4802734851837158, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.5389221556886228, |
| "grad_norm": 11.490265846252441, |
| "learning_rate": 4.8734177215189874e-05, |
| "logits/chosen": -0.863476574420929, |
| "logits/rejected": -0.841015636920929, |
| "logps/chosen": -19.484375, |
| "logps/rejected": -33.95624923706055, |
| "loss": 0.4647, |
| "nll_loss": 0.20292969048023224, |
| "rewards/accuracies": 0.668749988079071, |
| "rewards/chosen": -0.24423828721046448, |
| "rewards/margins": 1.6508300304412842, |
| "rewards/rejected": -1.8943359851837158, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.592814371257485, |
| "eval_logits/chosen": -0.9116981625556946, |
| "eval_logits/rejected": -0.8838404417037964, |
| "eval_logps/chosen": -18.429275512695312, |
| "eval_logps/rejected": -31.63157844543457, |
| "eval_loss": 0.43091967701911926, |
| "eval_nll_loss": 0.19037829339504242, |
| "eval_rewards/accuracies": 0.7532894611358643, |
| "eval_rewards/chosen": -0.15003083646297455, |
| "eval_rewards/margins": 1.6260793209075928, |
| "eval_rewards/rejected": -1.7764699459075928, |
| "eval_runtime": 37.7297, |
| "eval_samples_per_second": 7.898, |
| "eval_steps_per_second": 1.007, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.5988023952095808, |
| "grad_norm": 8.535696983337402, |
| "learning_rate": 4.240506329113924e-05, |
| "logits/chosen": -0.848828136920929, |
| "logits/rejected": -0.8291015625, |
| "logps/chosen": -18.943750381469727, |
| "logps/rejected": -29.356250762939453, |
| "loss": 0.4623, |
| "nll_loss": 0.19814452528953552, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.10761718451976776, |
| "rewards/margins": 1.6233398914337158, |
| "rewards/rejected": -1.7314453125, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6586826347305389, |
| "grad_norm": 5.726072311401367, |
| "learning_rate": 3.607594936708861e-05, |
| "logits/chosen": -0.8636718988418579, |
| "logits/rejected": -0.866992175579071, |
| "logps/chosen": -20.371875762939453, |
| "logps/rejected": -31.131250381469727, |
| "loss": 0.477, |
| "nll_loss": 0.212890625, |
| "rewards/accuracies": 0.668749988079071, |
| "rewards/chosen": -0.22114257514476776, |
| "rewards/margins": 1.5869140625, |
| "rewards/rejected": -1.808203101158142, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.718562874251497, |
| "grad_norm": 6.890474796295166, |
| "learning_rate": 2.9746835443037974e-05, |
| "logits/chosen": -0.9125000238418579, |
| "logits/rejected": -0.9126952886581421, |
| "logps/chosen": -17.600000381469727, |
| "logps/rejected": -33.837501525878906, |
| "loss": 0.4372, |
| "nll_loss": 0.185546875, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -0.20854492485523224, |
| "rewards/margins": 2.422656297683716, |
| "rewards/rejected": -2.6328125, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7784431137724551, |
| "grad_norm": 7.191044807434082, |
| "learning_rate": 2.341772151898734e-05, |
| "logits/chosen": -0.93359375, |
| "logits/rejected": -0.8980468511581421, |
| "logps/chosen": -16.762500762939453, |
| "logps/rejected": -30.174999237060547, |
| "loss": 0.4398, |
| "nll_loss": 0.18398436903953552, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -0.18193359673023224, |
| "rewards/margins": 1.81787109375, |
| "rewards/rejected": -2.0015625953674316, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7904191616766467, |
| "eval_logits/chosen": -0.9958881735801697, |
| "eval_logits/rejected": -0.9672080874443054, |
| "eval_logps/chosen": -18.569900512695312, |
| "eval_logps/rejected": -32.644737243652344, |
| "eval_loss": 0.41646456718444824, |
| "eval_nll_loss": 0.19179172813892365, |
| "eval_rewards/accuracies": 0.75, |
| "eval_rewards/chosen": -0.2185187041759491, |
| "eval_rewards/margins": 2.0729339122772217, |
| "eval_rewards/rejected": -2.2922492027282715, |
| "eval_runtime": 37.9044, |
| "eval_samples_per_second": 7.862, |
| "eval_steps_per_second": 1.003, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.8383233532934131, |
| "grad_norm": 10.231584548950195, |
| "learning_rate": 1.7088607594936708e-05, |
| "logits/chosen": -0.9898437261581421, |
| "logits/rejected": -0.976757824420929, |
| "logps/chosen": -17.987499237060547, |
| "logps/rejected": -31.737499237060547, |
| "loss": 0.3967, |
| "nll_loss": 0.19194336235523224, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.28300780057907104, |
| "rewards/margins": 1.9914062023162842, |
| "rewards/rejected": -2.274218797683716, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.8982035928143712, |
| "grad_norm": 8.393524169921875, |
| "learning_rate": 1.0759493670886076e-05, |
| "logits/chosen": -0.975390613079071, |
| "logits/rejected": -0.953906238079071, |
| "logps/chosen": -19.209375381469727, |
| "logps/rejected": -33.400001525878906, |
| "loss": 0.5022, |
| "nll_loss": 0.20380859076976776, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.4892578125, |
| "rewards/margins": 1.9375, |
| "rewards/rejected": -2.4291014671325684, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9580838323353293, |
| "grad_norm": 10.486552238464355, |
| "learning_rate": 4.430379746835443e-06, |
| "logits/chosen": -0.991992175579071, |
| "logits/rejected": -0.948046863079071, |
| "logps/chosen": -19.268749237060547, |
| "logps/rejected": -33.556251525878906, |
| "loss": 0.5564, |
| "nll_loss": 0.206787109375, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.40800780057907104, |
| "rewards/margins": 1.840429663658142, |
| "rewards/rejected": -2.244873046875, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9880239520958084, |
| "eval_logits/chosen": -1.0091488361358643, |
| "eval_logits/rejected": -0.98046875, |
| "eval_logps/chosen": -18.596216201782227, |
| "eval_logps/rejected": -32.71381759643555, |
| "eval_loss": 0.41747432947158813, |
| "eval_nll_loss": 0.19197162985801697, |
| "eval_rewards/accuracies": 0.7730262875556946, |
| "eval_rewards/chosen": -0.22913239896297455, |
| "eval_rewards/margins": 2.0992496013641357, |
| "eval_rewards/rejected": -2.329050064086914, |
| "eval_runtime": 37.8726, |
| "eval_samples_per_second": 7.868, |
| "eval_steps_per_second": 1.003, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 167, |
| "total_flos": 0.0, |
| "train_loss": 0.5023956755678097, |
| "train_runtime": 947.2222, |
| "train_samples_per_second": 2.816, |
| "train_steps_per_second": 0.176 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 167, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 33, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|