{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 79, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.064, "grad_norm": 6.679206848144531, "learning_rate": 9.936876709681666e-07, "logits/chosen": -0.8538128137588501, "logits/rejected": -0.3107415735721588, "logps/chosen": -76.47968292236328, "logps/rejected": -334.6451416015625, "loss": 0.3147, "num_input_tokens_seen": 256000, "rewards/accuracies": 0.793749988079071, "rewards/chosen": 0.6914523839950562, "rewards/margins": 1.9821197986602783, "rewards/rejected": -1.2906672954559326, "step": 5, "train_runtime": 1679.4563, "train_tokens_per_second": 152.43 }, { "epoch": 0.128, "grad_norm": 0.1262335479259491, "learning_rate": 9.68316749134364e-07, "logits/chosen": -0.7508918642997742, "logits/rejected": -0.27888986468315125, "logps/chosen": -66.19853973388672, "logps/rejected": -340.9087829589844, "loss": 0.0071, "num_input_tokens_seen": 507424, "rewards/accuracies": 1.0, "rewards/chosen": 3.1356029510498047, "rewards/margins": 8.877564430236816, "rewards/rejected": -5.7419610023498535, "step": 10, "train_runtime": 2616.3069, "train_tokens_per_second": 193.947 }, { "epoch": 0.192, "grad_norm": 0.23208095133304596, "learning_rate": 9.24491681045682e-07, "logits/chosen": -0.6891063451766968, "logits/rejected": -0.21943971514701843, "logps/chosen": -56.16652297973633, "logps/rejected": -382.0284423828125, "loss": 0.0003, "num_input_tokens_seen": 763040, "rewards/accuracies": 1.0, "rewards/chosen": 5.018775939941406, "rewards/margins": 16.494199752807617, "rewards/rejected": -11.475423812866211, "step": 15, "train_runtime": 3551.0174, "train_tokens_per_second": 214.879 }, { "epoch": 0.256, "grad_norm": 0.15515422821044922, "learning_rate": 8.639394051847471e-07, "logits/chosen": -0.5768939256668091, "logits/rejected": -0.18155181407928467, "logps/chosen": -51.949642181396484, "logps/rejected": -367.6221923828125, "loss": 0.0015, "num_input_tokens_seen": 1014528, "rewards/accuracies": 1.0, "rewards/chosen": 5.824897289276123, "rewards/margins": 17.040712356567383, "rewards/rejected": -11.215815544128418, "step": 20, "train_runtime": 4492.2862, "train_tokens_per_second": 225.838 }, { "epoch": 0.32, "grad_norm": 1.1735421419143677, "learning_rate": 7.890460001124241e-07, "logits/chosen": -0.3975931704044342, "logits/rejected": -0.11135731637477875, "logps/chosen": -54.473506927490234, "logps/rejected": -390.9683532714844, "loss": 0.0027, "num_input_tokens_seen": 1279392, "rewards/accuracies": 1.0, "rewards/chosen": 6.003702640533447, "rewards/margins": 17.591880798339844, "rewards/rejected": -11.588177680969238, "step": 25, "train_runtime": 5435.3332, "train_tokens_per_second": 235.384 }, { "epoch": 0.384, "grad_norm": 0.07401101291179657, "learning_rate": 7.027626604064969e-07, "logits/chosen": -0.27080175280570984, "logits/rejected": -0.03910026326775551, "logps/chosen": -51.99932098388672, "logps/rejected": -378.4240417480469, "loss": 0.0127, "num_input_tokens_seen": 1528704, "rewards/accuracies": 0.995312511920929, "rewards/chosen": 6.039531707763672, "rewards/margins": 18.659555435180664, "rewards/rejected": -12.620022773742676, "step": 30, "train_runtime": 6382.847, "train_tokens_per_second": 239.502 }, { "epoch": 0.448, "grad_norm": 4.258738040924072, "learning_rate": 6.084894040531589e-07, "logits/chosen": -0.2694633901119232, "logits/rejected": -0.0405702069401741, "logps/chosen": -51.36381912231445, "logps/rejected": -390.9013366699219, "loss": 0.0037, "num_input_tokens_seen": 1785344, "rewards/accuracies": 0.9984375238418579, "rewards/chosen": 6.026695728302002, "rewards/margins": 19.767641067504883, "rewards/rejected": -13.740945816040039, "step": 35, "train_runtime": 7322.5463, "train_tokens_per_second": 243.815 }, { "epoch": 0.512, "grad_norm": 2.983978509902954, "learning_rate": 5.09941093832535e-07, "logits/chosen": -0.2793508768081665, "logits/rejected": -0.027698948979377747, "logps/chosen": -48.76968002319336, "logps/rejected": -391.20758056640625, "loss": 0.0066, "num_input_tokens_seen": 2041504, "rewards/accuracies": 0.9984375238418579, "rewards/chosen": 6.045849323272705, "rewards/margins": 19.663557052612305, "rewards/rejected": -13.61771011352539, "step": 40, "train_runtime": 8270.3741, "train_tokens_per_second": 246.845 }, { "epoch": 0.576, "grad_norm": 0.4456007480621338, "learning_rate": 4.1100105216122496e-07, "logits/chosen": -0.3262310028076172, "logits/rejected": -0.06511984020471573, "logps/chosen": -52.1357536315918, "logps/rejected": -398.6451416015625, "loss": 0.0004, "num_input_tokens_seen": 2297408, "rewards/accuracies": 1.0, "rewards/chosen": 6.014974117279053, "rewards/margins": 20.012720108032227, "rewards/rejected": -13.997745513916016, "step": 45, "train_runtime": 9219.9165, "train_tokens_per_second": 249.179 }, { "epoch": 0.64, "grad_norm": 4.368767738342285, "learning_rate": 3.1556803773799613e-07, "logits/chosen": -0.3225005865097046, "logits/rejected": -0.07438264787197113, "logps/chosen": -49.3891716003418, "logps/rejected": -390.9215087890625, "loss": 0.0017, "num_input_tokens_seen": 2552704, "rewards/accuracies": 0.9984375238418579, "rewards/chosen": 6.208975791931152, "rewards/margins": 20.07794952392578, "rewards/rejected": -13.868974685668945, "step": 50, "train_runtime": 10157.4064, "train_tokens_per_second": 251.315 }, { "epoch": 0.704, "grad_norm": 0.9644066095352173, "learning_rate": 2.2740261391866633e-07, "logits/chosen": -0.3191309869289398, "logits/rejected": -0.06508567929267883, "logps/chosen": -52.274436950683594, "logps/rejected": -377.0621337890625, "loss": 0.0006, "num_input_tokens_seen": 2802080, "rewards/accuracies": 1.0, "rewards/chosen": 5.761786937713623, "rewards/margins": 19.63225555419922, "rewards/rejected": -13.870468139648438, "step": 55, "train_runtime": 11101.9212, "train_tokens_per_second": 252.396 }, { "epoch": 0.768, "grad_norm": 0.1925666630268097, "learning_rate": 1.4997896271528737e-07, "logits/chosen": -0.2972492575645447, "logits/rejected": -0.04955977201461792, "logps/chosen": -52.26522445678711, "logps/rejected": -388.23724365234375, "loss": 0.0055, "num_input_tokens_seen": 3057440, "rewards/accuracies": 0.9984375238418579, "rewards/chosen": 5.993190765380859, "rewards/margins": 19.97586441040039, "rewards/rejected": -13.982673645019531, "step": 60, "train_runtime": 12037.0054, "train_tokens_per_second": 254.003 }, { "epoch": 0.832, "grad_norm": 2.587663412094116, "learning_rate": 8.634798372847146e-08, "logits/chosen": -0.32648926973342896, "logits/rejected": -0.04533248394727707, "logps/chosen": -54.620330810546875, "logps/rejected": -382.8239440917969, "loss": 0.0011, "num_input_tokens_seen": 3306816, "rewards/accuracies": 1.0, "rewards/chosen": 6.238775253295898, "rewards/margins": 19.866392135620117, "rewards/rejected": -13.627615928649902, "step": 65, "train_runtime": 12987.846, "train_tokens_per_second": 254.609 }, { "epoch": 0.896, "grad_norm": 0.31388822197914124, "learning_rate": 3.901707263589671e-08, "logits/chosen": -0.3047231435775757, "logits/rejected": -0.024533234536647797, "logps/chosen": -51.3085823059082, "logps/rejected": -381.90814208984375, "loss": 0.0007, "num_input_tokens_seen": 3555424, "rewards/accuracies": 1.0, "rewards/chosen": 5.905380725860596, "rewards/margins": 19.539260864257812, "rewards/rejected": -13.633878707885742, "step": 70, "train_runtime": 13943.1522, "train_tokens_per_second": 254.994 }, { "epoch": 0.96, "grad_norm": 0.31311288475990295, "learning_rate": 9.851316597681959e-09, "logits/chosen": -0.30659955739974976, "logits/rejected": -0.04967175051569939, "logps/chosen": -51.19919967651367, "logps/rejected": -385.9148254394531, "loss": 0.0015, "num_input_tokens_seen": 3806624, "rewards/accuracies": 1.0, "rewards/chosen": 6.100650310516357, "rewards/margins": 19.596603393554688, "rewards/rejected": -13.495952606201172, "step": 75, "train_runtime": 14884.713, "train_tokens_per_second": 255.741 }, { "epoch": 1.0, "num_input_tokens_seen": 3967808, "step": 79, "total_flos": 1.6832915605500723e+17, "train_loss": 0.02286101435605861, "train_runtime": 15157.818, "train_samples_per_second": 0.66, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 79, "num_input_tokens_seen": 3967808, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6832915605500723e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }