| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 79, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.064, |
| "grad_norm": 6.679206848144531, |
| "learning_rate": 9.936876709681666e-07, |
| "logits/chosen": -0.8538128137588501, |
| "logits/rejected": -0.3107415735721588, |
| "logps/chosen": -76.47968292236328, |
| "logps/rejected": -334.6451416015625, |
| "loss": 0.3147, |
| "num_input_tokens_seen": 256000, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 0.6914523839950562, |
| "rewards/margins": 1.9821197986602783, |
| "rewards/rejected": -1.2906672954559326, |
| "step": 5, |
| "train_runtime": 1679.4563, |
| "train_tokens_per_second": 152.43 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.1262335479259491, |
| "learning_rate": 9.68316749134364e-07, |
| "logits/chosen": -0.7508918642997742, |
| "logits/rejected": -0.27888986468315125, |
| "logps/chosen": -66.19853973388672, |
| "logps/rejected": -340.9087829589844, |
| "loss": 0.0071, |
| "num_input_tokens_seen": 507424, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.1356029510498047, |
| "rewards/margins": 8.877564430236816, |
| "rewards/rejected": -5.7419610023498535, |
| "step": 10, |
| "train_runtime": 2616.3069, |
| "train_tokens_per_second": 193.947 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.23208095133304596, |
| "learning_rate": 9.24491681045682e-07, |
| "logits/chosen": -0.6891063451766968, |
| "logits/rejected": -0.21943971514701843, |
| "logps/chosen": -56.16652297973633, |
| "logps/rejected": -382.0284423828125, |
| "loss": 0.0003, |
| "num_input_tokens_seen": 763040, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.018775939941406, |
| "rewards/margins": 16.494199752807617, |
| "rewards/rejected": -11.475423812866211, |
| "step": 15, |
| "train_runtime": 3551.0174, |
| "train_tokens_per_second": 214.879 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.15515422821044922, |
| "learning_rate": 8.639394051847471e-07, |
| "logits/chosen": -0.5768939256668091, |
| "logits/rejected": -0.18155181407928467, |
| "logps/chosen": -51.949642181396484, |
| "logps/rejected": -367.6221923828125, |
| "loss": 0.0015, |
| "num_input_tokens_seen": 1014528, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.824897289276123, |
| "rewards/margins": 17.040712356567383, |
| "rewards/rejected": -11.215815544128418, |
| "step": 20, |
| "train_runtime": 4492.2862, |
| "train_tokens_per_second": 225.838 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.1735421419143677, |
| "learning_rate": 7.890460001124241e-07, |
| "logits/chosen": -0.3975931704044342, |
| "logits/rejected": -0.11135731637477875, |
| "logps/chosen": -54.473506927490234, |
| "logps/rejected": -390.9683532714844, |
| "loss": 0.0027, |
| "num_input_tokens_seen": 1279392, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.003702640533447, |
| "rewards/margins": 17.591880798339844, |
| "rewards/rejected": -11.588177680969238, |
| "step": 25, |
| "train_runtime": 5435.3332, |
| "train_tokens_per_second": 235.384 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.07401101291179657, |
| "learning_rate": 7.027626604064969e-07, |
| "logits/chosen": -0.27080175280570984, |
| "logits/rejected": -0.03910026326775551, |
| "logps/chosen": -51.99932098388672, |
| "logps/rejected": -378.4240417480469, |
| "loss": 0.0127, |
| "num_input_tokens_seen": 1528704, |
| "rewards/accuracies": 0.995312511920929, |
| "rewards/chosen": 6.039531707763672, |
| "rewards/margins": 18.659555435180664, |
| "rewards/rejected": -12.620022773742676, |
| "step": 30, |
| "train_runtime": 6382.847, |
| "train_tokens_per_second": 239.502 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 4.258738040924072, |
| "learning_rate": 6.084894040531589e-07, |
| "logits/chosen": -0.2694633901119232, |
| "logits/rejected": -0.0405702069401741, |
| "logps/chosen": -51.36381912231445, |
| "logps/rejected": -390.9013366699219, |
| "loss": 0.0037, |
| "num_input_tokens_seen": 1785344, |
| "rewards/accuracies": 0.9984375238418579, |
| "rewards/chosen": 6.026695728302002, |
| "rewards/margins": 19.767641067504883, |
| "rewards/rejected": -13.740945816040039, |
| "step": 35, |
| "train_runtime": 7322.5463, |
| "train_tokens_per_second": 243.815 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 2.983978509902954, |
| "learning_rate": 5.09941093832535e-07, |
| "logits/chosen": -0.2793508768081665, |
| "logits/rejected": -0.027698948979377747, |
| "logps/chosen": -48.76968002319336, |
| "logps/rejected": -391.20758056640625, |
| "loss": 0.0066, |
| "num_input_tokens_seen": 2041504, |
| "rewards/accuracies": 0.9984375238418579, |
| "rewards/chosen": 6.045849323272705, |
| "rewards/margins": 19.663557052612305, |
| "rewards/rejected": -13.61771011352539, |
| "step": 40, |
| "train_runtime": 8270.3741, |
| "train_tokens_per_second": 246.845 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.4456007480621338, |
| "learning_rate": 4.1100105216122496e-07, |
| "logits/chosen": -0.3262310028076172, |
| "logits/rejected": -0.06511984020471573, |
| "logps/chosen": -52.1357536315918, |
| "logps/rejected": -398.6451416015625, |
| "loss": 0.0004, |
| "num_input_tokens_seen": 2297408, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.014974117279053, |
| "rewards/margins": 20.012720108032227, |
| "rewards/rejected": -13.997745513916016, |
| "step": 45, |
| "train_runtime": 9219.9165, |
| "train_tokens_per_second": 249.179 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 4.368767738342285, |
| "learning_rate": 3.1556803773799613e-07, |
| "logits/chosen": -0.3225005865097046, |
| "logits/rejected": -0.07438264787197113, |
| "logps/chosen": -49.3891716003418, |
| "logps/rejected": -390.9215087890625, |
| "loss": 0.0017, |
| "num_input_tokens_seen": 2552704, |
| "rewards/accuracies": 0.9984375238418579, |
| "rewards/chosen": 6.208975791931152, |
| "rewards/margins": 20.07794952392578, |
| "rewards/rejected": -13.868974685668945, |
| "step": 50, |
| "train_runtime": 10157.4064, |
| "train_tokens_per_second": 251.315 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.9644066095352173, |
| "learning_rate": 2.2740261391866633e-07, |
| "logits/chosen": -0.3191309869289398, |
| "logits/rejected": -0.06508567929267883, |
| "logps/chosen": -52.274436950683594, |
| "logps/rejected": -377.0621337890625, |
| "loss": 0.0006, |
| "num_input_tokens_seen": 2802080, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.761786937713623, |
| "rewards/margins": 19.63225555419922, |
| "rewards/rejected": -13.870468139648438, |
| "step": 55, |
| "train_runtime": 11101.9212, |
| "train_tokens_per_second": 252.396 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.1925666630268097, |
| "learning_rate": 1.4997896271528737e-07, |
| "logits/chosen": -0.2972492575645447, |
| "logits/rejected": -0.04955977201461792, |
| "logps/chosen": -52.26522445678711, |
| "logps/rejected": -388.23724365234375, |
| "loss": 0.0055, |
| "num_input_tokens_seen": 3057440, |
| "rewards/accuracies": 0.9984375238418579, |
| "rewards/chosen": 5.993190765380859, |
| "rewards/margins": 19.97586441040039, |
| "rewards/rejected": -13.982673645019531, |
| "step": 60, |
| "train_runtime": 12037.0054, |
| "train_tokens_per_second": 254.003 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 2.587663412094116, |
| "learning_rate": 8.634798372847146e-08, |
| "logits/chosen": -0.32648926973342896, |
| "logits/rejected": -0.04533248394727707, |
| "logps/chosen": -54.620330810546875, |
| "logps/rejected": -382.8239440917969, |
| "loss": 0.0011, |
| "num_input_tokens_seen": 3306816, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.238775253295898, |
| "rewards/margins": 19.866392135620117, |
| "rewards/rejected": -13.627615928649902, |
| "step": 65, |
| "train_runtime": 12987.846, |
| "train_tokens_per_second": 254.609 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.31388822197914124, |
| "learning_rate": 3.901707263589671e-08, |
| "logits/chosen": -0.3047231435775757, |
| "logits/rejected": -0.024533234536647797, |
| "logps/chosen": -51.3085823059082, |
| "logps/rejected": -381.90814208984375, |
| "loss": 0.0007, |
| "num_input_tokens_seen": 3555424, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.905380725860596, |
| "rewards/margins": 19.539260864257812, |
| "rewards/rejected": -13.633878707885742, |
| "step": 70, |
| "train_runtime": 13943.1522, |
| "train_tokens_per_second": 254.994 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.31311288475990295, |
| "learning_rate": 9.851316597681959e-09, |
| "logits/chosen": -0.30659955739974976, |
| "logits/rejected": -0.04967175051569939, |
| "logps/chosen": -51.19919967651367, |
| "logps/rejected": -385.9148254394531, |
| "loss": 0.0015, |
| "num_input_tokens_seen": 3806624, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.100650310516357, |
| "rewards/margins": 19.596603393554688, |
| "rewards/rejected": -13.495952606201172, |
| "step": 75, |
| "train_runtime": 14884.713, |
| "train_tokens_per_second": 255.741 |
| }, |
| { |
| "epoch": 1.0, |
| "num_input_tokens_seen": 3967808, |
| "step": 79, |
| "total_flos": 1.6832915605500723e+17, |
| "train_loss": 0.02286101435605861, |
| "train_runtime": 15157.818, |
| "train_samples_per_second": 0.66, |
| "train_steps_per_second": 0.005 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 79, |
| "num_input_tokens_seen": 3967808, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.6832915605500723e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|