{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 217, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02304147465437788, "grad_norm": 4.609155178070068, "learning_rate": 4e-05, "loss": 1.1945276260375977, "step": 5 }, { "epoch": 0.04608294930875576, "grad_norm": 0.3184139132499695, "learning_rate": 9e-05, "loss": 1.017074966430664, "step": 10 }, { "epoch": 0.06912442396313365, "grad_norm": 0.21383991837501526, "learning_rate": 0.00014, "loss": 0.956594181060791, "step": 15 }, { "epoch": 0.09216589861751152, "grad_norm": 0.20728200674057007, "learning_rate": 0.00019, "loss": 0.9034479141235352, "step": 20 }, { "epoch": 0.1152073732718894, "grad_norm": 0.1944284737110138, "learning_rate": 0.00019979661917102115, "loss": 0.8788949012756347, "step": 25 }, { "epoch": 0.1382488479262673, "grad_norm": 0.18074148893356323, "learning_rate": 0.00019897180218885507, "loss": 0.9575190544128418, "step": 30 }, { "epoch": 0.16129032258064516, "grad_norm": 0.29437240958213806, "learning_rate": 0.00019751807495903484, "loss": 0.9204333305358887, "step": 35 }, { "epoch": 0.18433179723502305, "grad_norm": 0.22639837861061096, "learning_rate": 0.00019544467510209388, "loss": 1.0152652740478516, "step": 40 }, { "epoch": 0.2073732718894009, "grad_norm": 0.21111330389976501, "learning_rate": 0.00019276477791027374, "loss": 0.8392349243164062, "step": 45 }, { "epoch": 0.2304147465437788, "grad_norm": 0.2804746925830841, "learning_rate": 0.00018949541262593762, "loss": 0.7984195232391358, "step": 50 }, { "epoch": 0.2534562211981567, "grad_norm": 0.18596959114074707, "learning_rate": 0.00018565735423029404, "loss": 0.8844935417175293, "step": 55 }, { "epoch": 0.2764976958525346, "grad_norm": 0.3064640462398529, "learning_rate": 0.00018127499143005268, "loss": 0.8820157051086426, "step": 60 }, { "epoch": 0.2995391705069124, "grad_norm": 0.23555733263492584, "learning_rate": 0.00017637617168088325, "loss": 0.9234310150146484, "step": 65 }, { "epoch": 0.3225806451612903, "grad_norm": 0.18329502642154694, "learning_rate": 0.0001709920242324663, "loss": 0.8056103706359863, "step": 70 }, { "epoch": 0.3456221198156682, "grad_norm": 0.1875191181898117, "learning_rate": 0.0001651567623195849, "loss": 0.979500675201416, "step": 75 }, { "epoch": 0.3686635944700461, "grad_norm": 0.22501425445079803, "learning_rate": 0.00015890746575622231, "loss": 0.8597883224487305, "step": 80 }, { "epoch": 0.391705069124424, "grad_norm": 0.17843660712242126, "learning_rate": 0.0001522838453141581, "loss": 0.7388361930847168, "step": 85 }, { "epoch": 0.4147465437788018, "grad_norm": 0.24085059762001038, "learning_rate": 0.00014532799038330385, "loss": 1.0389366149902344, "step": 90 }, { "epoch": 0.4377880184331797, "grad_norm": 0.2150639444589615, "learning_rate": 0.0001380841015172563, "loss": 0.8493513107299805, "step": 95 }, { "epoch": 0.4608294930875576, "grad_norm": 0.18283779919147491, "learning_rate": 0.00013059820956358998, "loss": 1.0224772453308106, "step": 100 }, { "epoch": 0.4838709677419355, "grad_norm": 0.23088669776916504, "learning_rate": 0.00012291788316365888, "loss": 0.7935346126556396, "step": 105 }, { "epoch": 0.5069124423963134, "grad_norm": 0.20359237492084503, "learning_rate": 0.00011509192648058249, "loss": 0.880291748046875, "step": 110 }, { "epoch": 0.5299539170506913, "grad_norm": 0.2222241908311844, "learning_rate": 0.00010717006907618377, "loss": 1.0091294288635253, "step": 115 }, { "epoch": 0.5529953917050692, "grad_norm": 0.23833954334259033, "learning_rate": 9.920264990753837e-05, "loss": 0.8710408210754395, "step": 120 }, { "epoch": 0.576036866359447, "grad_norm": 0.21623441576957703, "learning_rate": 9.12402974511587e-05, "loss": 0.7930989265441895, "step": 125 }, { "epoch": 0.5990783410138248, "grad_norm": 0.131376251578331, "learning_rate": 8.333360798744496e-05, "loss": 0.7971048831939698, "step": 130 }, { "epoch": 0.6221198156682027, "grad_norm": 0.21020947396755219, "learning_rate": 7.553282408972382e-05, "loss": 0.9506874084472656, "step": 135 }, { "epoch": 0.6451612903225806, "grad_norm": 0.1900622397661209, "learning_rate": 6.788751536089739e-05, "loss": 0.7837625980377197, "step": 140 }, { "epoch": 0.6682027649769585, "grad_norm": 0.20265358686447144, "learning_rate": 6.044626344644151e-05, "loss": 0.9717947959899902, "step": 145 }, { "epoch": 0.6912442396313364, "grad_norm": 0.20827989280223846, "learning_rate": 5.325635332531864e-05, "loss": 0.8602096557617187, "step": 150 }, { "epoch": 0.7142857142857143, "grad_norm": 0.18106794357299805, "learning_rate": 4.636347284047877e-05, "loss": 0.7170271873474121, "step": 155 }, { "epoch": 0.7373271889400922, "grad_norm": 0.1832089126110077, "learning_rate": 3.981142237826332e-05, "loss": 0.927896785736084, "step": 160 }, { "epoch": 0.7603686635944701, "grad_norm": 0.2030986100435257, "learning_rate": 3.364183654153592e-05, "loss": 0.7584274768829345, "step": 165 }, { "epoch": 0.783410138248848, "grad_norm": 0.22505062818527222, "learning_rate": 2.789391958515183e-05, "loss": 0.8892077445983887, "step": 170 }, { "epoch": 0.8064516129032258, "grad_norm": 0.22897103428840637, "learning_rate": 2.2604196294924694e-05, "loss": 0.7206392288208008, "step": 175 }, { "epoch": 0.8294930875576036, "grad_norm": 0.16774588823318481, "learning_rate": 1.7806279893114875e-05, "loss": 0.854553508758545, "step": 180 }, { "epoch": 0.8525345622119815, "grad_norm": 0.21229791641235352, "learning_rate": 1.3530658445269783e-05, "loss": 0.9499557495117188, "step": 185 }, { "epoch": 0.8755760368663594, "grad_norm": 0.2512429356575012, "learning_rate": 9.804501125681243e-06, "loss": 0.8282492637634278, "step": 190 }, { "epoch": 0.8986175115207373, "grad_norm": 0.2164967805147171, "learning_rate": 6.651485572533378e-06, "loss": 0.8490478515625, "step": 195 }, { "epoch": 0.9216589861751152, "grad_norm": 0.22630353271961212, "learning_rate": 4.091647429802869e-06, "loss": 0.8240178108215332, "step": 200 }, { "epoch": 0.9447004608294931, "grad_norm": 0.2094026803970337, "learning_rate": 2.1412530319879887e-06, "loss": 0.8646288871765136, "step": 205 }, { "epoch": 0.967741935483871, "grad_norm": 0.22000199556350708, "learning_rate": 8.126960406835249e-07, "loss": 0.7955609798431397, "step": 210 }, { "epoch": 0.9907834101382489, "grad_norm": 0.2844116985797882, "learning_rate": 1.1441868981815207e-07, "loss": 0.9579826354980469, "step": 215 } ], "logging_steps": 5, "max_steps": 217, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.887535275770291e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }