| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 217, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02304147465437788, |
| "grad_norm": 4.609155178070068, |
| "learning_rate": 4e-05, |
| "loss": 1.1945276260375977, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.04608294930875576, |
| "grad_norm": 0.3184139132499695, |
| "learning_rate": 9e-05, |
| "loss": 1.017074966430664, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06912442396313365, |
| "grad_norm": 0.21383991837501526, |
| "learning_rate": 0.00014, |
| "loss": 0.956594181060791, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.09216589861751152, |
| "grad_norm": 0.20728200674057007, |
| "learning_rate": 0.00019, |
| "loss": 0.9034479141235352, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1152073732718894, |
| "grad_norm": 0.1944284737110138, |
| "learning_rate": 0.00019979661917102115, |
| "loss": 0.8788949012756347, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1382488479262673, |
| "grad_norm": 0.18074148893356323, |
| "learning_rate": 0.00019897180218885507, |
| "loss": 0.9575190544128418, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.16129032258064516, |
| "grad_norm": 0.29437240958213806, |
| "learning_rate": 0.00019751807495903484, |
| "loss": 0.9204333305358887, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.18433179723502305, |
| "grad_norm": 0.22639837861061096, |
| "learning_rate": 0.00019544467510209388, |
| "loss": 1.0152652740478516, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2073732718894009, |
| "grad_norm": 0.21111330389976501, |
| "learning_rate": 0.00019276477791027374, |
| "loss": 0.8392349243164062, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2304147465437788, |
| "grad_norm": 0.2804746925830841, |
| "learning_rate": 0.00018949541262593762, |
| "loss": 0.7984195232391358, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2534562211981567, |
| "grad_norm": 0.18596959114074707, |
| "learning_rate": 0.00018565735423029404, |
| "loss": 0.8844935417175293, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2764976958525346, |
| "grad_norm": 0.3064640462398529, |
| "learning_rate": 0.00018127499143005268, |
| "loss": 0.8820157051086426, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2995391705069124, |
| "grad_norm": 0.23555733263492584, |
| "learning_rate": 0.00017637617168088325, |
| "loss": 0.9234310150146484, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.3225806451612903, |
| "grad_norm": 0.18329502642154694, |
| "learning_rate": 0.0001709920242324663, |
| "loss": 0.8056103706359863, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3456221198156682, |
| "grad_norm": 0.1875191181898117, |
| "learning_rate": 0.0001651567623195849, |
| "loss": 0.979500675201416, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.3686635944700461, |
| "grad_norm": 0.22501425445079803, |
| "learning_rate": 0.00015890746575622231, |
| "loss": 0.8597883224487305, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.391705069124424, |
| "grad_norm": 0.17843660712242126, |
| "learning_rate": 0.0001522838453141581, |
| "loss": 0.7388361930847168, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.4147465437788018, |
| "grad_norm": 0.24085059762001038, |
| "learning_rate": 0.00014532799038330385, |
| "loss": 1.0389366149902344, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4377880184331797, |
| "grad_norm": 0.2150639444589615, |
| "learning_rate": 0.0001380841015172563, |
| "loss": 0.8493513107299805, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.4608294930875576, |
| "grad_norm": 0.18283779919147491, |
| "learning_rate": 0.00013059820956358998, |
| "loss": 1.0224772453308106, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4838709677419355, |
| "grad_norm": 0.23088669776916504, |
| "learning_rate": 0.00012291788316365888, |
| "loss": 0.7935346126556396, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.5069124423963134, |
| "grad_norm": 0.20359237492084503, |
| "learning_rate": 0.00011509192648058249, |
| "loss": 0.880291748046875, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5299539170506913, |
| "grad_norm": 0.2222241908311844, |
| "learning_rate": 0.00010717006907618377, |
| "loss": 1.0091294288635253, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.5529953917050692, |
| "grad_norm": 0.23833954334259033, |
| "learning_rate": 9.920264990753837e-05, |
| "loss": 0.8710408210754395, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.576036866359447, |
| "grad_norm": 0.21623441576957703, |
| "learning_rate": 9.12402974511587e-05, |
| "loss": 0.7930989265441895, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5990783410138248, |
| "grad_norm": 0.131376251578331, |
| "learning_rate": 8.333360798744496e-05, |
| "loss": 0.7971048831939698, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6221198156682027, |
| "grad_norm": 0.21020947396755219, |
| "learning_rate": 7.553282408972382e-05, |
| "loss": 0.9506874084472656, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.6451612903225806, |
| "grad_norm": 0.1900622397661209, |
| "learning_rate": 6.788751536089739e-05, |
| "loss": 0.7837625980377197, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6682027649769585, |
| "grad_norm": 0.20265358686447144, |
| "learning_rate": 6.044626344644151e-05, |
| "loss": 0.9717947959899902, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.6912442396313364, |
| "grad_norm": 0.20827989280223846, |
| "learning_rate": 5.325635332531864e-05, |
| "loss": 0.8602096557617187, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.18106794357299805, |
| "learning_rate": 4.636347284047877e-05, |
| "loss": 0.7170271873474121, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.7373271889400922, |
| "grad_norm": 0.1832089126110077, |
| "learning_rate": 3.981142237826332e-05, |
| "loss": 0.927896785736084, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7603686635944701, |
| "grad_norm": 0.2030986100435257, |
| "learning_rate": 3.364183654153592e-05, |
| "loss": 0.7584274768829345, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.783410138248848, |
| "grad_norm": 0.22505062818527222, |
| "learning_rate": 2.789391958515183e-05, |
| "loss": 0.8892077445983887, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.8064516129032258, |
| "grad_norm": 0.22897103428840637, |
| "learning_rate": 2.2604196294924694e-05, |
| "loss": 0.7206392288208008, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.8294930875576036, |
| "grad_norm": 0.16774588823318481, |
| "learning_rate": 1.7806279893114875e-05, |
| "loss": 0.854553508758545, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8525345622119815, |
| "grad_norm": 0.21229791641235352, |
| "learning_rate": 1.3530658445269783e-05, |
| "loss": 0.9499557495117188, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.8755760368663594, |
| "grad_norm": 0.2512429356575012, |
| "learning_rate": 9.804501125681243e-06, |
| "loss": 0.8282492637634278, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8986175115207373, |
| "grad_norm": 0.2164967805147171, |
| "learning_rate": 6.651485572533378e-06, |
| "loss": 0.8490478515625, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.9216589861751152, |
| "grad_norm": 0.22630353271961212, |
| "learning_rate": 4.091647429802869e-06, |
| "loss": 0.8240178108215332, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9447004608294931, |
| "grad_norm": 0.2094026803970337, |
| "learning_rate": 2.1412530319879887e-06, |
| "loss": 0.8646288871765136, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.967741935483871, |
| "grad_norm": 0.22000199556350708, |
| "learning_rate": 8.126960406835249e-07, |
| "loss": 0.7955609798431397, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9907834101382489, |
| "grad_norm": 0.2844116985797882, |
| "learning_rate": 1.1441868981815207e-07, |
| "loss": 0.9579826354980469, |
| "step": 215 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 217, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.887535275770291e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|