| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9955555555555555, |
| "eval_steps": 100, |
| "global_step": 562, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.035555555555555556, |
| "grad_norm": 11.767955780029297, |
| "learning_rate": 0.00019679715302491104, |
| "loss": 33.6554, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.07111111111111111, |
| "grad_norm": 9.438668251037598, |
| "learning_rate": 0.0001932384341637011, |
| "loss": 33.8787, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.10666666666666667, |
| "grad_norm": 10.174617767333984, |
| "learning_rate": 0.00018967971530249112, |
| "loss": 33.8919, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.14222222222222222, |
| "grad_norm": 8.84274673461914, |
| "learning_rate": 0.00018612099644128114, |
| "loss": 33.7011, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 10.169342041015625, |
| "learning_rate": 0.0001825622775800712, |
| "loss": 33.6306, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.21333333333333335, |
| "grad_norm": 9.339362144470215, |
| "learning_rate": 0.0001790035587188612, |
| "loss": 33.5378, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.24888888888888888, |
| "grad_norm": 10.399051666259766, |
| "learning_rate": 0.00017544483985765125, |
| "loss": 33.1223, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.28444444444444444, |
| "grad_norm": 8.772202491760254, |
| "learning_rate": 0.00017188612099644127, |
| "loss": 34.3864, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 9.338233947753906, |
| "learning_rate": 0.00016832740213523133, |
| "loss": 33.2955, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 9.439739227294922, |
| "learning_rate": 0.00016476868327402135, |
| "loss": 33.229, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "eval_loss": 2.133469820022583, |
| "eval_runtime": 296.1668, |
| "eval_samples_per_second": 3.376, |
| "eval_steps_per_second": 0.422, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.39111111111111113, |
| "grad_norm": 9.046673774719238, |
| "learning_rate": 0.0001612099644128114, |
| "loss": 33.3667, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4266666666666667, |
| "grad_norm": 8.99227237701416, |
| "learning_rate": 0.00015765124555160143, |
| "loss": 32.6701, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4622222222222222, |
| "grad_norm": 7.6904144287109375, |
| "learning_rate": 0.00015409252669039148, |
| "loss": 33.2927, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.49777777777777776, |
| "grad_norm": 8.012206077575684, |
| "learning_rate": 0.00015053380782918148, |
| "loss": 33.2934, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 10.931622505187988, |
| "learning_rate": 0.00014697508896797153, |
| "loss": 33.3676, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5688888888888889, |
| "grad_norm": 7.606035232543945, |
| "learning_rate": 0.00014341637010676156, |
| "loss": 34.1758, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6044444444444445, |
| "grad_norm": 9.531214714050293, |
| "learning_rate": 0.0001398576512455516, |
| "loss": 33.0847, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 8.761300086975098, |
| "learning_rate": 0.00013629893238434164, |
| "loss": 33.5206, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6755555555555556, |
| "grad_norm": 9.155729293823242, |
| "learning_rate": 0.0001327402135231317, |
| "loss": 33.2403, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 9.354476928710938, |
| "learning_rate": 0.00012918149466192172, |
| "loss": 33.5548, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "eval_loss": 2.126850128173828, |
| "eval_runtime": 296.1679, |
| "eval_samples_per_second": 3.376, |
| "eval_steps_per_second": 0.422, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7466666666666667, |
| "grad_norm": 8.922224998474121, |
| "learning_rate": 0.00012562277580071177, |
| "loss": 33.279, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7822222222222223, |
| "grad_norm": 9.973633766174316, |
| "learning_rate": 0.00012206405693950178, |
| "loss": 33.5481, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8177777777777778, |
| "grad_norm": 8.771803855895996, |
| "learning_rate": 0.00011850533807829183, |
| "loss": 33.1058, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8533333333333334, |
| "grad_norm": 10.16543960571289, |
| "learning_rate": 0.00011494661921708185, |
| "loss": 33.3706, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 9.286821365356445, |
| "learning_rate": 0.0001113879003558719, |
| "loss": 33.3456, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9244444444444444, |
| "grad_norm": 9.520956039428711, |
| "learning_rate": 0.00010782918149466192, |
| "loss": 33.5781, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 10.376456260681152, |
| "learning_rate": 0.00010427046263345198, |
| "loss": 32.9687, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9955555555555555, |
| "grad_norm": 8.36178207397461, |
| "learning_rate": 0.00010071174377224199, |
| "loss": 33.7239, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.0284444444444445, |
| "grad_norm": 10.113052368164062, |
| "learning_rate": 9.715302491103203e-05, |
| "loss": 29.9997, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.064, |
| "grad_norm": 11.123631477355957, |
| "learning_rate": 9.359430604982207e-05, |
| "loss": 32.5004, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.064, |
| "eval_loss": 2.122236490249634, |
| "eval_runtime": 296.127, |
| "eval_samples_per_second": 3.377, |
| "eval_steps_per_second": 0.422, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0995555555555556, |
| "grad_norm": 9.897551536560059, |
| "learning_rate": 9.00355871886121e-05, |
| "loss": 32.5046, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.1351111111111112, |
| "grad_norm": 9.53073501586914, |
| "learning_rate": 8.647686832740213e-05, |
| "loss": 32.2727, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.1706666666666667, |
| "grad_norm": 10.394311904907227, |
| "learning_rate": 8.291814946619217e-05, |
| "loss": 32.688, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.2062222222222223, |
| "grad_norm": 9.498970031738281, |
| "learning_rate": 7.935943060498221e-05, |
| "loss": 33.6316, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.2417777777777779, |
| "grad_norm": 10.150975227355957, |
| "learning_rate": 7.580071174377225e-05, |
| "loss": 33.0713, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.2773333333333334, |
| "grad_norm": 9.899177551269531, |
| "learning_rate": 7.224199288256229e-05, |
| "loss": 32.4769, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.3128888888888888, |
| "grad_norm": 9.39831829071045, |
| "learning_rate": 6.868327402135231e-05, |
| "loss": 32.2654, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.3484444444444446, |
| "grad_norm": 10.761151313781738, |
| "learning_rate": 6.512455516014235e-05, |
| "loss": 32.491, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.384, |
| "grad_norm": 9.932414054870605, |
| "learning_rate": 6.156583629893239e-05, |
| "loss": 33.5308, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.4195555555555557, |
| "grad_norm": 11.054327011108398, |
| "learning_rate": 5.8007117437722425e-05, |
| "loss": 31.7061, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.4195555555555557, |
| "eval_loss": 2.120673418045044, |
| "eval_runtime": 296.1092, |
| "eval_samples_per_second": 3.377, |
| "eval_steps_per_second": 0.422, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.455111111111111, |
| "grad_norm": 10.89476203918457, |
| "learning_rate": 5.4448398576512464e-05, |
| "loss": 32.485, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.4906666666666666, |
| "grad_norm": 9.823376655578613, |
| "learning_rate": 5.0889679715302496e-05, |
| "loss": 32.9951, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.5262222222222221, |
| "grad_norm": 11.316079139709473, |
| "learning_rate": 4.733096085409253e-05, |
| "loss": 32.3443, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.561777777777778, |
| "grad_norm": 11.608524322509766, |
| "learning_rate": 4.377224199288256e-05, |
| "loss": 32.2948, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.5973333333333333, |
| "grad_norm": 11.020298957824707, |
| "learning_rate": 4.02135231316726e-05, |
| "loss": 32.6702, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.6328888888888888, |
| "grad_norm": 9.804555892944336, |
| "learning_rate": 3.665480427046263e-05, |
| "loss": 31.6452, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.6684444444444444, |
| "grad_norm": 11.037073135375977, |
| "learning_rate": 3.309608540925267e-05, |
| "loss": 32.479, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.704, |
| "grad_norm": 9.837021827697754, |
| "learning_rate": 2.9537366548042704e-05, |
| "loss": 32.72, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.7395555555555555, |
| "grad_norm": 11.720721244812012, |
| "learning_rate": 2.597864768683274e-05, |
| "loss": 32.6789, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.775111111111111, |
| "grad_norm": 11.738125801086426, |
| "learning_rate": 2.2419928825622775e-05, |
| "loss": 33.3128, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.775111111111111, |
| "eval_loss": 2.11881947517395, |
| "eval_runtime": 296.1216, |
| "eval_samples_per_second": 3.377, |
| "eval_steps_per_second": 0.422, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.8106666666666666, |
| "grad_norm": 11.249613761901855, |
| "learning_rate": 1.8861209964412814e-05, |
| "loss": 31.9298, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.8462222222222222, |
| "grad_norm": 11.530637741088867, |
| "learning_rate": 1.530249110320285e-05, |
| "loss": 31.8878, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.8817777777777778, |
| "grad_norm": 11.147592544555664, |
| "learning_rate": 1.1743772241992882e-05, |
| "loss": 32.6852, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.9173333333333333, |
| "grad_norm": 9.81916332244873, |
| "learning_rate": 8.185053380782918e-06, |
| "loss": 32.1578, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.952888888888889, |
| "grad_norm": 10.557317733764648, |
| "learning_rate": 4.626334519572954e-06, |
| "loss": 32.2151, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.9884444444444445, |
| "grad_norm": 10.493524551391602, |
| "learning_rate": 1.0676156583629894e-06, |
| "loss": 31.9549, |
| "step": 560 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 562, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.93400073703424e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|