| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 123, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.12461059190031153, |
| "grad_norm": 9.83171558380127, |
| "learning_rate": 4.986964078748837e-05, |
| "loss": 2.2631, |
| "num_input_tokens_seen": 23312, |
| "step": 5, |
| "train_runtime": 5.7668, |
| "train_tokens_per_second": 4042.456 |
| }, |
| { |
| "epoch": 0.24922118380062305, |
| "grad_norm": 4.205252647399902, |
| "learning_rate": 4.934238559694448e-05, |
| "loss": 1.1429, |
| "num_input_tokens_seen": 46272, |
| "step": 10, |
| "train_runtime": 11.1634, |
| "train_tokens_per_second": 4144.99 |
| }, |
| { |
| "epoch": 0.37383177570093457, |
| "grad_norm": 1.370972990989685, |
| "learning_rate": 4.8418667698290696e-05, |
| "loss": 0.2742, |
| "num_input_tokens_seen": 68144, |
| "step": 15, |
| "train_runtime": 16.6069, |
| "train_tokens_per_second": 4103.352 |
| }, |
| { |
| "epoch": 0.4984423676012461, |
| "grad_norm": 0.373440146446228, |
| "learning_rate": 4.7113531619441984e-05, |
| "loss": 0.0755, |
| "num_input_tokens_seen": 91312, |
| "step": 20, |
| "train_runtime": 21.9176, |
| "train_tokens_per_second": 4166.149 |
| }, |
| { |
| "epoch": 0.6230529595015576, |
| "grad_norm": 0.34141597151756287, |
| "learning_rate": 4.5448234019167945e-05, |
| "loss": 0.034, |
| "num_input_tokens_seen": 114192, |
| "step": 25, |
| "train_runtime": 27.1841, |
| "train_tokens_per_second": 4200.692 |
| }, |
| { |
| "epoch": 0.7476635514018691, |
| "grad_norm": 0.30972689390182495, |
| "learning_rate": 4.344989748139873e-05, |
| "loss": 0.0195, |
| "num_input_tokens_seen": 137440, |
| "step": 30, |
| "train_runtime": 32.4806, |
| "train_tokens_per_second": 4231.451 |
| }, |
| { |
| "epoch": 0.8722741433021807, |
| "grad_norm": 0.0684092566370964, |
| "learning_rate": 4.1151068771623866e-05, |
| "loss": 0.0117, |
| "num_input_tokens_seen": 161952, |
| "step": 35, |
| "train_runtime": 38.0414, |
| "train_tokens_per_second": 4257.26 |
| }, |
| { |
| "epoch": 0.9968847352024922, |
| "grad_norm": 0.3527483344078064, |
| "learning_rate": 3.858918875003053e-05, |
| "loss": 0.0039, |
| "num_input_tokens_seen": 185888, |
| "step": 40, |
| "train_runtime": 43.3233, |
| "train_tokens_per_second": 4290.72 |
| }, |
| { |
| "epoch": 1.0996884735202492, |
| "grad_norm": 0.02160419337451458, |
| "learning_rate": 3.580598257486867e-05, |
| "loss": 0.0075, |
| "num_input_tokens_seen": 205520, |
| "step": 45, |
| "train_runtime": 47.694, |
| "train_tokens_per_second": 4309.142 |
| }, |
| { |
| "epoch": 1.2242990654205608, |
| "grad_norm": 0.022831469774246216, |
| "learning_rate": 3.284678012775727e-05, |
| "loss": 0.017, |
| "num_input_tokens_seen": 228528, |
| "step": 50, |
| "train_runtime": 53.089, |
| "train_tokens_per_second": 4304.62 |
| }, |
| { |
| "epoch": 1.3489096573208723, |
| "grad_norm": 0.26614609360694885, |
| "learning_rate": 2.975977772911671e-05, |
| "loss": 0.0026, |
| "num_input_tokens_seen": 254656, |
| "step": 55, |
| "train_runtime": 58.3985, |
| "train_tokens_per_second": 4360.662 |
| }, |
| { |
| "epoch": 1.4735202492211839, |
| "grad_norm": 0.01649504341185093, |
| "learning_rate": 2.659525316811571e-05, |
| "loss": 0.0063, |
| "num_input_tokens_seen": 275648, |
| "step": 60, |
| "train_runtime": 63.656, |
| "train_tokens_per_second": 4330.278 |
| }, |
| { |
| "epoch": 1.5981308411214954, |
| "grad_norm": 0.17046956717967987, |
| "learning_rate": 2.340474683188429e-05, |
| "loss": 0.0104, |
| "num_input_tokens_seen": 297392, |
| "step": 65, |
| "train_runtime": 68.7485, |
| "train_tokens_per_second": 4325.795 |
| }, |
| { |
| "epoch": 1.722741433021807, |
| "grad_norm": 0.014068239368498325, |
| "learning_rate": 2.0240222270883288e-05, |
| "loss": 0.0223, |
| "num_input_tokens_seen": 319808, |
| "step": 70, |
| "train_runtime": 73.8847, |
| "train_tokens_per_second": 4328.472 |
| }, |
| { |
| "epoch": 1.8473520249221185, |
| "grad_norm": 0.05504901334643364, |
| "learning_rate": 1.7153219872242727e-05, |
| "loss": 0.0277, |
| "num_input_tokens_seen": 343280, |
| "step": 75, |
| "train_runtime": 79.1945, |
| "train_tokens_per_second": 4334.643 |
| }, |
| { |
| "epoch": 1.97196261682243, |
| "grad_norm": 0.14178654551506042, |
| "learning_rate": 1.4194017425131323e-05, |
| "loss": 0.0198, |
| "num_input_tokens_seen": 366560, |
| "step": 80, |
| "train_runtime": 84.4049, |
| "train_tokens_per_second": 4342.878 |
| }, |
| { |
| "epoch": 2.074766355140187, |
| "grad_norm": 0.28684306144714355, |
| "learning_rate": 1.1410811249969475e-05, |
| "loss": 0.0362, |
| "num_input_tokens_seen": 385040, |
| "step": 85, |
| "train_runtime": 88.7672, |
| "train_tokens_per_second": 4337.639 |
| }, |
| { |
| "epoch": 2.1993769470404985, |
| "grad_norm": 0.06420072913169861, |
| "learning_rate": 8.848931228376136e-06, |
| "loss": 0.0057, |
| "num_input_tokens_seen": 409200, |
| "step": 90, |
| "train_runtime": 93.8816, |
| "train_tokens_per_second": 4358.682 |
| }, |
| { |
| "epoch": 2.32398753894081, |
| "grad_norm": 0.015087173320353031, |
| "learning_rate": 6.55010251860127e-06, |
| "loss": 0.008, |
| "num_input_tokens_seen": 430592, |
| "step": 95, |
| "train_runtime": 98.9816, |
| "train_tokens_per_second": 4350.221 |
| }, |
| { |
| "epoch": 2.4485981308411215, |
| "grad_norm": 0.17626583576202393, |
| "learning_rate": 4.551765980832059e-06, |
| "loss": 0.0136, |
| "num_input_tokens_seen": 454608, |
| "step": 100, |
| "train_runtime": 104.0764, |
| "train_tokens_per_second": 4368.024 |
| }, |
| { |
| "epoch": 2.573208722741433, |
| "grad_norm": 0.00867235753685236, |
| "learning_rate": 2.8864683805580133e-06, |
| "loss": 0.006, |
| "num_input_tokens_seen": 478128, |
| "step": 105, |
| "train_runtime": 110.2609, |
| "train_tokens_per_second": 4336.332 |
| }, |
| { |
| "epoch": 2.6978193146417446, |
| "grad_norm": 0.013947433792054653, |
| "learning_rate": 1.581332301709304e-06, |
| "loss": 0.009, |
| "num_input_tokens_seen": 503760, |
| "step": 110, |
| "train_runtime": 115.3636, |
| "train_tokens_per_second": 4366.717 |
| }, |
| { |
| "epoch": 2.822429906542056, |
| "grad_norm": 0.019248466938734055, |
| "learning_rate": 6.576144030555259e-07, |
| "loss": 0.002, |
| "num_input_tokens_seen": 526752, |
| "step": 115, |
| "train_runtime": 120.4599, |
| "train_tokens_per_second": 4372.841 |
| }, |
| { |
| "epoch": 2.9470404984423677, |
| "grad_norm": 0.016108745709061623, |
| "learning_rate": 1.3035921251163263e-07, |
| "loss": 0.0006, |
| "num_input_tokens_seen": 548960, |
| "step": 120, |
| "train_runtime": 125.5472, |
| "train_tokens_per_second": 4372.538 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 123, |
| "num_input_tokens_seen": 558704, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 356017775837184.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|