| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1407, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 0.7955821046978235, |
| "epoch": 0.21344717182497333, |
| "grad_norm": 0.259765625, |
| "learning_rate": 0.0001859275053304904, |
| "loss": 0.8123, |
| "mean_token_accuracy": 0.7977147643268109, |
| "num_tokens": 3815906.0, |
| "step": 100 |
| }, |
| { |
| "entropy": 0.7014639886468649, |
| "epoch": 0.42689434364994666, |
| "grad_norm": 0.2265625, |
| "learning_rate": 0.0001717128642501777, |
| "loss": 0.7209, |
| "mean_token_accuracy": 0.8144587337970733, |
| "num_tokens": 7626566.0, |
| "step": 200 |
| }, |
| { |
| "entropy": 0.684154222086072, |
| "epoch": 0.6403415154749199, |
| "grad_norm": 0.2470703125, |
| "learning_rate": 0.00015749822316986497, |
| "loss": 0.7017, |
| "mean_token_accuracy": 0.8176216109097004, |
| "num_tokens": 11456784.0, |
| "step": 300 |
| }, |
| { |
| "entropy": 0.6706647833436727, |
| "epoch": 0.8537886872998933, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 0.00014328358208955225, |
| "loss": 0.6881, |
| "mean_token_accuracy": 0.8207742583751678, |
| "num_tokens": 15297842.0, |
| "step": 400 |
| }, |
| { |
| "entropy": 0.5925532557332336, |
| "epoch": 1.0661686232657417, |
| "grad_norm": 0.2119140625, |
| "learning_rate": 0.00012906894100923953, |
| "loss": 0.5889, |
| "mean_token_accuracy": 0.8439057000617886, |
| "num_tokens": 19106846.0, |
| "step": 500 |
| }, |
| { |
| "entropy": 0.42009303748607635, |
| "epoch": 1.279615795090715, |
| "grad_norm": 0.228515625, |
| "learning_rate": 0.0001148542999289268, |
| "loss": 0.3836, |
| "mean_token_accuracy": 0.8929252660274506, |
| "num_tokens": 22944206.0, |
| "step": 600 |
| }, |
| { |
| "entropy": 0.4144257218763232, |
| "epoch": 1.4930629669156883, |
| "grad_norm": 0.2333984375, |
| "learning_rate": 0.00010063965884861408, |
| "loss": 0.3809, |
| "mean_token_accuracy": 0.8935985819995403, |
| "num_tokens": 26787706.0, |
| "step": 700 |
| }, |
| { |
| "entropy": 0.4126653341576457, |
| "epoch": 1.7065101387406618, |
| "grad_norm": 0.23828125, |
| "learning_rate": 8.642501776830136e-05, |
| "loss": 0.3796, |
| "mean_token_accuracy": 0.8937636642158031, |
| "num_tokens": 30652821.0, |
| "step": 800 |
| }, |
| { |
| "entropy": 0.4055024874210358, |
| "epoch": 1.9199573105656351, |
| "grad_norm": 0.255859375, |
| "learning_rate": 7.221037668798864e-05, |
| "loss": 0.3719, |
| "mean_token_accuracy": 0.896048932671547, |
| "num_tokens": 34453009.0, |
| "step": 900 |
| }, |
| { |
| "entropy": 0.33940730929075175, |
| "epoch": 2.1323372465314834, |
| "grad_norm": 0.2001953125, |
| "learning_rate": 5.7995735607675904e-05, |
| "loss": 0.2844, |
| "mean_token_accuracy": 0.922690433024162, |
| "num_tokens": 38246767.0, |
| "step": 1000 |
| }, |
| { |
| "entropy": 0.28708830222487447, |
| "epoch": 2.3457844183564567, |
| "grad_norm": 0.20703125, |
| "learning_rate": 4.3781094527363184e-05, |
| "loss": 0.2259, |
| "mean_token_accuracy": 0.9395377920567989, |
| "num_tokens": 42074506.0, |
| "step": 1100 |
| }, |
| { |
| "entropy": 0.28266524378210306, |
| "epoch": 2.55923159018143, |
| "grad_norm": 0.20703125, |
| "learning_rate": 2.9566453447050464e-05, |
| "loss": 0.2225, |
| "mean_token_accuracy": 0.9406452259421348, |
| "num_tokens": 45883112.0, |
| "step": 1200 |
| }, |
| { |
| "entropy": 0.28105670753866435, |
| "epoch": 2.7726787620064033, |
| "grad_norm": 0.19921875, |
| "learning_rate": 1.535181236673774e-05, |
| "loss": 0.2224, |
| "mean_token_accuracy": 0.9406881707906724, |
| "num_tokens": 49691137.0, |
| "step": 1300 |
| }, |
| { |
| "entropy": 0.28744990050792696, |
| "epoch": 2.9861259338313766, |
| "grad_norm": 0.201171875, |
| "learning_rate": 1.1371712864250178e-06, |
| "loss": 0.2261, |
| "mean_token_accuracy": 0.9393656353652478, |
| "num_tokens": 53554833.0, |
| "step": 1400 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 1407, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0385484190580736e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|