| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 111, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.13559322033898305, |
| "grad_norm": 5.389516730584021, |
| "learning_rate": 4.9839963190492576e-05, |
| "loss": 0.7555, |
| "num_input_tokens_seen": 256960, |
| "step": 5, |
| "train_runtime": 32.8726, |
| "train_tokens_per_second": 7816.843 |
| }, |
| { |
| "epoch": 0.2711864406779661, |
| "grad_norm": 3.138977267645858, |
| "learning_rate": 4.9193323673337476e-05, |
| "loss": 0.4688, |
| "num_input_tokens_seen": 520160, |
| "step": 10, |
| "train_runtime": 58.4348, |
| "train_tokens_per_second": 8901.55 |
| }, |
| { |
| "epoch": 0.4067796610169492, |
| "grad_norm": 1.9755541197783468, |
| "learning_rate": 4.806299712081172e-05, |
| "loss": 0.4121, |
| "num_input_tokens_seen": 780072, |
| "step": 15, |
| "train_runtime": 83.9935, |
| "train_tokens_per_second": 9287.291 |
| }, |
| { |
| "epoch": 0.5423728813559322, |
| "grad_norm": 5.889624762007321, |
| "learning_rate": 4.647158168051066e-05, |
| "loss": 0.3846, |
| "num_input_tokens_seen": 1044392, |
| "step": 20, |
| "train_runtime": 109.6459, |
| "train_tokens_per_second": 9525.135 |
| }, |
| { |
| "epoch": 0.6779661016949152, |
| "grad_norm": 1.2235406213647593, |
| "learning_rate": 4.445089385796099e-05, |
| "loss": 0.3815, |
| "num_input_tokens_seen": 1304776, |
| "step": 25, |
| "train_runtime": 134.9424, |
| "train_tokens_per_second": 9669.131 |
| }, |
| { |
| "epoch": 0.8135593220338984, |
| "grad_norm": 1.0683102943611105, |
| "learning_rate": 4.204133242248832e-05, |
| "loss": 0.3832, |
| "num_input_tokens_seen": 1570800, |
| "step": 30, |
| "train_runtime": 160.3739, |
| "train_tokens_per_second": 9794.61 |
| }, |
| { |
| "epoch": 0.9491525423728814, |
| "grad_norm": 1.0750347246493508, |
| "learning_rate": 3.929107073146197e-05, |
| "loss": 0.3741, |
| "num_input_tokens_seen": 1832200, |
| "step": 35, |
| "train_runtime": 185.7504, |
| "train_tokens_per_second": 9863.772 |
| }, |
| { |
| "epoch": 1.0813559322033899, |
| "grad_norm": 0.8734367906334863, |
| "learning_rate": 3.6255093620441834e-05, |
| "loss": 0.2731, |
| "num_input_tokens_seen": 2090208, |
| "step": 40, |
| "train_runtime": 210.3732, |
| "train_tokens_per_second": 9935.716 |
| }, |
| { |
| "epoch": 1.2169491525423728, |
| "grad_norm": 0.8766224598320921, |
| "learning_rate": 3.2994098114281134e-05, |
| "loss": 0.1815, |
| "num_input_tokens_seen": 2354848, |
| "step": 45, |
| "train_runtime": 235.7824, |
| "train_tokens_per_second": 9987.38 |
| }, |
| { |
| "epoch": 1.352542372881356, |
| "grad_norm": 0.8177117110259531, |
| "learning_rate": 2.9573279936809667e-05, |
| "loss": 0.1772, |
| "num_input_tokens_seen": 2613552, |
| "step": 50, |
| "train_runtime": 261.07, |
| "train_tokens_per_second": 10010.924 |
| }, |
| { |
| "epoch": 1.488135593220339, |
| "grad_norm": 0.7233871182762651, |
| "learning_rate": 2.606103007990371e-05, |
| "loss": 0.1767, |
| "num_input_tokens_seen": 2880040, |
| "step": 55, |
| "train_runtime": 286.582, |
| "train_tokens_per_second": 10049.62 |
| }, |
| { |
| "epoch": 1.623728813559322, |
| "grad_norm": 0.6985657612141047, |
| "learning_rate": 2.2527567490893758e-05, |
| "loss": 0.1729, |
| "num_input_tokens_seen": 3141872, |
| "step": 60, |
| "train_runtime": 312.1829, |
| "train_tokens_per_second": 10064.203 |
| }, |
| { |
| "epoch": 1.759322033898305, |
| "grad_norm": 0.7398167457365813, |
| "learning_rate": 1.904353521442088e-05, |
| "loss": 0.1704, |
| "num_input_tokens_seen": 3402088, |
| "step": 65, |
| "train_runtime": 337.3138, |
| "train_tokens_per_second": 10085.824 |
| }, |
| { |
| "epoch": 1.8949152542372882, |
| "grad_norm": 0.7777946582004355, |
| "learning_rate": 1.567858805549229e-05, |
| "loss": 0.1628, |
| "num_input_tokens_seen": 3663008, |
| "step": 70, |
| "train_runtime": 363.0243, |
| "train_tokens_per_second": 10090.254 |
| }, |
| { |
| "epoch": 2.0271186440677966, |
| "grad_norm": 0.709214405015576, |
| "learning_rate": 1.2500000000000006e-05, |
| "loss": 0.1438, |
| "num_input_tokens_seen": 3918864, |
| "step": 75, |
| "train_runtime": 387.6707, |
| "train_tokens_per_second": 10108.744 |
| }, |
| { |
| "epoch": 2.1627118644067798, |
| "grad_norm": 0.45507439489415397, |
| "learning_rate": 9.571319233963627e-06, |
| "loss": 0.0713, |
| "num_input_tokens_seen": 4183256, |
| "step": 80, |
| "train_runtime": 413.1057, |
| "train_tokens_per_second": 10126.357 |
| }, |
| { |
| "epoch": 2.298305084745763, |
| "grad_norm": 0.45196090531442223, |
| "learning_rate": 6.951097651136889e-06, |
| "loss": 0.0633, |
| "num_input_tokens_seen": 4448016, |
| "step": 85, |
| "train_runtime": 438.6986, |
| "train_tokens_per_second": 10139.116 |
| }, |
| { |
| "epoch": 2.4338983050847456, |
| "grad_norm": 0.4885688394161113, |
| "learning_rate": 4.691720249402856e-06, |
| "loss": 0.0599, |
| "num_input_tokens_seen": 4701808, |
| "step": 90, |
| "train_runtime": 464.0382, |
| "train_tokens_per_second": 10132.373 |
| }, |
| { |
| "epoch": 2.5694915254237287, |
| "grad_norm": 0.4741771649848976, |
| "learning_rate": 2.8383578193475315e-06, |
| "loss": 0.0574, |
| "num_input_tokens_seen": 4960648, |
| "step": 95, |
| "train_runtime": 489.5242, |
| "train_tokens_per_second": 10133.612 |
| }, |
| { |
| "epoch": 2.705084745762712, |
| "grad_norm": 0.43334153315557006, |
| "learning_rate": 1.428063863472895e-06, |
| "loss": 0.0534, |
| "num_input_tokens_seen": 5224432, |
| "step": 100, |
| "train_runtime": 514.9998, |
| "train_tokens_per_second": 10144.533 |
| }, |
| { |
| "epoch": 2.840677966101695, |
| "grad_norm": 0.4532410169042894, |
| "learning_rate": 4.890338009668316e-07, |
| "loss": 0.0536, |
| "num_input_tokens_seen": 5489120, |
| "step": 105, |
| "train_runtime": 754.9559, |
| "train_tokens_per_second": 7270.782 |
| }, |
| { |
| "epoch": 2.976271186440678, |
| "grad_norm": 0.484714255010592, |
| "learning_rate": 4.004126844042444e-08, |
| "loss": 0.0559, |
| "num_input_tokens_seen": 5751896, |
| "step": 110, |
| "train_runtime": 780.4575, |
| "train_tokens_per_second": 7369.903 |
| }, |
| { |
| "epoch": 3.0, |
| "num_input_tokens_seen": 5798376, |
| "step": 111, |
| "total_flos": 11582938693632.0, |
| "train_loss": 0.22720730056365332, |
| "train_runtime": 933.6635, |
| "train_samples_per_second": 7.583, |
| "train_steps_per_second": 0.119 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 111, |
| "num_input_tokens_seen": 5798376, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 11582938693632.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|