{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9996599410564497, "eval_steps": 500, "global_step": 2205, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09068238494672409, "grad_norm": 5.644063472747803, "learning_rate": 1.9395039322444043e-05, "loss": 3.3805, "step": 100 }, { "epoch": 0.18136476989344819, "grad_norm": 5.796308517456055, "learning_rate": 1.8790078644888082e-05, "loss": 3.0439, "step": 200 }, { "epoch": 0.2720471548401723, "grad_norm": 5.5042619705200195, "learning_rate": 1.8185117967332124e-05, "loss": 2.855, "step": 300 }, { "epoch": 0.36272953978689637, "grad_norm": 5.634021282196045, "learning_rate": 1.7580157289776165e-05, "loss": 2.7331, "step": 400 }, { "epoch": 0.4534119247336205, "grad_norm": 5.445876121520996, "learning_rate": 1.6975196612220207e-05, "loss": 2.6605, "step": 500 }, { "epoch": 0.5440943096803446, "grad_norm": 4.866733551025391, "learning_rate": 1.637023593466425e-05, "loss": 2.5331, "step": 600 }, { "epoch": 0.6347766946270686, "grad_norm": 5.359363079071045, "learning_rate": 1.576527525710829e-05, "loss": 2.5295, "step": 700 }, { "epoch": 0.7254590795737927, "grad_norm": 5.927916049957275, "learning_rate": 1.516031457955233e-05, "loss": 2.4087, "step": 800 }, { "epoch": 0.8161414645205168, "grad_norm": 4.596010684967041, "learning_rate": 1.4555353901996372e-05, "loss": 2.3699, "step": 900 }, { "epoch": 0.906823849467241, "grad_norm": 4.6903157234191895, "learning_rate": 1.3950393224440413e-05, "loss": 2.3565, "step": 1000 }, { "epoch": 0.9975062344139651, "grad_norm": 4.985106945037842, "learning_rate": 1.3345432546884454e-05, "loss": 2.3568, "step": 1100 }, { "epoch": 1.0883019723418725, "grad_norm": 5.238546371459961, "learning_rate": 1.2740471869328494e-05, "loss": 1.2399, "step": 1200 }, { "epoch": 1.1789843572885967, "grad_norm": 5.630727291107178, "learning_rate": 1.2135511191772536e-05, "loss": 1.2032, "step": 1300 }, { "epoch": 1.2696667422353207, "grad_norm": 5.714221000671387, "learning_rate": 1.1530550514216576e-05, "loss": 1.1826, "step": 1400 }, { "epoch": 1.360349127182045, "grad_norm": 6.117649555206299, "learning_rate": 1.0925589836660618e-05, "loss": 1.2095, "step": 1500 }, { "epoch": 1.451031512128769, "grad_norm": 6.070336818695068, "learning_rate": 1.0320629159104658e-05, "loss": 1.1953, "step": 1600 }, { "epoch": 1.5417138970754931, "grad_norm": 5.351612091064453, "learning_rate": 9.7156684815487e-06, "loss": 1.1894, "step": 1700 }, { "epoch": 1.632396282022217, "grad_norm": 5.272965431213379, "learning_rate": 9.110707803992742e-06, "loss": 1.1729, "step": 1800 }, { "epoch": 1.7230786669689413, "grad_norm": 5.650433540344238, "learning_rate": 8.505747126436782e-06, "loss": 1.1763, "step": 1900 }, { "epoch": 1.8137610519156655, "grad_norm": 4.901707172393799, "learning_rate": 7.900786448880823e-06, "loss": 1.1432, "step": 2000 }, { "epoch": 1.9044434368623895, "grad_norm": 5.2121405601501465, "learning_rate": 7.295825771324865e-06, "loss": 1.15, "step": 2100 }, { "epoch": 1.9951258218091135, "grad_norm": 5.46393346786499, "learning_rate": 6.690865093768906e-06, "loss": 1.1764, "step": 2200 } ], "logging_steps": 100, "max_steps": 3306, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.412969971426263e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }