| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9004832647216753, |
| "eval_steps": 1677, |
| "global_step": 15093, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9403376886820598e-05, |
| "loss": 3.0319, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.8806753773641194e-05, |
| "loss": 2.8898, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.8210130660461786e-05, |
| "loss": 2.8471, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_f1": 0.09119675351283209, |
| "eval_loss": 2.799565315246582, |
| "eval_runtime": 565.0093, |
| "eval_samples_per_second": 210.945, |
| "eval_steps_per_second": 3.297, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.7613507547282383e-05, |
| "loss": 2.826, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.701688443410298e-05, |
| "loss": 2.8064, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.6420261320923575e-05, |
| "loss": 2.7912, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_f1": 0.09886291342405676, |
| "eval_loss": 2.7552475929260254, |
| "eval_runtime": 565.8136, |
| "eval_samples_per_second": 210.645, |
| "eval_steps_per_second": 3.293, |
| "step": 3354 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.582363820774417e-05, |
| "loss": 2.769, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.5227015094564765e-05, |
| "loss": 2.7626, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.463039198138536e-05, |
| "loss": 2.7493, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.4033768868205956e-05, |
| "loss": 2.7485, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.3, |
| "eval_f1": 0.10934945582171454, |
| "eval_loss": 2.701012134552002, |
| "eval_runtime": 563.4598, |
| "eval_samples_per_second": 211.525, |
| "eval_steps_per_second": 3.306, |
| "step": 5031 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.343714575502655e-05, |
| "loss": 2.7355, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.2840522641847146e-05, |
| "loss": 2.7254, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.2243899528667742e-05, |
| "loss": 2.7163, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_f1": 0.11683918795621939, |
| "eval_loss": 2.6925740242004395, |
| "eval_runtime": 561.7517, |
| "eval_samples_per_second": 212.168, |
| "eval_steps_per_second": 3.316, |
| "step": 6708 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.1647276415488338e-05, |
| "loss": 2.7101, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.1050653302308931e-05, |
| "loss": 2.7063, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.0454030189129527e-05, |
| "loss": 2.7037, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_f1": 0.1264069487053307, |
| "eval_loss": 2.657057762145996, |
| "eval_runtime": 557.0113, |
| "eval_samples_per_second": 213.974, |
| "eval_steps_per_second": 3.345, |
| "step": 8385 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.857407075950123e-06, |
| "loss": 2.6922, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.260783962770719e-06, |
| "loss": 2.6857, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.664160849591315e-06, |
| "loss": 2.6796, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 8.06753773641191e-06, |
| "loss": 2.6686, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_f1": 0.1275429879642049, |
| "eval_loss": 2.6493475437164307, |
| "eval_runtime": 556.9366, |
| "eval_samples_per_second": 214.003, |
| "eval_steps_per_second": 3.345, |
| "step": 10062 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 7.470914623232505e-06, |
| "loss": 2.6798, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 6.874291510053101e-06, |
| "loss": 2.6528, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 6.277668396873695e-06, |
| "loss": 2.6469, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.7, |
| "eval_f1": 0.1396037444667752, |
| "eval_loss": 2.620657444000244, |
| "eval_runtime": 557.3053, |
| "eval_samples_per_second": 213.861, |
| "eval_steps_per_second": 3.343, |
| "step": 11739 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 5.681045283694291e-06, |
| "loss": 2.6569, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 5.0844221705148865e-06, |
| "loss": 2.6472, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 4.487799057335481e-06, |
| "loss": 2.6507, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_f1": 0.1391354358365679, |
| "eval_loss": 2.6126439571380615, |
| "eval_runtime": 556.9611, |
| "eval_samples_per_second": 213.993, |
| "eval_steps_per_second": 3.345, |
| "step": 13416 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 3.891175944156077e-06, |
| "loss": 2.6297, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 3.294552830976672e-06, |
| "loss": 2.6364, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 2.6979297177972674e-06, |
| "loss": 2.6287, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 2.101306604617863e-06, |
| "loss": 2.6269, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_f1": 0.14388305284850214, |
| "eval_loss": 2.6065549850463867, |
| "eval_runtime": 556.552, |
| "eval_samples_per_second": 214.151, |
| "eval_steps_per_second": 3.347, |
| "step": 15093 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 16761, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1677, |
| "total_flos": 4.1849662176682214e+17, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|