| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 30.303030303030305, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.6655681133270264, |
| "eval_runtime": 31.9535, |
| "eval_samples_per_second": 16.242, |
| "eval_steps_per_second": 1.033, |
| "step": 33 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.34202826023101807, |
| "eval_runtime": 32.6272, |
| "eval_samples_per_second": 15.907, |
| "eval_steps_per_second": 1.011, |
| "step": 66 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.1993253082036972, |
| "eval_runtime": 34.1947, |
| "eval_samples_per_second": 15.178, |
| "eval_steps_per_second": 0.965, |
| "step": 99 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.12108779698610306, |
| "eval_runtime": 33.5003, |
| "eval_samples_per_second": 15.492, |
| "eval_steps_per_second": 0.985, |
| "step": 132 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 0.08059267699718475, |
| "eval_runtime": 33.0363, |
| "eval_samples_per_second": 15.71, |
| "eval_steps_per_second": 0.999, |
| "step": 165 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 0.0539543516933918, |
| "eval_runtime": 33.3191, |
| "eval_samples_per_second": 15.577, |
| "eval_steps_per_second": 0.99, |
| "step": 198 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 0.033514220267534256, |
| "eval_runtime": 33.9315, |
| "eval_samples_per_second": 15.296, |
| "eval_steps_per_second": 0.973, |
| "step": 231 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.02793893776834011, |
| "eval_runtime": 33.1021, |
| "eval_samples_per_second": 15.679, |
| "eval_steps_per_second": 0.997, |
| "step": 264 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 0.017579322680830956, |
| "eval_runtime": 33.1929, |
| "eval_samples_per_second": 15.636, |
| "eval_steps_per_second": 0.994, |
| "step": 297 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.017992401495575905, |
| "eval_runtime": 33.5989, |
| "eval_samples_per_second": 15.447, |
| "eval_steps_per_second": 0.982, |
| "step": 330 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_loss": 0.011186002753674984, |
| "eval_runtime": 33.2114, |
| "eval_samples_per_second": 15.627, |
| "eval_steps_per_second": 0.994, |
| "step": 363 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 0.009413644671440125, |
| "eval_runtime": 33.6973, |
| "eval_samples_per_second": 15.402, |
| "eval_steps_per_second": 0.979, |
| "step": 396 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_loss": 0.008357277140021324, |
| "eval_runtime": 34.2116, |
| "eval_samples_per_second": 15.17, |
| "eval_steps_per_second": 0.965, |
| "step": 429 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 0.006698057986795902, |
| "eval_runtime": 32.3567, |
| "eval_samples_per_second": 16.04, |
| "eval_steps_per_second": 1.02, |
| "step": 462 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_loss": 0.005631112959235907, |
| "eval_runtime": 30.3383, |
| "eval_samples_per_second": 17.107, |
| "eval_steps_per_second": 1.088, |
| "step": 495 |
| }, |
| { |
| "epoch": 15.15, |
| "learning_rate": 1.2424242424242425e-05, |
| "loss": 0.1575, |
| "step": 500 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 0.00457022013142705, |
| "eval_runtime": 30.6053, |
| "eval_samples_per_second": 16.958, |
| "eval_steps_per_second": 1.078, |
| "step": 528 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_loss": 0.005157523322850466, |
| "eval_runtime": 30.0767, |
| "eval_samples_per_second": 17.256, |
| "eval_steps_per_second": 1.097, |
| "step": 561 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_loss": 0.0044335490092635155, |
| "eval_runtime": 29.0299, |
| "eval_samples_per_second": 17.878, |
| "eval_steps_per_second": 1.137, |
| "step": 594 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_loss": 0.003722449066117406, |
| "eval_runtime": 28.4937, |
| "eval_samples_per_second": 18.215, |
| "eval_steps_per_second": 1.158, |
| "step": 627 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_loss": 0.004425578285008669, |
| "eval_runtime": 32.1425, |
| "eval_samples_per_second": 16.147, |
| "eval_steps_per_second": 1.027, |
| "step": 660 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_loss": 0.0040681445971131325, |
| "eval_runtime": 28.8069, |
| "eval_samples_per_second": 18.017, |
| "eval_steps_per_second": 1.146, |
| "step": 693 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_loss": 0.003019771073013544, |
| "eval_runtime": 28.6404, |
| "eval_samples_per_second": 18.121, |
| "eval_steps_per_second": 1.152, |
| "step": 726 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_loss": 0.002829624805599451, |
| "eval_runtime": 29.787, |
| "eval_samples_per_second": 17.424, |
| "eval_steps_per_second": 1.108, |
| "step": 759 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_loss": 0.002751641208305955, |
| "eval_runtime": 28.377, |
| "eval_samples_per_second": 18.289, |
| "eval_steps_per_second": 1.163, |
| "step": 792 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_loss": 0.002945221494883299, |
| "eval_runtime": 29.5958, |
| "eval_samples_per_second": 17.536, |
| "eval_steps_per_second": 1.115, |
| "step": 825 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_loss": 0.0026160639245063066, |
| "eval_runtime": 29.0161, |
| "eval_samples_per_second": 17.887, |
| "eval_steps_per_second": 1.137, |
| "step": 858 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_loss": 0.002537393243983388, |
| "eval_runtime": 28.4904, |
| "eval_samples_per_second": 18.217, |
| "eval_steps_per_second": 1.158, |
| "step": 891 |
| }, |
| { |
| "epoch": 28.0, |
| "eval_loss": 0.00242584478110075, |
| "eval_runtime": 29.437, |
| "eval_samples_per_second": 17.631, |
| "eval_steps_per_second": 1.121, |
| "step": 924 |
| }, |
| { |
| "epoch": 29.0, |
| "eval_loss": 0.0026495754718780518, |
| "eval_runtime": 28.3889, |
| "eval_samples_per_second": 18.282, |
| "eval_steps_per_second": 1.162, |
| "step": 957 |
| }, |
| { |
| "epoch": 30.0, |
| "eval_loss": 0.0023259243462234735, |
| "eval_runtime": 28.1977, |
| "eval_samples_per_second": 18.406, |
| "eval_steps_per_second": 1.17, |
| "step": 990 |
| }, |
| { |
| "epoch": 30.3, |
| "learning_rate": 4.848484848484849e-06, |
| "loss": 0.0065, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 1320, |
| "num_train_epochs": 40, |
| "save_steps": 500, |
| "total_flos": 278856790097838.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|