| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.3699849693606197, |
| "eval_steps": 500, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.018499248468030985, |
| "grad_norm": 2.7567262649536133, |
| "learning_rate": 9.9e-06, |
| "loss": 2.3993, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03699849693606197, |
| "grad_norm": 2.469092845916748, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 1.8863, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05549774540409296, |
| "grad_norm": 2.287935256958008, |
| "learning_rate": 1.987638906230491e-05, |
| "loss": 1.6964, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.07399699387212394, |
| "grad_norm": 2.158879518508911, |
| "learning_rate": 1.9751529529279564e-05, |
| "loss": 1.6172, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.09249624234015492, |
| "grad_norm": 2.3301937580108643, |
| "learning_rate": 1.9626669996254215e-05, |
| "loss": 1.5604, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.09249624234015492, |
| "eval_loss": 1.5311822891235352, |
| "eval_runtime": 836.5935, |
| "eval_samples_per_second": 22.154, |
| "eval_steps_per_second": 11.077, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.11099549080818592, |
| "grad_norm": 1.958460807800293, |
| "learning_rate": 1.950181046322887e-05, |
| "loss": 1.5221, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.1294947392762169, |
| "grad_norm": 2.088949680328369, |
| "learning_rate": 1.9376950930203523e-05, |
| "loss": 1.5003, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.14799398774424788, |
| "grad_norm": 2.262972593307495, |
| "learning_rate": 1.9252091397178178e-05, |
| "loss": 1.4501, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.16649323621227888, |
| "grad_norm": 1.8716174364089966, |
| "learning_rate": 1.912723186415283e-05, |
| "loss": 1.4359, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.18499248468030985, |
| "grad_norm": 1.6942836046218872, |
| "learning_rate": 1.9002372331127483e-05, |
| "loss": 1.4317, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.18499248468030985, |
| "eval_loss": 1.4133305549621582, |
| "eval_runtime": 838.9381, |
| "eval_samples_per_second": 22.092, |
| "eval_steps_per_second": 11.046, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.20349173314834085, |
| "grad_norm": 2.1511948108673096, |
| "learning_rate": 1.8877512798102137e-05, |
| "loss": 1.4053, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.22199098161637185, |
| "grad_norm": 1.7339736223220825, |
| "learning_rate": 1.875265326507679e-05, |
| "loss": 1.3915, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.24049023008440282, |
| "grad_norm": 1.9543105363845825, |
| "learning_rate": 1.8627793732051446e-05, |
| "loss": 1.3825, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.2589894785524338, |
| "grad_norm": 1.7334178686141968, |
| "learning_rate": 1.8502934199026097e-05, |
| "loss": 1.3708, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.2774887270204648, |
| "grad_norm": 1.5893480777740479, |
| "learning_rate": 1.837807466600075e-05, |
| "loss": 1.3594, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.2774887270204648, |
| "eval_loss": 1.3516819477081299, |
| "eval_runtime": 838.2447, |
| "eval_samples_per_second": 22.11, |
| "eval_steps_per_second": 11.055, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.29598797548849576, |
| "grad_norm": 1.9415546655654907, |
| "learning_rate": 1.8253215132975405e-05, |
| "loss": 1.3602, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.3144872239565268, |
| "grad_norm": 1.6256144046783447, |
| "learning_rate": 1.812835559995006e-05, |
| "loss": 1.3297, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.33298647242455776, |
| "grad_norm": 1.6166146993637085, |
| "learning_rate": 1.800349606692471e-05, |
| "loss": 1.3273, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.3514857208925887, |
| "grad_norm": 1.7110105752944946, |
| "learning_rate": 1.7878636533899365e-05, |
| "loss": 1.2984, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.3699849693606197, |
| "grad_norm": 1.4126442670822144, |
| "learning_rate": 1.7753777000874016e-05, |
| "loss": 1.3113, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.3699849693606197, |
| "eval_loss": 1.3114128112792969, |
| "eval_runtime": 836.695, |
| "eval_samples_per_second": 22.151, |
| "eval_steps_per_second": 11.076, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 16218, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.6722690048e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|