{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 252, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11940298507462686, "grad_norm": 0.2085999995470047, "learning_rate": 3.461538461538462e-05, "loss": 0.5328742027282715, "step": 10 }, { "epoch": 0.23880597014925373, "grad_norm": 0.17245711386203766, "learning_rate": 7.307692307692307e-05, "loss": 0.28172030448913576, "step": 20 }, { "epoch": 0.3582089552238806, "grad_norm": 0.1451202780008316, "learning_rate": 9.995652871840006e-05, "loss": 0.22340788841247558, "step": 30 }, { "epoch": 0.47761194029850745, "grad_norm": 0.12613239884376526, "learning_rate": 9.918580696928205e-05, "loss": 0.19026432037353516, "step": 40 }, { "epoch": 0.5970149253731343, "grad_norm": 0.10020812600851059, "learning_rate": 9.746617916180905e-05, "loss": 0.1776690363883972, "step": 50 }, { "epoch": 0.7164179104477612, "grad_norm": 0.11177698522806168, "learning_rate": 9.483082082658983e-05, "loss": 0.16183719635009766, "step": 60 }, { "epoch": 0.835820895522388, "grad_norm": 0.11473167687654495, "learning_rate": 9.133057401836159e-05, "loss": 0.18729711771011354, "step": 70 }, { "epoch": 0.9552238805970149, "grad_norm": 0.09883058816194534, "learning_rate": 8.703296645714609e-05, "loss": 0.17510976791381835, "step": 80 }, { "epoch": 1.0716417910447762, "grad_norm": 0.09465258568525314, "learning_rate": 8.202090876498144e-05, "loss": 0.15757997035980226, "step": 90 }, { "epoch": 1.191044776119403, "grad_norm": 0.15254873037338257, "learning_rate": 7.639109493149537e-05, "loss": 0.14641659259796141, "step": 100 }, { "epoch": 1.191044776119403, "eval_loss": 0.1869809627532959, "eval_runtime": 13.646, "eval_samples_per_second": 1.099, "eval_steps_per_second": 0.586, "step": 100 }, { "epoch": 1.31044776119403, "grad_norm": 0.08413254469633102, "learning_rate": 7.02521368669762e-05, "loss": 0.11394469738006592, "step": 110 }, { "epoch": 1.4298507462686567, "grad_norm": 0.07326336950063705, "learning_rate": 6.372246903165446e-05, "loss": 0.11726579666137696, "step": 120 }, { "epoch": 1.5492537313432835, "grad_norm": 0.10139299929141998, "learning_rate": 5.692806356566095e-05, "loss": 0.12615565061569214, "step": 130 }, { "epoch": 1.6686567164179105, "grad_norm": 0.08335951715707779, "learning_rate": 5e-05, "loss": 0.11287055015563965, "step": 140 }, { "epoch": 1.7880597014925375, "grad_norm": 0.07054048031568527, "learning_rate": 4.307193643433907e-05, "loss": 0.10663604736328125, "step": 150 }, { "epoch": 1.9074626865671642, "grad_norm": 0.06786229461431503, "learning_rate": 3.627753096834555e-05, "loss": 0.107212233543396, "step": 160 }, { "epoch": 2.023880597014925, "grad_norm": 0.07071981579065323, "learning_rate": 2.9747863133023803e-05, "loss": 0.11782596111297608, "step": 170 }, { "epoch": 2.1432835820895524, "grad_norm": 0.09423048794269562, "learning_rate": 2.3608905068504637e-05, "loss": 0.0834022581577301, "step": 180 }, { "epoch": 2.262686567164179, "grad_norm": 0.09003670513629913, "learning_rate": 1.7979091235018565e-05, "loss": 0.08206533193588257, "step": 190 }, { "epoch": 2.382089552238806, "grad_norm": 0.09670283645391464, "learning_rate": 1.2967033542853917e-05, "loss": 0.08110529780387879, "step": 200 }, { "epoch": 2.382089552238806, "eval_loss": 0.1738630086183548, "eval_runtime": 13.5313, "eval_samples_per_second": 1.109, "eval_steps_per_second": 0.591, "step": 200 }, { "epoch": 2.5014925373134327, "grad_norm": 0.09121183305978775, "learning_rate": 8.669425981638413e-06, "loss": 0.08261927366256713, "step": 210 }, { "epoch": 2.62089552238806, "grad_norm": 0.09510096907615662, "learning_rate": 5.169179173410177e-06, "loss": 0.08233516216278076, "step": 220 }, { "epoch": 2.7402985074626867, "grad_norm": 0.07058050483465195, "learning_rate": 2.533820838190959e-06, "loss": 0.07825937271118164, "step": 230 }, { "epoch": 2.8597014925373134, "grad_norm": 0.09087616205215454, "learning_rate": 8.141930307179468e-07, "loss": 0.08194915056228638, "step": 240 }, { "epoch": 2.97910447761194, "grad_norm": 0.078521728515625, "learning_rate": 4.3471281599938295e-08, "loss": 0.08224834203720092, "step": 250 }, { "epoch": 3.0, "step": 252, "total_flos": 1.3584580095246336e+16, "train_loss": 0.14724678906892974, "train_runtime": 4599.332, "train_samples_per_second": 0.437, "train_steps_per_second": 0.055 } ], "logging_steps": 10, "max_steps": 252, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3584580095246336e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }