| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 100, |
| "global_step": 252, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.11940298507462686, |
| "grad_norm": 0.2085999995470047, |
| "learning_rate": 3.461538461538462e-05, |
| "loss": 0.5328742027282715, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.23880597014925373, |
| "grad_norm": 0.17245711386203766, |
| "learning_rate": 7.307692307692307e-05, |
| "loss": 0.28172030448913576, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.3582089552238806, |
| "grad_norm": 0.1451202780008316, |
| "learning_rate": 9.995652871840006e-05, |
| "loss": 0.22340788841247558, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.47761194029850745, |
| "grad_norm": 0.12613239884376526, |
| "learning_rate": 9.918580696928205e-05, |
| "loss": 0.19026432037353516, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5970149253731343, |
| "grad_norm": 0.10020812600851059, |
| "learning_rate": 9.746617916180905e-05, |
| "loss": 0.1776690363883972, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7164179104477612, |
| "grad_norm": 0.11177698522806168, |
| "learning_rate": 9.483082082658983e-05, |
| "loss": 0.16183719635009766, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.835820895522388, |
| "grad_norm": 0.11473167687654495, |
| "learning_rate": 9.133057401836159e-05, |
| "loss": 0.18729711771011354, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.9552238805970149, |
| "grad_norm": 0.09883058816194534, |
| "learning_rate": 8.703296645714609e-05, |
| "loss": 0.17510976791381835, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.0716417910447762, |
| "grad_norm": 0.09465258568525314, |
| "learning_rate": 8.202090876498144e-05, |
| "loss": 0.15757997035980226, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.191044776119403, |
| "grad_norm": 0.15254873037338257, |
| "learning_rate": 7.639109493149537e-05, |
| "loss": 0.14641659259796141, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.191044776119403, |
| "eval_loss": 0.1869809627532959, |
| "eval_runtime": 13.646, |
| "eval_samples_per_second": 1.099, |
| "eval_steps_per_second": 0.586, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.31044776119403, |
| "grad_norm": 0.08413254469633102, |
| "learning_rate": 7.02521368669762e-05, |
| "loss": 0.11394469738006592, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.4298507462686567, |
| "grad_norm": 0.07326336950063705, |
| "learning_rate": 6.372246903165446e-05, |
| "loss": 0.11726579666137696, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.5492537313432835, |
| "grad_norm": 0.10139299929141998, |
| "learning_rate": 5.692806356566095e-05, |
| "loss": 0.12615565061569214, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.6686567164179105, |
| "grad_norm": 0.08335951715707779, |
| "learning_rate": 5e-05, |
| "loss": 0.11287055015563965, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.7880597014925375, |
| "grad_norm": 0.07054048031568527, |
| "learning_rate": 4.307193643433907e-05, |
| "loss": 0.10663604736328125, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.9074626865671642, |
| "grad_norm": 0.06786229461431503, |
| "learning_rate": 3.627753096834555e-05, |
| "loss": 0.107212233543396, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.023880597014925, |
| "grad_norm": 0.07071981579065323, |
| "learning_rate": 2.9747863133023803e-05, |
| "loss": 0.11782596111297608, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.1432835820895524, |
| "grad_norm": 0.09423048794269562, |
| "learning_rate": 2.3608905068504637e-05, |
| "loss": 0.0834022581577301, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.262686567164179, |
| "grad_norm": 0.09003670513629913, |
| "learning_rate": 1.7979091235018565e-05, |
| "loss": 0.08206533193588257, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.382089552238806, |
| "grad_norm": 0.09670283645391464, |
| "learning_rate": 1.2967033542853917e-05, |
| "loss": 0.08110529780387879, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.382089552238806, |
| "eval_loss": 0.1738630086183548, |
| "eval_runtime": 13.5313, |
| "eval_samples_per_second": 1.109, |
| "eval_steps_per_second": 0.591, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.5014925373134327, |
| "grad_norm": 0.09121183305978775, |
| "learning_rate": 8.669425981638413e-06, |
| "loss": 0.08261927366256713, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.62089552238806, |
| "grad_norm": 0.09510096907615662, |
| "learning_rate": 5.169179173410177e-06, |
| "loss": 0.08233516216278076, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.7402985074626867, |
| "grad_norm": 0.07058050483465195, |
| "learning_rate": 2.533820838190959e-06, |
| "loss": 0.07825937271118164, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.8597014925373134, |
| "grad_norm": 0.09087616205215454, |
| "learning_rate": 8.141930307179468e-07, |
| "loss": 0.08194915056228638, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.97910447761194, |
| "grad_norm": 0.078521728515625, |
| "learning_rate": 4.3471281599938295e-08, |
| "loss": 0.08224834203720092, |
| "step": 250 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 252, |
| "total_flos": 1.3584580095246336e+16, |
| "train_loss": 0.14724678906892974, |
| "train_runtime": 4599.332, |
| "train_samples_per_second": 0.437, |
| "train_steps_per_second": 0.055 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 252, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3584580095246336e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|