| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.001779061865097297, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 8.895309325486485e-05, |
| "grad_norm": 0.1897999793291092, |
| "learning_rate": 2.5e-05, |
| "loss": 2.3044, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0001779061865097297, |
| "grad_norm": 0.23376008868217468, |
| "learning_rate": 2.5e-05, |
| "loss": 2.3592, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0002668592797645945, |
| "grad_norm": 0.13603591918945312, |
| "learning_rate": 2.5e-05, |
| "loss": 2.084, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0003558123730194594, |
| "grad_norm": 0.34403547644615173, |
| "learning_rate": 2.5e-05, |
| "loss": 2.076, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00044476546627432423, |
| "grad_norm": 0.1530287265777588, |
| "learning_rate": 2.5e-05, |
| "loss": 1.7806, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.000533718559529189, |
| "grad_norm": 0.344722718000412, |
| "learning_rate": 2.5e-05, |
| "loss": 1.5749, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0006226716527840539, |
| "grad_norm": 0.38181471824645996, |
| "learning_rate": 2.5e-05, |
| "loss": 1.5243, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0007116247460389188, |
| "grad_norm": 0.25272616744041443, |
| "learning_rate": 2.5e-05, |
| "loss": 1.4728, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0008005778392937836, |
| "grad_norm": 0.6237773299217224, |
| "learning_rate": 2.5e-05, |
| "loss": 1.3028, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.0008895309325486485, |
| "grad_norm": 0.5120233297348022, |
| "learning_rate": 2.5e-05, |
| "loss": 1.0721, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0009784840258035134, |
| "grad_norm": 0.6732835173606873, |
| "learning_rate": 2.5e-05, |
| "loss": 0.8071, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.001067437119058378, |
| "grad_norm": 0.5018543004989624, |
| "learning_rate": 2.5e-05, |
| "loss": 0.8138, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.001156390212313243, |
| "grad_norm": 0.24052944779396057, |
| "learning_rate": 2.5e-05, |
| "loss": 0.702, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.0012453433055681078, |
| "grad_norm": 0.2696482837200165, |
| "learning_rate": 2.5e-05, |
| "loss": 0.6689, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.0013342963988229727, |
| "grad_norm": 0.21222035586833954, |
| "learning_rate": 2.5e-05, |
| "loss": 0.7498, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.0014232494920778376, |
| "grad_norm": 0.27624765038490295, |
| "learning_rate": 2.5e-05, |
| "loss": 0.6544, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0015122025853327023, |
| "grad_norm": 0.8518249988555908, |
| "learning_rate": 2.5e-05, |
| "loss": 0.6908, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.0016011556785875672, |
| "grad_norm": 0.588943600654602, |
| "learning_rate": 2.5e-05, |
| "loss": 0.7507, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.001690108771842432, |
| "grad_norm": 0.4197629690170288, |
| "learning_rate": 2.5e-05, |
| "loss": 0.8013, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.001779061865097297, |
| "grad_norm": 0.48924073576927185, |
| "learning_rate": 2.5e-05, |
| "loss": 0.6292, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.087597314048e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|