| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 260, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 1.3042053000070155, |
| "epoch": 0.09633911368015415, |
| "grad_norm": 0.0284423828125, |
| "learning_rate": 0.00019801724878485438, |
| "loss": 1.2869200134277343, |
| "mean_token_accuracy": 0.7155025105923414, |
| "num_tokens": 341278.0, |
| "step": 25 |
| }, |
| { |
| "entropy": 0.7955485013616271, |
| "epoch": 0.1926782273603083, |
| "grad_norm": 0.02880859375, |
| "learning_rate": 0.00018721912643966055, |
| "loss": 0.7173126220703125, |
| "mean_token_accuracy": 0.8372970945760607, |
| "num_tokens": 686260.0, |
| "step": 50 |
| }, |
| { |
| "entropy": 0.47250107586733064, |
| "epoch": 0.28901734104046245, |
| "grad_norm": 0.032958984375, |
| "learning_rate": 0.00016801727377709194, |
| "loss": 0.4062905502319336, |
| "mean_token_accuracy": 0.9038537470623851, |
| "num_tokens": 1030728.0, |
| "step": 75 |
| }, |
| { |
| "entropy": 0.422030293449061, |
| "epoch": 0.3853564547206166, |
| "grad_norm": 0.031494140625, |
| "learning_rate": 0.00014226182617406996, |
| "loss": 0.3915938568115234, |
| "mean_token_accuracy": 0.919415234401822, |
| "num_tokens": 1373086.0, |
| "step": 100 |
| }, |
| { |
| "entropy": 0.28425011701183395, |
| "epoch": 0.4816955684007707, |
| "grad_norm": 0.038330078125, |
| "learning_rate": 0.00011243437046474853, |
| "loss": 0.2796792221069336, |
| "mean_token_accuracy": 0.9474510619416833, |
| "num_tokens": 1719238.0, |
| "step": 125 |
| }, |
| { |
| "entropy": 0.26716867173672654, |
| "epoch": 0.5780346820809249, |
| "grad_norm": 0.0118408203125, |
| "learning_rate": 8.140883928370855e-05, |
| "loss": 0.2717741394042969, |
| "mean_token_accuracy": 0.9527556761726736, |
| "num_tokens": 2061582.0, |
| "step": 150 |
| }, |
| { |
| "entropy": 0.18072416053852067, |
| "epoch": 0.674373795761079, |
| "grad_norm": 0.02587890625, |
| "learning_rate": 5.217460213786821e-05, |
| "loss": 0.17744203567504882, |
| "mean_token_accuracy": 0.9676880412921309, |
| "num_tokens": 2401754.0, |
| "step": 175 |
| }, |
| { |
| "entropy": 0.20998084332444705, |
| "epoch": 0.7707129094412332, |
| "grad_norm": 0.01416015625, |
| "learning_rate": 2.7548433914072734e-05, |
| "loss": 0.21284116744995119, |
| "mean_token_accuracy": 0.9640939806401729, |
| "num_tokens": 2745913.0, |
| "step": 200 |
| }, |
| { |
| "entropy": 0.19917077658581547, |
| "epoch": 0.8670520231213873, |
| "grad_norm": 0.01226806640625, |
| "learning_rate": 9.903113209758096e-06, |
| "loss": 0.2105904006958008, |
| "mean_token_accuracy": 0.9653056095913053, |
| "num_tokens": 3084344.0, |
| "step": 225 |
| }, |
| { |
| "entropy": 0.26477424080716444, |
| "epoch": 0.9633911368015414, |
| "grad_norm": 0.01068115234375, |
| "learning_rate": 9.388005586947191e-07, |
| "loss": 0.28103506088256835, |
| "mean_token_accuracy": 0.9547687808051706, |
| "num_tokens": 3427378.0, |
| "step": 250 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 260, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.519335256379996e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|