{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 260, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 1.3042053000070155, "epoch": 0.09633911368015415, "grad_norm": 0.0284423828125, "learning_rate": 0.00019801724878485438, "loss": 1.2869200134277343, "mean_token_accuracy": 0.7155025105923414, "num_tokens": 341278.0, "step": 25 }, { "entropy": 0.7955485013616271, "epoch": 0.1926782273603083, "grad_norm": 0.02880859375, "learning_rate": 0.00018721912643966055, "loss": 0.7173126220703125, "mean_token_accuracy": 0.8372970945760607, "num_tokens": 686260.0, "step": 50 }, { "entropy": 0.47250107586733064, "epoch": 0.28901734104046245, "grad_norm": 0.032958984375, "learning_rate": 0.00016801727377709194, "loss": 0.4062905502319336, "mean_token_accuracy": 0.9038537470623851, "num_tokens": 1030728.0, "step": 75 }, { "entropy": 0.422030293449061, "epoch": 0.3853564547206166, "grad_norm": 0.031494140625, "learning_rate": 0.00014226182617406996, "loss": 0.3915938568115234, "mean_token_accuracy": 0.919415234401822, "num_tokens": 1373086.0, "step": 100 }, { "entropy": 0.28425011701183395, "epoch": 0.4816955684007707, "grad_norm": 0.038330078125, "learning_rate": 0.00011243437046474853, "loss": 0.2796792221069336, "mean_token_accuracy": 0.9474510619416833, "num_tokens": 1719238.0, "step": 125 }, { "entropy": 0.26716867173672654, "epoch": 0.5780346820809249, "grad_norm": 0.0118408203125, "learning_rate": 8.140883928370855e-05, "loss": 0.2717741394042969, "mean_token_accuracy": 0.9527556761726736, "num_tokens": 2061582.0, "step": 150 }, { "entropy": 0.18072416053852067, "epoch": 0.674373795761079, "grad_norm": 0.02587890625, "learning_rate": 5.217460213786821e-05, "loss": 0.17744203567504882, "mean_token_accuracy": 0.9676880412921309, "num_tokens": 2401754.0, "step": 175 }, { "entropy": 0.20998084332444705, "epoch": 0.7707129094412332, "grad_norm": 0.01416015625, "learning_rate": 2.7548433914072734e-05, "loss": 0.21284116744995119, "mean_token_accuracy": 0.9640939806401729, "num_tokens": 2745913.0, "step": 200 }, { "entropy": 0.19917077658581547, "epoch": 0.8670520231213873, "grad_norm": 0.01226806640625, "learning_rate": 9.903113209758096e-06, "loss": 0.2105904006958008, "mean_token_accuracy": 0.9653056095913053, "num_tokens": 3084344.0, "step": 225 }, { "entropy": 0.26477424080716444, "epoch": 0.9633911368015414, "grad_norm": 0.01068115234375, "learning_rate": 9.388005586947191e-07, "loss": 0.28103506088256835, "mean_token_accuracy": 0.9547687808051706, "num_tokens": 3427378.0, "step": 250 } ], "logging_steps": 25, "max_steps": 260, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.519335256379996e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }