{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 2.442070857435465, "epoch": 0.2014098690835851, "grad_norm": 0.048095703125, "learning_rate": 0.0001868180920098644, "loss": 2.5606924438476564, "mean_token_accuracy": 0.5233748442679643, "num_tokens": 153496.0, "step": 25 }, { "entropy": 1.7652787639200687, "epoch": 0.4028197381671702, "grad_norm": 0.11083984375, "learning_rate": 0.00013916603579471705, "loss": 1.7991661071777343, "mean_token_accuracy": 0.6229101818054914, "num_tokens": 302161.0, "step": 50 }, { "entropy": 1.7339335941150784, "epoch": 0.6042296072507553, "grad_norm": 0.083984375, "learning_rate": 7.558394309716088e-05, "loss": 1.7994984436035155, "mean_token_accuracy": 0.6460683711618185, "num_tokens": 449627.0, "step": 75 }, { "entropy": 1.5877810321189463, "epoch": 0.8056394763343404, "grad_norm": 0.083984375, "learning_rate": 2.1932614882827197e-05, "loss": 1.6731886291503906, "mean_token_accuracy": 0.6800274739414454, "num_tokens": 599747.0, "step": 100 }, { "entropy": 1.7318128037213352, "epoch": 1.0, "grad_norm": 0.1669921875, "learning_rate": 3.370346964876036e-08, "loss": 1.7769248962402344, "mean_token_accuracy": 0.6527638324184121, "num_tokens": 746252.0, "step": 125 } ], "logging_steps": 25, "max_steps": 125, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.1839539976351744e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }