{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.1062534587714443, "eval_steps": 100, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22136137244050913, "grad_norm": 1.6171875, "learning_rate": 0.00017809734513274337, "loss": 1.2949, "step": 100 }, { "epoch": 0.22136137244050913, "eval_loss": 1.2047334909439087, "eval_mean_token_accuracy": 0.7032503170787163, "eval_num_tokens": 1635208.0, "eval_runtime": 55.1857, "eval_samples_per_second": 23.031, "eval_steps_per_second": 2.881, "step": 100 }, { "epoch": 0.44272274488101826, "grad_norm": 1.5234375, "learning_rate": 0.00015597345132743363, "loss": 1.187, "step": 200 }, { "epoch": 0.44272274488101826, "eval_loss": 1.1497339010238647, "eval_mean_token_accuracy": 0.7146404561756542, "eval_num_tokens": 3271888.0, "eval_runtime": 54.9525, "eval_samples_per_second": 23.129, "eval_steps_per_second": 2.893, "step": 200 }, { "epoch": 0.6640841173215274, "grad_norm": 1.4140625, "learning_rate": 0.00013384955752212388, "loss": 1.1394, "step": 300 }, { "epoch": 0.6640841173215274, "eval_loss": 1.1108253002166748, "eval_mean_token_accuracy": 0.7227962376936427, "eval_num_tokens": 4908096.0, "eval_runtime": 54.9495, "eval_samples_per_second": 23.13, "eval_steps_per_second": 2.894, "step": 300 }, { "epoch": 0.8854454897620365, "grad_norm": 1.484375, "learning_rate": 0.00011172566371681417, "loss": 1.0831, "step": 400 }, { "epoch": 0.8854454897620365, "eval_loss": 1.0701380968093872, "eval_mean_token_accuracy": 0.7318694584024777, "eval_num_tokens": 6544993.0, "eval_runtime": 54.9634, "eval_samples_per_second": 23.124, "eval_steps_per_second": 2.893, "step": 400 }, { "epoch": 1.1062534587714443, "grad_norm": 1.296875, "learning_rate": 8.960176991150443e-05, "loss": 0.8872, "step": 500 }, { "epoch": 1.1062534587714443, "eval_loss": 1.084444284439087, "eval_mean_token_accuracy": 0.735802007921087, "eval_num_tokens": 8174028.0, "eval_runtime": 54.9918, "eval_samples_per_second": 23.113, "eval_steps_per_second": 2.891, "step": 500 } ], "logging_steps": 100, "max_steps": 904, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.163091277296435e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }