{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 267, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 1.8215227667987346, "epoch": 0.0937426763534099, "grad_norm": 0.0191650390625, "learning_rate": 0.00019833656768294662, "loss": 1.8891368103027344, "mean_token_accuracy": 0.5957115419581532, "num_tokens": 317950.0, "step": 25 }, { "entropy": 1.5572059528529645, "epoch": 0.1874853527068198, "grad_norm": 0.04345703125, "learning_rate": 0.00018837086450537193, "loss": 1.5083665466308593, "mean_token_accuracy": 0.6573587663751096, "num_tokens": 621644.0, "step": 50 }, { "entropy": 1.1745702652819454, "epoch": 0.28122802906022965, "grad_norm": 0.05126953125, "learning_rate": 0.00017027885831450318, "loss": 1.1137271881103517, "mean_token_accuracy": 0.7345353902876377, "num_tokens": 932160.0, "step": 75 }, { "entropy": 0.9317568638548255, "epoch": 0.3749707054136396, "grad_norm": 0.034423828125, "learning_rate": 0.00014572423233046386, "loss": 0.8523539733886719, "mean_token_accuracy": 0.8004629289358854, "num_tokens": 1238125.0, "step": 100 }, { "entropy": 0.7057386192120612, "epoch": 0.46871338176704946, "grad_norm": 0.054931640625, "learning_rate": 0.00011696495168962847, "loss": 0.6438381958007813, "mean_token_accuracy": 0.8506827702745795, "num_tokens": 1548587.0, "step": 125 }, { "entropy": 0.6799281437788158, "epoch": 0.5624560581204593, "grad_norm": 0.05126953125, "learning_rate": 8.664562816806022e-05, "loss": 0.624417495727539, "mean_token_accuracy": 0.8625156116485596, "num_tokens": 1856691.0, "step": 150 }, { "entropy": 0.6683373341057449, "epoch": 0.6561987344738692, "grad_norm": 0.032470703125, "learning_rate": 5.755433011241851e-05, "loss": 0.6324382781982422, "mean_token_accuracy": 0.8631715876888484, "num_tokens": 2165370.0, "step": 175 }, { "entropy": 0.522582174600102, "epoch": 0.7499414108272792, "grad_norm": 0.0257568359375, "learning_rate": 3.236620056190972e-05, "loss": 0.4893897247314453, "mean_token_accuracy": 0.8958207304775715, "num_tokens": 2477308.0, "step": 200 }, { "entropy": 0.5211648133769632, "epoch": 0.843684087180689, "grad_norm": 0.02978515625, "learning_rate": 1.339745962155613e-05, "loss": 0.4698557662963867, "mean_token_accuracy": 0.897075667232275, "num_tokens": 2780765.0, "step": 225 }, { "entropy": 0.5598523593321443, "epoch": 0.9374267635340989, "grad_norm": 0.0281982421875, "learning_rate": 2.392412244407294e-06, "loss": 0.5123196792602539, "mean_token_accuracy": 0.8904222106188535, "num_tokens": 3094212.0, "step": 250 } ], "logging_steps": 25, "max_steps": 267, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4085395007888691e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }