{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 171, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 1.4910585768520832, "epoch": 0.14700477765527378, "grad_norm": 0.0242919921875, "learning_rate": 0.00019418443636395248, "loss": 1.5326654052734374, "mean_token_accuracy": 0.6706722094118596, "num_tokens": 304280.0, "step": 25 }, { "entropy": 0.750638470617123, "epoch": 0.29400955531054757, "grad_norm": 0.0322265625, "learning_rate": 0.00016831579129369346, "loss": 0.709859619140625, "mean_token_accuracy": 0.8301207780092955, "num_tokens": 606254.0, "step": 50 }, { "entropy": 0.3453210424515419, "epoch": 0.4410143329658214, "grad_norm": 0.033447265625, "learning_rate": 0.0001272585576455398, "loss": 0.31636102676391603, "mean_token_accuracy": 0.9236685149371624, "num_tokens": 906841.0, "step": 75 }, { "entropy": 0.27239121608203276, "epoch": 0.5880191106210951, "grad_norm": 0.030517578125, "learning_rate": 8.014095333542548e-05, "loss": 0.24910541534423827, "mean_token_accuracy": 0.9407559935003519, "num_tokens": 1210327.0, "step": 100 }, { "entropy": 0.23772386389784514, "epoch": 0.735023888276369, "grad_norm": 0.019287109375, "learning_rate": 3.7438593050527845e-05, "loss": 0.21553144454956055, "mean_token_accuracy": 0.9497649263590574, "num_tokens": 1511549.0, "step": 125 }, { "entropy": 0.19384966955753044, "epoch": 0.8820286659316428, "grad_norm": 0.0228271484375, "learning_rate": 8.645454235739903e-06, "loss": 0.1665646743774414, "mean_token_accuracy": 0.9595617589354515, "num_tokens": 1815980.0, "step": 150 } ], "logging_steps": 25, "max_steps": 171, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.763830690536858e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }