{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 377, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "entropy": 1.5562486915290357, "epoch": 0.06635700066357, "grad_norm": 0.0184326171875, "learning_rate": 0.00019946708199115211, "loss": 1.6421920776367187, "mean_token_accuracy": 0.6302989659458399, "num_tokens": 340197.0, "step": 25 }, { "entropy": 1.2358380392193795, "epoch": 0.13271400132714, "grad_norm": 0.034912109375, "learning_rate": 0.0001949717842791432, "loss": 1.1825144958496094, "mean_token_accuracy": 0.7164402473717928, "num_tokens": 681873.0, "step": 50 }, { "entropy": 0.9332277816906571, "epoch": 0.19907100199071, "grad_norm": 0.0302734375, "learning_rate": 0.00018609610158889942, "loss": 0.860277099609375, "mean_token_accuracy": 0.7937182428687811, "num_tokens": 1025821.0, "step": 75 }, { "entropy": 0.6990280481893569, "epoch": 0.26542800265428, "grad_norm": 0.033447265625, "learning_rate": 0.0001732494071613579, "loss": 0.6413444519042969, "mean_token_accuracy": 0.8473471283167601, "num_tokens": 1363016.0, "step": 100 }, { "entropy": 0.6252105168486014, "epoch": 0.33178500331785005, "grad_norm": 0.040771484375, "learning_rate": 0.00015702422926917872, "loss": 0.568554801940918, "mean_token_accuracy": 0.8674215438961983, "num_tokens": 1708078.0, "step": 125 }, { "entropy": 0.4965320496726781, "epoch": 0.39814200398142, "grad_norm": 0.03662109375, "learning_rate": 0.0001381689220266659, "loss": 0.45257919311523437, "mean_token_accuracy": 0.8954442175477744, "num_tokens": 2046077.0, "step": 150 }, { "entropy": 0.47735783314332364, "epoch": 0.46449900464499005, "grad_norm": 0.033935546875, "learning_rate": 0.00011755314904214284, "loss": 0.4485871124267578, "mean_token_accuracy": 0.9003452565521002, "num_tokens": 2387971.0, "step": 175 }, { "entropy": 0.4234251272957772, "epoch": 0.53085600530856, "grad_norm": 0.022705078125, "learning_rate": 9.612777191078258e-05, "loss": 0.3898358535766602, "mean_token_accuracy": 0.9132146901637316, "num_tokens": 2731004.0, "step": 200 }, { "entropy": 0.512334642924834, "epoch": 0.59721300597213, "grad_norm": 0.027587890625, "learning_rate": 7.48809936115181e-05, "loss": 0.48469039916992185, "mean_token_accuracy": 0.8931490843743086, "num_tokens": 3075845.0, "step": 225 }, { "entropy": 0.4335173129173927, "epoch": 0.6635700066357001, "grad_norm": 0.0196533203125, "learning_rate": 5.479277960676958e-05, "loss": 0.4188508987426758, "mean_token_accuracy": 0.908899156153202, "num_tokens": 3419572.0, "step": 250 }, { "entropy": 0.4149061946058646, "epoch": 0.7299270072992701, "grad_norm": 0.0157470703125, "learning_rate": 3.678965888126513e-05, "loss": 0.3769934844970703, "mean_token_accuracy": 0.9135672262310982, "num_tokens": 3761523.0, "step": 275 }, { "entropy": 0.39260273962281644, "epoch": 0.79628400796284, "grad_norm": 0.0169677734375, "learning_rate": 2.1701989632293717e-05, "loss": 0.35523429870605466, "mean_token_accuracy": 0.9186709802597761, "num_tokens": 4093017.0, "step": 300 }, { "entropy": 0.4600309434533119, "epoch": 0.8626410086264101, "grad_norm": 0.021240234375, "learning_rate": 1.0225660646576629e-05, "loss": 0.43257938385009764, "mean_token_accuracy": 0.9030144046247005, "num_tokens": 4439510.0, "step": 325 }, { "entropy": 0.4613286603335291, "epoch": 0.9289980092899801, "grad_norm": 0.030517578125, "learning_rate": 2.889994811704966e-06, "loss": 0.44339012145996093, "mean_token_accuracy": 0.9041482334583998, "num_tokens": 4784614.0, "step": 350 }, { "entropy": 0.3619327815785073, "epoch": 0.9953550099535501, "grad_norm": 0.0159912109375, "learning_rate": 3.333514894887646e-08, "loss": 0.3405461883544922, "mean_token_accuracy": 0.9234929252415895, "num_tokens": 5127234.0, "step": 375 } ], "logging_steps": 25, "max_steps": 377, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.1977274766790246e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }