| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 377, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 1.5562486915290357, |
| "epoch": 0.06635700066357, |
| "grad_norm": 0.0184326171875, |
| "learning_rate": 0.00019946708199115211, |
| "loss": 1.6421920776367187, |
| "mean_token_accuracy": 0.6302989659458399, |
| "num_tokens": 340197.0, |
| "step": 25 |
| }, |
| { |
| "entropy": 1.2358380392193795, |
| "epoch": 0.13271400132714, |
| "grad_norm": 0.034912109375, |
| "learning_rate": 0.0001949717842791432, |
| "loss": 1.1825144958496094, |
| "mean_token_accuracy": 0.7164402473717928, |
| "num_tokens": 681873.0, |
| "step": 50 |
| }, |
| { |
| "entropy": 0.9332277816906571, |
| "epoch": 0.19907100199071, |
| "grad_norm": 0.0302734375, |
| "learning_rate": 0.00018609610158889942, |
| "loss": 0.860277099609375, |
| "mean_token_accuracy": 0.7937182428687811, |
| "num_tokens": 1025821.0, |
| "step": 75 |
| }, |
| { |
| "entropy": 0.6990280481893569, |
| "epoch": 0.26542800265428, |
| "grad_norm": 0.033447265625, |
| "learning_rate": 0.0001732494071613579, |
| "loss": 0.6413444519042969, |
| "mean_token_accuracy": 0.8473471283167601, |
| "num_tokens": 1363016.0, |
| "step": 100 |
| }, |
| { |
| "entropy": 0.6252105168486014, |
| "epoch": 0.33178500331785005, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 0.00015702422926917872, |
| "loss": 0.568554801940918, |
| "mean_token_accuracy": 0.8674215438961983, |
| "num_tokens": 1708078.0, |
| "step": 125 |
| }, |
| { |
| "entropy": 0.4965320496726781, |
| "epoch": 0.39814200398142, |
| "grad_norm": 0.03662109375, |
| "learning_rate": 0.0001381689220266659, |
| "loss": 0.45257919311523437, |
| "mean_token_accuracy": 0.8954442175477744, |
| "num_tokens": 2046077.0, |
| "step": 150 |
| }, |
| { |
| "entropy": 0.47735783314332364, |
| "epoch": 0.46449900464499005, |
| "grad_norm": 0.033935546875, |
| "learning_rate": 0.00011755314904214284, |
| "loss": 0.4485871124267578, |
| "mean_token_accuracy": 0.9003452565521002, |
| "num_tokens": 2387971.0, |
| "step": 175 |
| }, |
| { |
| "entropy": 0.4234251272957772, |
| "epoch": 0.53085600530856, |
| "grad_norm": 0.022705078125, |
| "learning_rate": 9.612777191078258e-05, |
| "loss": 0.3898358535766602, |
| "mean_token_accuracy": 0.9132146901637316, |
| "num_tokens": 2731004.0, |
| "step": 200 |
| }, |
| { |
| "entropy": 0.512334642924834, |
| "epoch": 0.59721300597213, |
| "grad_norm": 0.027587890625, |
| "learning_rate": 7.48809936115181e-05, |
| "loss": 0.48469039916992185, |
| "mean_token_accuracy": 0.8931490843743086, |
| "num_tokens": 3075845.0, |
| "step": 225 |
| }, |
| { |
| "entropy": 0.4335173129173927, |
| "epoch": 0.6635700066357001, |
| "grad_norm": 0.0196533203125, |
| "learning_rate": 5.479277960676958e-05, |
| "loss": 0.4188508987426758, |
| "mean_token_accuracy": 0.908899156153202, |
| "num_tokens": 3419572.0, |
| "step": 250 |
| }, |
| { |
| "entropy": 0.4149061946058646, |
| "epoch": 0.7299270072992701, |
| "grad_norm": 0.0157470703125, |
| "learning_rate": 3.678965888126513e-05, |
| "loss": 0.3769934844970703, |
| "mean_token_accuracy": 0.9135672262310982, |
| "num_tokens": 3761523.0, |
| "step": 275 |
| }, |
| { |
| "entropy": 0.39260273962281644, |
| "epoch": 0.79628400796284, |
| "grad_norm": 0.0169677734375, |
| "learning_rate": 2.1701989632293717e-05, |
| "loss": 0.35523429870605466, |
| "mean_token_accuracy": 0.9186709802597761, |
| "num_tokens": 4093017.0, |
| "step": 300 |
| }, |
| { |
| "entropy": 0.4600309434533119, |
| "epoch": 0.8626410086264101, |
| "grad_norm": 0.021240234375, |
| "learning_rate": 1.0225660646576629e-05, |
| "loss": 0.43257938385009764, |
| "mean_token_accuracy": 0.9030144046247005, |
| "num_tokens": 4439510.0, |
| "step": 325 |
| }, |
| { |
| "entropy": 0.4613286603335291, |
| "epoch": 0.9289980092899801, |
| "grad_norm": 0.030517578125, |
| "learning_rate": 2.889994811704966e-06, |
| "loss": 0.44339012145996093, |
| "mean_token_accuracy": 0.9041482334583998, |
| "num_tokens": 4784614.0, |
| "step": 350 |
| }, |
| { |
| "entropy": 0.3619327815785073, |
| "epoch": 0.9953550099535501, |
| "grad_norm": 0.0159912109375, |
| "learning_rate": 3.333514894887646e-08, |
| "loss": 0.3405461883544922, |
| "mean_token_accuracy": 0.9234929252415895, |
| "num_tokens": 5127234.0, |
| "step": 375 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 377, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.1977274766790246e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|