| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 325, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 1.8578038491308688, |
| "epoch": 0.07714561234329798, |
| "grad_norm": 0.0174560546875, |
| "learning_rate": 0.00019902680687415705, |
| "loss": 1.9434934997558593, |
| "mean_token_accuracy": 0.5780243317037821, |
| "num_tokens": 296483.0, |
| "step": 25 |
| }, |
| { |
| "entropy": 1.5701066633313894, |
| "epoch": 0.15429122468659595, |
| "grad_norm": 0.04052734375, |
| "learning_rate": 0.00019253043004739968, |
| "loss": 1.5351368713378906, |
| "mean_token_accuracy": 0.6528984536230564, |
| "num_tokens": 595349.0, |
| "step": 50 |
| }, |
| { |
| "entropy": 1.1479615284875035, |
| "epoch": 0.23143683702989393, |
| "grad_norm": 0.037109375, |
| "learning_rate": 0.00018031146921373018, |
| "loss": 1.0957810974121094, |
| "mean_token_accuracy": 0.7462421279400587, |
| "num_tokens": 896327.0, |
| "step": 75 |
| }, |
| { |
| "entropy": 0.8900932836066932, |
| "epoch": 0.3085824493731919, |
| "grad_norm": 0.058837890625, |
| "learning_rate": 0.0001631256112300239, |
| "loss": 0.8309718322753906, |
| "mean_token_accuracy": 0.8106287607550621, |
| "num_tokens": 1190579.0, |
| "step": 100 |
| }, |
| { |
| "entropy": 0.722694746227935, |
| "epoch": 0.3857280617164899, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 0.00014203572283095657, |
| "loss": 0.6702272796630859, |
| "mean_token_accuracy": 0.8465786771476269, |
| "num_tokens": 1483157.0, |
| "step": 125 |
| }, |
| { |
| "entropy": 0.6168222531164065, |
| "epoch": 0.46287367405978785, |
| "grad_norm": 0.0419921875, |
| "learning_rate": 0.00011834611718137824, |
| "loss": 0.5611977386474609, |
| "mean_token_accuracy": 0.869293844178319, |
| "num_tokens": 1776533.0, |
| "step": 150 |
| }, |
| { |
| "entropy": 0.5858749843109399, |
| "epoch": 0.5400192864030858, |
| "grad_norm": 0.037353515625, |
| "learning_rate": 9.352188807098481e-05, |
| "loss": 0.5254201889038086, |
| "mean_token_accuracy": 0.8756095879524947, |
| "num_tokens": 2065558.0, |
| "step": 175 |
| }, |
| { |
| "entropy": 0.5470695828087628, |
| "epoch": 0.6171648987463838, |
| "grad_norm": 0.0299072265625, |
| "learning_rate": 6.909830056250527e-05, |
| "loss": 0.5220871353149414, |
| "mean_token_accuracy": 0.8878432418406009, |
| "num_tokens": 2361335.0, |
| "step": 200 |
| }, |
| { |
| "entropy": 0.5989037967612967, |
| "epoch": 0.6943105110896818, |
| "grad_norm": 0.025390625, |
| "learning_rate": 4.658584186750713e-05, |
| "loss": 0.5457814025878907, |
| "mean_token_accuracy": 0.880027602687478, |
| "num_tokens": 2655008.0, |
| "step": 225 |
| }, |
| { |
| "entropy": 0.502092773411423, |
| "epoch": 0.7714561234329798, |
| "grad_norm": 0.01806640625, |
| "learning_rate": 2.7376804619000707e-05, |
| "loss": 0.4714463043212891, |
| "mean_token_accuracy": 0.8987671569734812, |
| "num_tokens": 2957052.0, |
| "step": 250 |
| }, |
| { |
| "entropy": 0.49828358624130487, |
| "epoch": 0.8486017357762777, |
| "grad_norm": 0.022216796875, |
| "learning_rate": 1.2659179938287035e-05, |
| "loss": 0.44431716918945313, |
| "mean_token_accuracy": 0.9000337335467339, |
| "num_tokens": 3252025.0, |
| "step": 275 |
| }, |
| { |
| "entropy": 0.49207511749817057, |
| "epoch": 0.9257473481195757, |
| "grad_norm": 0.026123046875, |
| "learning_rate": 3.3431856161452835e-06, |
| "loss": 0.4675511932373047, |
| "mean_token_accuracy": 0.9023960041999817, |
| "num_tokens": 3544940.0, |
| "step": 300 |
| }, |
| { |
| "entropy": 0.5535661403912229, |
| "epoch": 1.0, |
| "grad_norm": 0.0186767578125, |
| "learning_rate": 4.973304405697654e-09, |
| "loss": 0.49514381408691405, |
| "mean_token_accuracy": 0.8885030556034732, |
| "num_tokens": 3826435.0, |
| "step": 325 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 325, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.6325843032837632e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|