| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9049489395129615, |
| "eval_steps": 500, |
| "global_step": 360, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.012568735271013355, |
| "grad_norm": 0.06298828125, |
| "learning_rate": 0.0001, |
| "loss": 0.6012, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02513747054202671, |
| "grad_norm": 0.11767578125, |
| "learning_rate": 0.0001, |
| "loss": 0.3895, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.037706205813040065, |
| "grad_norm": 0.0908203125, |
| "learning_rate": 0.0001, |
| "loss": 0.2298, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05027494108405342, |
| "grad_norm": 0.068359375, |
| "learning_rate": 0.0001, |
| "loss": 0.1486, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06284367635506677, |
| "grad_norm": 0.06396484375, |
| "learning_rate": 0.0001, |
| "loss": 0.1333, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.07541241162608013, |
| "grad_norm": 0.0849609375, |
| "learning_rate": 0.0001, |
| "loss": 0.1203, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08798114689709348, |
| "grad_norm": 0.0908203125, |
| "learning_rate": 0.0001, |
| "loss": 0.0904, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.10054988216810684, |
| "grad_norm": 0.05859375, |
| "learning_rate": 0.0001, |
| "loss": 0.0617, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11311861743912019, |
| "grad_norm": 0.0478515625, |
| "learning_rate": 0.0001, |
| "loss": 0.0515, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.12568735271013354, |
| "grad_norm": 0.0634765625, |
| "learning_rate": 0.0001, |
| "loss": 0.0295, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13825608798114689, |
| "grad_norm": 0.07421875, |
| "learning_rate": 0.0001, |
| "loss": 0.2835, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.15082482325216026, |
| "grad_norm": 0.057861328125, |
| "learning_rate": 0.0001, |
| "loss": 0.0973, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1633935585231736, |
| "grad_norm": 0.026611328125, |
| "learning_rate": 0.0001, |
| "loss": 0.0755, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.17596229379418696, |
| "grad_norm": 0.0244140625, |
| "learning_rate": 0.0001, |
| "loss": 0.0547, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1885310290652003, |
| "grad_norm": 0.0274658203125, |
| "learning_rate": 0.0001, |
| "loss": 0.0638, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.20109976433621368, |
| "grad_norm": 0.029052734375, |
| "learning_rate": 0.0001, |
| "loss": 0.0541, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.21366849960722703, |
| "grad_norm": 0.039306640625, |
| "learning_rate": 0.0001, |
| "loss": 0.0511, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.22623723487824038, |
| "grad_norm": 0.0196533203125, |
| "learning_rate": 0.0001, |
| "loss": 0.0392, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.23880597014925373, |
| "grad_norm": 0.0269775390625, |
| "learning_rate": 0.0001, |
| "loss": 0.0373, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2513747054202671, |
| "grad_norm": 0.02734375, |
| "learning_rate": 0.0001, |
| "loss": 0.0168, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.26394344069128045, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 0.0001, |
| "loss": 0.2346, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.27651217596229377, |
| "grad_norm": 0.0301513671875, |
| "learning_rate": 0.0001, |
| "loss": 0.0746, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.28908091123330715, |
| "grad_norm": 0.0294189453125, |
| "learning_rate": 0.0001, |
| "loss": 0.0534, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3016496465043205, |
| "grad_norm": 0.0247802734375, |
| "learning_rate": 0.0001, |
| "loss": 0.0371, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.31421838177533384, |
| "grad_norm": 0.0225830078125, |
| "learning_rate": 0.0001, |
| "loss": 0.0488, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.3267871170463472, |
| "grad_norm": 0.02490234375, |
| "learning_rate": 0.0001, |
| "loss": 0.0444, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.33935585231736054, |
| "grad_norm": 0.0250244140625, |
| "learning_rate": 0.0001, |
| "loss": 0.038, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.3519245875883739, |
| "grad_norm": 0.0264892578125, |
| "learning_rate": 0.0001, |
| "loss": 0.0308, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3644933228593873, |
| "grad_norm": 0.039306640625, |
| "learning_rate": 0.0001, |
| "loss": 0.0291, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3770620581304006, |
| "grad_norm": 0.031982421875, |
| "learning_rate": 0.0001, |
| "loss": 0.0142, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.389630793401414, |
| "grad_norm": 0.045654296875, |
| "learning_rate": 0.0001, |
| "loss": 0.2053, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.40219952867242736, |
| "grad_norm": 0.0400390625, |
| "learning_rate": 0.0001, |
| "loss": 0.0658, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4147682639434407, |
| "grad_norm": 0.0272216796875, |
| "learning_rate": 0.0001, |
| "loss": 0.045, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.42733699921445406, |
| "grad_norm": 0.021240234375, |
| "learning_rate": 0.0001, |
| "loss": 0.0343, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4399057344854674, |
| "grad_norm": 0.0263671875, |
| "learning_rate": 0.0001, |
| "loss": 0.041, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.45247446975648076, |
| "grad_norm": 0.0311279296875, |
| "learning_rate": 0.0001, |
| "loss": 0.0382, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.46504320502749413, |
| "grad_norm": 0.022705078125, |
| "learning_rate": 0.0001, |
| "loss": 0.0295, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.47761194029850745, |
| "grad_norm": 0.0301513671875, |
| "learning_rate": 0.0001, |
| "loss": 0.0257, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.49018067556952083, |
| "grad_norm": 0.02490234375, |
| "learning_rate": 0.0001, |
| "loss": 0.0217, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5027494108405341, |
| "grad_norm": 0.006866455078125, |
| "learning_rate": 0.0001, |
| "loss": 0.0073, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5153181461115475, |
| "grad_norm": 0.04443359375, |
| "learning_rate": 0.0001, |
| "loss": 0.1655, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5278868813825609, |
| "grad_norm": 0.056640625, |
| "learning_rate": 0.0001, |
| "loss": 0.051, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5404556166535742, |
| "grad_norm": 0.026123046875, |
| "learning_rate": 0.0001, |
| "loss": 0.0393, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.5530243519245875, |
| "grad_norm": 0.01806640625, |
| "learning_rate": 0.0001, |
| "loss": 0.0255, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.565593087195601, |
| "grad_norm": 0.0230712890625, |
| "learning_rate": 0.0001, |
| "loss": 0.0333, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5781618224666143, |
| "grad_norm": 0.024658203125, |
| "learning_rate": 0.0001, |
| "loss": 0.0289, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5907305577376276, |
| "grad_norm": 0.0301513671875, |
| "learning_rate": 0.0001, |
| "loss": 0.0235, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.603299293008641, |
| "grad_norm": 0.0284423828125, |
| "learning_rate": 0.0001, |
| "loss": 0.0208, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6158680282796544, |
| "grad_norm": 0.025634765625, |
| "learning_rate": 0.0001, |
| "loss": 0.0119, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.6284367635506677, |
| "grad_norm": 0.0125732421875, |
| "learning_rate": 0.0001, |
| "loss": 0.0093, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6410054988216811, |
| "grad_norm": 0.051025390625, |
| "learning_rate": 0.0001, |
| "loss": 0.1598, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.6535742340926944, |
| "grad_norm": 0.0546875, |
| "learning_rate": 0.0001, |
| "loss": 0.0457, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6661429693637078, |
| "grad_norm": 0.03564453125, |
| "learning_rate": 0.0001, |
| "loss": 0.0352, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.6787117046347211, |
| "grad_norm": 0.019775390625, |
| "learning_rate": 0.0001, |
| "loss": 0.024, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6912804399057345, |
| "grad_norm": 0.0234375, |
| "learning_rate": 0.0001, |
| "loss": 0.0296, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.7038491751767478, |
| "grad_norm": 0.0264892578125, |
| "learning_rate": 0.0001, |
| "loss": 0.0249, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7164179104477612, |
| "grad_norm": 0.029541015625, |
| "learning_rate": 0.0001, |
| "loss": 0.0199, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.7289866457187746, |
| "grad_norm": 0.02294921875, |
| "learning_rate": 0.0001, |
| "loss": 0.0154, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7415553809897879, |
| "grad_norm": 0.0220947265625, |
| "learning_rate": 0.0001, |
| "loss": 0.0116, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.7541241162608012, |
| "grad_norm": 0.00531005859375, |
| "learning_rate": 0.0001, |
| "loss": 0.0058, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7666928515318147, |
| "grad_norm": 0.049560546875, |
| "learning_rate": 0.0001, |
| "loss": 0.1521, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.779261586802828, |
| "grad_norm": 0.140625, |
| "learning_rate": 0.0001, |
| "loss": 0.0482, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7918303220738413, |
| "grad_norm": 0.035888671875, |
| "learning_rate": 0.0001, |
| "loss": 0.0372, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.8043990573448547, |
| "grad_norm": 0.036865234375, |
| "learning_rate": 0.0001, |
| "loss": 0.0244, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.816967792615868, |
| "grad_norm": 0.030517578125, |
| "learning_rate": 0.0001, |
| "loss": 0.0263, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.8295365278868814, |
| "grad_norm": 0.024169921875, |
| "learning_rate": 0.0001, |
| "loss": 0.0218, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 0.0308837890625, |
| "learning_rate": 0.0001, |
| "loss": 0.0182, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.8546739984289081, |
| "grad_norm": 0.02880859375, |
| "learning_rate": 0.0001, |
| "loss": 0.014, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8672427336999214, |
| "grad_norm": 0.03173828125, |
| "learning_rate": 0.0001, |
| "loss": 0.0109, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.8798114689709348, |
| "grad_norm": 0.01483154296875, |
| "learning_rate": 0.0001, |
| "loss": 0.0044, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8923802042419482, |
| "grad_norm": 0.03955078125, |
| "learning_rate": 0.0001, |
| "loss": 0.1312, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.9049489395129615, |
| "grad_norm": 0.031982421875, |
| "learning_rate": 0.0001, |
| "loss": 0.0403, |
| "step": 360 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 360, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 90, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.6220305320330854e+18, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|