| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 0, |
| "global_step": 633, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.001579778830963665, |
| "grad_norm": 3.3021833896636963, |
| "learning_rate": 1e-05, |
| "loss": 0.8142, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00315955766192733, |
| "grad_norm": 0.5667713284492493, |
| "learning_rate": 9.984202211690363e-06, |
| "loss": 0.4081, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.004739336492890996, |
| "grad_norm": 7.904314994812012, |
| "learning_rate": 9.968404423380728e-06, |
| "loss": 1.1876, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00631911532385466, |
| "grad_norm": 10.157713890075684, |
| "learning_rate": 9.95260663507109e-06, |
| "loss": 1.4092, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.007898894154818325, |
| "grad_norm": 4.723056316375732, |
| "learning_rate": 9.936808846761454e-06, |
| "loss": 0.7578, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.009478672985781991, |
| "grad_norm": 7.033465385437012, |
| "learning_rate": 9.921011058451816e-06, |
| "loss": 0.5175, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.011058451816745656, |
| "grad_norm": 0.800440788269043, |
| "learning_rate": 9.905213270142182e-06, |
| "loss": 0.4077, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01263823064770932, |
| "grad_norm": 0.6944026350975037, |
| "learning_rate": 9.889415481832544e-06, |
| "loss": 0.4686, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.014218009478672985, |
| "grad_norm": 0.5700448751449585, |
| "learning_rate": 9.873617693522908e-06, |
| "loss": 0.3623, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.01579778830963665, |
| "grad_norm": 0.7115408778190613, |
| "learning_rate": 9.85781990521327e-06, |
| "loss": 0.4727, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.017377567140600316, |
| "grad_norm": 0.5764197707176208, |
| "learning_rate": 9.842022116903635e-06, |
| "loss": 0.4054, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.018957345971563982, |
| "grad_norm": 0.615205705165863, |
| "learning_rate": 9.826224328593997e-06, |
| "loss": 0.3798, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.020537124802527645, |
| "grad_norm": 0.6402739882469177, |
| "learning_rate": 9.810426540284361e-06, |
| "loss": 0.3966, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.022116903633491312, |
| "grad_norm": 0.6007937788963318, |
| "learning_rate": 9.794628751974725e-06, |
| "loss": 0.4158, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.023696682464454975, |
| "grad_norm": 0.5462563037872314, |
| "learning_rate": 9.778830963665089e-06, |
| "loss": 0.4795, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.02527646129541864, |
| "grad_norm": 0.6038461923599243, |
| "learning_rate": 9.76303317535545e-06, |
| "loss": 0.4142, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.026856240126382307, |
| "grad_norm": 0.514258861541748, |
| "learning_rate": 9.747235387045815e-06, |
| "loss": 0.4139, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.02843601895734597, |
| "grad_norm": 0.728235125541687, |
| "learning_rate": 9.731437598736178e-06, |
| "loss": 0.3129, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.030015797788309637, |
| "grad_norm": 0.7013534307479858, |
| "learning_rate": 9.715639810426542e-06, |
| "loss": 0.4275, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0315955766192733, |
| "grad_norm": 0.6062476634979248, |
| "learning_rate": 9.699842022116904e-06, |
| "loss": 0.3961, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03317535545023697, |
| "grad_norm": 0.6089779138565063, |
| "learning_rate": 9.684044233807268e-06, |
| "loss": 0.4972, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.03475513428120063, |
| "grad_norm": 0.6651365756988525, |
| "learning_rate": 9.668246445497632e-06, |
| "loss": 0.4714, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.036334913112164295, |
| "grad_norm": 0.6064260601997375, |
| "learning_rate": 9.652448657187995e-06, |
| "loss": 0.4358, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.037914691943127965, |
| "grad_norm": 0.5868542790412903, |
| "learning_rate": 9.636650868878358e-06, |
| "loss": 0.5178, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.03949447077409163, |
| "grad_norm": 0.6516690850257874, |
| "learning_rate": 9.620853080568721e-06, |
| "loss": 0.4281, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.04107424960505529, |
| "grad_norm": 0.7721027731895447, |
| "learning_rate": 9.605055292259085e-06, |
| "loss": 0.4979, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.04265402843601896, |
| "grad_norm": 0.6200973987579346, |
| "learning_rate": 9.589257503949447e-06, |
| "loss": 0.347, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.044233807266982623, |
| "grad_norm": 0.6557235717773438, |
| "learning_rate": 9.573459715639811e-06, |
| "loss": 0.3422, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.045813586097946286, |
| "grad_norm": 1.0422502756118774, |
| "learning_rate": 9.557661927330175e-06, |
| "loss": 0.4955, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.04739336492890995, |
| "grad_norm": 0.8272190093994141, |
| "learning_rate": 9.541864139020539e-06, |
| "loss": 0.434, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04897314375987362, |
| "grad_norm": 0.5929948091506958, |
| "learning_rate": 9.5260663507109e-06, |
| "loss": 0.5042, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.05055292259083728, |
| "grad_norm": 0.7872880101203918, |
| "learning_rate": 9.510268562401264e-06, |
| "loss": 0.5175, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.052132701421800945, |
| "grad_norm": 0.6884463429450989, |
| "learning_rate": 9.494470774091628e-06, |
| "loss": 0.5104, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.053712480252764615, |
| "grad_norm": 1.215976357460022, |
| "learning_rate": 9.478672985781992e-06, |
| "loss": 0.4742, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.05529225908372828, |
| "grad_norm": 0.7471550107002258, |
| "learning_rate": 9.462875197472354e-06, |
| "loss": 0.4374, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.05687203791469194, |
| "grad_norm": 0.6779741048812866, |
| "learning_rate": 9.447077409162718e-06, |
| "loss": 0.4337, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.05845181674565561, |
| "grad_norm": 0.5205997824668884, |
| "learning_rate": 9.431279620853082e-06, |
| "loss": 0.4296, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.06003159557661927, |
| "grad_norm": 0.381757527589798, |
| "learning_rate": 9.415481832543445e-06, |
| "loss": 0.2223, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.061611374407582936, |
| "grad_norm": 0.650593101978302, |
| "learning_rate": 9.399684044233807e-06, |
| "loss": 0.5066, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0631911532385466, |
| "grad_norm": 0.5445153117179871, |
| "learning_rate": 9.383886255924171e-06, |
| "loss": 0.4998, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.06477093206951026, |
| "grad_norm": 0.5024020671844482, |
| "learning_rate": 9.368088467614535e-06, |
| "loss": 0.4121, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.06635071090047394, |
| "grad_norm": 0.6259915232658386, |
| "learning_rate": 9.352290679304899e-06, |
| "loss": 0.4969, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0679304897314376, |
| "grad_norm": 0.49405789375305176, |
| "learning_rate": 9.336492890995261e-06, |
| "loss": 0.4121, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.06951026856240126, |
| "grad_norm": 0.7586628198623657, |
| "learning_rate": 9.320695102685625e-06, |
| "loss": 0.4782, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.07109004739336493, |
| "grad_norm": 0.6203773021697998, |
| "learning_rate": 9.304897314375988e-06, |
| "loss": 0.3579, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.07266982622432859, |
| "grad_norm": 0.6982845067977905, |
| "learning_rate": 9.289099526066352e-06, |
| "loss": 0.3876, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.07424960505529225, |
| "grad_norm": 0.5712842345237732, |
| "learning_rate": 9.273301737756714e-06, |
| "loss": 0.4288, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.07582938388625593, |
| "grad_norm": 0.6829891204833984, |
| "learning_rate": 9.257503949447078e-06, |
| "loss": 0.4939, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.07740916271721959, |
| "grad_norm": 0.5508958101272583, |
| "learning_rate": 9.241706161137442e-06, |
| "loss": 0.372, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.07898894154818326, |
| "grad_norm": 0.9345032572746277, |
| "learning_rate": 9.225908372827806e-06, |
| "loss": 0.4896, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.08056872037914692, |
| "grad_norm": 0.6280492544174194, |
| "learning_rate": 9.210110584518168e-06, |
| "loss": 0.4375, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.08214849921011058, |
| "grad_norm": 0.6853601336479187, |
| "learning_rate": 9.194312796208532e-06, |
| "loss": 0.4294, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.08372827804107424, |
| "grad_norm": 0.6665984392166138, |
| "learning_rate": 9.178515007898895e-06, |
| "loss": 0.5894, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.08530805687203792, |
| "grad_norm": 0.5088407397270203, |
| "learning_rate": 9.162717219589257e-06, |
| "loss": 0.3853, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.08688783570300158, |
| "grad_norm": 0.5319867730140686, |
| "learning_rate": 9.146919431279621e-06, |
| "loss": 0.4791, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.08846761453396525, |
| "grad_norm": 0.6452597975730896, |
| "learning_rate": 9.131121642969985e-06, |
| "loss": 0.4056, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.09004739336492891, |
| "grad_norm": 0.6769601106643677, |
| "learning_rate": 9.115323854660349e-06, |
| "loss": 0.4253, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.09162717219589257, |
| "grad_norm": 0.5170547962188721, |
| "learning_rate": 9.09952606635071e-06, |
| "loss": 0.4211, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.09320695102685624, |
| "grad_norm": 0.5035193562507629, |
| "learning_rate": 9.083728278041075e-06, |
| "loss": 0.3144, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0947867298578199, |
| "grad_norm": 0.5919070243835449, |
| "learning_rate": 9.067930489731438e-06, |
| "loss": 0.4533, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09636650868878358, |
| "grad_norm": 0.6510637998580933, |
| "learning_rate": 9.052132701421802e-06, |
| "loss": 0.4701, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.09794628751974724, |
| "grad_norm": 0.5784177780151367, |
| "learning_rate": 9.036334913112164e-06, |
| "loss": 0.3896, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.0995260663507109, |
| "grad_norm": 0.7009139060974121, |
| "learning_rate": 9.020537124802528e-06, |
| "loss": 0.5018, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.10110584518167456, |
| "grad_norm": 0.5086057186126709, |
| "learning_rate": 9.004739336492892e-06, |
| "loss": 0.4305, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.10268562401263823, |
| "grad_norm": 0.5124595761299133, |
| "learning_rate": 8.988941548183256e-06, |
| "loss": 0.4473, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.10426540284360189, |
| "grad_norm": 0.6409702897071838, |
| "learning_rate": 8.973143759873618e-06, |
| "loss": 0.429, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.10584518167456557, |
| "grad_norm": 0.5651409029960632, |
| "learning_rate": 8.957345971563981e-06, |
| "loss": 0.4036, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.10742496050552923, |
| "grad_norm": 0.6658238172531128, |
| "learning_rate": 8.941548183254345e-06, |
| "loss": 0.4726, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.10900473933649289, |
| "grad_norm": 0.444815993309021, |
| "learning_rate": 8.925750394944709e-06, |
| "loss": 0.4016, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.11058451816745656, |
| "grad_norm": 0.5855506658554077, |
| "learning_rate": 8.909952606635071e-06, |
| "loss": 0.4531, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.11216429699842022, |
| "grad_norm": 0.693794310092926, |
| "learning_rate": 8.894154818325435e-06, |
| "loss": 0.4382, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.11374407582938388, |
| "grad_norm": 0.6658089756965637, |
| "learning_rate": 8.878357030015799e-06, |
| "loss": 0.4571, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.11532385466034756, |
| "grad_norm": 1.0504828691482544, |
| "learning_rate": 8.862559241706162e-06, |
| "loss": 0.4311, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.11690363349131122, |
| "grad_norm": 0.5297814607620239, |
| "learning_rate": 8.846761453396524e-06, |
| "loss": 0.4391, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.11848341232227488, |
| "grad_norm": 0.6601409316062927, |
| "learning_rate": 8.830963665086888e-06, |
| "loss": 0.5125, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.12006319115323855, |
| "grad_norm": 0.6345618963241577, |
| "learning_rate": 8.815165876777252e-06, |
| "loss": 0.4471, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.12164296998420221, |
| "grad_norm": 0.5008222460746765, |
| "learning_rate": 8.799368088467614e-06, |
| "loss": 0.3845, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.12322274881516587, |
| "grad_norm": 0.5394203066825867, |
| "learning_rate": 8.783570300157978e-06, |
| "loss": 0.4117, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.12480252764612954, |
| "grad_norm": 0.6255345940589905, |
| "learning_rate": 8.767772511848342e-06, |
| "loss": 0.512, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.1263823064770932, |
| "grad_norm": 0.6215748190879822, |
| "learning_rate": 8.751974723538705e-06, |
| "loss": 0.509, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.12796208530805686, |
| "grad_norm": 0.611587405204773, |
| "learning_rate": 8.736176935229068e-06, |
| "loss": 0.4036, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.12954186413902052, |
| "grad_norm": 0.5373330116271973, |
| "learning_rate": 8.720379146919431e-06, |
| "loss": 0.393, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.13112164296998421, |
| "grad_norm": 0.5936598181724548, |
| "learning_rate": 8.704581358609795e-06, |
| "loss": 0.4092, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.13270142180094788, |
| "grad_norm": 0.576614260673523, |
| "learning_rate": 8.688783570300159e-06, |
| "loss": 0.5513, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.13428120063191154, |
| "grad_norm": 0.5715078711509705, |
| "learning_rate": 8.672985781990521e-06, |
| "loss": 0.4403, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.1358609794628752, |
| "grad_norm": 0.6212042570114136, |
| "learning_rate": 8.657187993680885e-06, |
| "loss": 0.391, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.13744075829383887, |
| "grad_norm": 0.5439122319221497, |
| "learning_rate": 8.641390205371249e-06, |
| "loss": 0.4764, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.13902053712480253, |
| "grad_norm": 0.6808428168296814, |
| "learning_rate": 8.625592417061612e-06, |
| "loss": 0.512, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.1406003159557662, |
| "grad_norm": 0.7429847717285156, |
| "learning_rate": 8.609794628751974e-06, |
| "loss": 0.3834, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.14218009478672985, |
| "grad_norm": 0.6030511260032654, |
| "learning_rate": 8.59399684044234e-06, |
| "loss": 0.4631, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.14375987361769352, |
| "grad_norm": 0.6499682068824768, |
| "learning_rate": 8.578199052132702e-06, |
| "loss": 0.4484, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.14533965244865718, |
| "grad_norm": 0.6490275859832764, |
| "learning_rate": 8.562401263823066e-06, |
| "loss": 0.414, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.14691943127962084, |
| "grad_norm": 0.6859791874885559, |
| "learning_rate": 8.546603475513428e-06, |
| "loss": 0.386, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.1484992101105845, |
| "grad_norm": 0.5281291007995605, |
| "learning_rate": 8.530805687203793e-06, |
| "loss": 0.4036, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.1500789889415482, |
| "grad_norm": 0.5261964797973633, |
| "learning_rate": 8.515007898894155e-06, |
| "loss": 0.33, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.15165876777251186, |
| "grad_norm": 0.4350665211677551, |
| "learning_rate": 8.499210110584519e-06, |
| "loss": 0.3347, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.15323854660347552, |
| "grad_norm": 0.8448456525802612, |
| "learning_rate": 8.483412322274883e-06, |
| "loss": 0.4253, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.15481832543443919, |
| "grad_norm": 0.6256837248802185, |
| "learning_rate": 8.467614533965247e-06, |
| "loss": 0.4464, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.15639810426540285, |
| "grad_norm": 0.7007749676704407, |
| "learning_rate": 8.451816745655609e-06, |
| "loss": 0.4641, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.1579778830963665, |
| "grad_norm": 0.6551494002342224, |
| "learning_rate": 8.436018957345973e-06, |
| "loss": 0.5097, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.15955766192733017, |
| "grad_norm": 0.5944113731384277, |
| "learning_rate": 8.420221169036336e-06, |
| "loss": 0.4554, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.16113744075829384, |
| "grad_norm": 0.5755615234375, |
| "learning_rate": 8.4044233807267e-06, |
| "loss": 0.443, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.1627172195892575, |
| "grad_norm": 0.5263962745666504, |
| "learning_rate": 8.388625592417062e-06, |
| "loss": 0.4355, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.16429699842022116, |
| "grad_norm": 0.6115814447402954, |
| "learning_rate": 8.372827804107424e-06, |
| "loss": 0.4863, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.16587677725118483, |
| "grad_norm": 0.5544970631599426, |
| "learning_rate": 8.35703001579779e-06, |
| "loss": 0.3979, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.1674565560821485, |
| "grad_norm": 0.5588533878326416, |
| "learning_rate": 8.341232227488152e-06, |
| "loss": 0.4073, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.16903633491311215, |
| "grad_norm": 0.578982949256897, |
| "learning_rate": 8.325434439178516e-06, |
| "loss": 0.3745, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.17061611374407584, |
| "grad_norm": 0.4955246150493622, |
| "learning_rate": 8.30963665086888e-06, |
| "loss": 0.438, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.1721958925750395, |
| "grad_norm": 0.593362033367157, |
| "learning_rate": 8.293838862559243e-06, |
| "loss": 0.4161, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.17377567140600317, |
| "grad_norm": 0.5000883340835571, |
| "learning_rate": 8.278041074249605e-06, |
| "loss": 0.432, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.17535545023696683, |
| "grad_norm": 0.5794082880020142, |
| "learning_rate": 8.262243285939969e-06, |
| "loss": 0.4431, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1769352290679305, |
| "grad_norm": 0.6179563999176025, |
| "learning_rate": 8.246445497630333e-06, |
| "loss": 0.3871, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.17851500789889416, |
| "grad_norm": 0.6540956497192383, |
| "learning_rate": 8.230647709320697e-06, |
| "loss": 0.3706, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.18009478672985782, |
| "grad_norm": 0.7029737234115601, |
| "learning_rate": 8.214849921011059e-06, |
| "loss": 0.5077, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.18167456556082148, |
| "grad_norm": 0.5466600656509399, |
| "learning_rate": 8.199052132701422e-06, |
| "loss": 0.4634, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.18325434439178515, |
| "grad_norm": 0.5513831973075867, |
| "learning_rate": 8.183254344391786e-06, |
| "loss": 0.4457, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.1848341232227488, |
| "grad_norm": 0.7652455568313599, |
| "learning_rate": 8.16745655608215e-06, |
| "loss": 0.4376, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.18641390205371247, |
| "grad_norm": 0.6213077902793884, |
| "learning_rate": 8.151658767772512e-06, |
| "loss": 0.3988, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.18799368088467613, |
| "grad_norm": 0.50051349401474, |
| "learning_rate": 8.135860979462876e-06, |
| "loss": 0.4142, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.1895734597156398, |
| "grad_norm": 0.8015328049659729, |
| "learning_rate": 8.12006319115324e-06, |
| "loss": 0.4474, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1911532385466035, |
| "grad_norm": 0.6595532298088074, |
| "learning_rate": 8.104265402843603e-06, |
| "loss": 0.5173, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.19273301737756715, |
| "grad_norm": 0.7859697937965393, |
| "learning_rate": 8.088467614533966e-06, |
| "loss": 0.4465, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.1943127962085308, |
| "grad_norm": 0.6508023738861084, |
| "learning_rate": 8.07266982622433e-06, |
| "loss": 0.4448, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.19589257503949448, |
| "grad_norm": 0.49232304096221924, |
| "learning_rate": 8.056872037914693e-06, |
| "loss": 0.4005, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.19747235387045814, |
| "grad_norm": 0.6464349031448364, |
| "learning_rate": 8.041074249605057e-06, |
| "loss": 0.47, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.1990521327014218, |
| "grad_norm": 0.5296919345855713, |
| "learning_rate": 8.025276461295419e-06, |
| "loss": 0.4247, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.20063191153238547, |
| "grad_norm": 0.6270297765731812, |
| "learning_rate": 8.009478672985783e-06, |
| "loss": 0.5397, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.20221169036334913, |
| "grad_norm": 0.6148909330368042, |
| "learning_rate": 7.993680884676147e-06, |
| "loss": 0.4133, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2037914691943128, |
| "grad_norm": 0.7778130173683167, |
| "learning_rate": 7.977883096366509e-06, |
| "loss": 0.5119, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.20537124802527645, |
| "grad_norm": 0.47952044010162354, |
| "learning_rate": 7.962085308056872e-06, |
| "loss": 0.386, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.20695102685624012, |
| "grad_norm": 0.5951160788536072, |
| "learning_rate": 7.946287519747236e-06, |
| "loss": 0.5101, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.20853080568720378, |
| "grad_norm": 0.6209789514541626, |
| "learning_rate": 7.9304897314376e-06, |
| "loss": 0.4988, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.21011058451816747, |
| "grad_norm": 0.5093654990196228, |
| "learning_rate": 7.914691943127962e-06, |
| "loss": 0.374, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.21169036334913113, |
| "grad_norm": 0.5125884413719177, |
| "learning_rate": 7.898894154818326e-06, |
| "loss": 0.4097, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.2132701421800948, |
| "grad_norm": 0.5116066932678223, |
| "learning_rate": 7.88309636650869e-06, |
| "loss": 0.4643, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.21484992101105846, |
| "grad_norm": 0.5778034329414368, |
| "learning_rate": 7.867298578199053e-06, |
| "loss": 0.4645, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.21642969984202212, |
| "grad_norm": 0.6490422487258911, |
| "learning_rate": 7.851500789889415e-06, |
| "loss": 0.4825, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.21800947867298578, |
| "grad_norm": 0.644008219242096, |
| "learning_rate": 7.83570300157978e-06, |
| "loss": 0.3954, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.21958925750394945, |
| "grad_norm": 0.8628047704696655, |
| "learning_rate": 7.819905213270143e-06, |
| "loss": 0.5322, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.2211690363349131, |
| "grad_norm": 0.6286507844924927, |
| "learning_rate": 7.804107424960507e-06, |
| "loss": 0.3741, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.22274881516587677, |
| "grad_norm": 0.6210809350013733, |
| "learning_rate": 7.788309636650869e-06, |
| "loss": 0.4572, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.22432859399684044, |
| "grad_norm": 0.5337722897529602, |
| "learning_rate": 7.772511848341233e-06, |
| "loss": 0.3788, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2259083728278041, |
| "grad_norm": 0.5743194818496704, |
| "learning_rate": 7.756714060031596e-06, |
| "loss": 0.3963, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.22748815165876776, |
| "grad_norm": 0.4972652792930603, |
| "learning_rate": 7.74091627172196e-06, |
| "loss": 0.2906, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.22906793048973143, |
| "grad_norm": 0.5239664316177368, |
| "learning_rate": 7.725118483412322e-06, |
| "loss": 0.4009, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.23064770932069512, |
| "grad_norm": 0.5151936411857605, |
| "learning_rate": 7.709320695102686e-06, |
| "loss": 0.4208, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.23222748815165878, |
| "grad_norm": 0.6128547191619873, |
| "learning_rate": 7.69352290679305e-06, |
| "loss": 0.4779, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.23380726698262244, |
| "grad_norm": 0.5268502235412598, |
| "learning_rate": 7.677725118483414e-06, |
| "loss": 0.4219, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.2353870458135861, |
| "grad_norm": 0.5439866185188293, |
| "learning_rate": 7.661927330173776e-06, |
| "loss": 0.4436, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.23696682464454977, |
| "grad_norm": 0.5291867852210999, |
| "learning_rate": 7.64612954186414e-06, |
| "loss": 0.407, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.23854660347551343, |
| "grad_norm": 0.6638155579566956, |
| "learning_rate": 7.630331753554503e-06, |
| "loss": 0.403, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.2401263823064771, |
| "grad_norm": 0.5501230955123901, |
| "learning_rate": 7.614533965244867e-06, |
| "loss": 0.5004, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.24170616113744076, |
| "grad_norm": 0.5949499011039734, |
| "learning_rate": 7.59873617693523e-06, |
| "loss": 0.4708, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.24328593996840442, |
| "grad_norm": 0.5841517448425293, |
| "learning_rate": 7.582938388625593e-06, |
| "loss": 0.4836, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.24486571879936808, |
| "grad_norm": 0.6298154592514038, |
| "learning_rate": 7.567140600315957e-06, |
| "loss": 0.4728, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.24644549763033174, |
| "grad_norm": 0.6107637882232666, |
| "learning_rate": 7.55134281200632e-06, |
| "loss": 0.4243, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.2480252764612954, |
| "grad_norm": 0.5174968838691711, |
| "learning_rate": 7.535545023696683e-06, |
| "loss": 0.4657, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.24960505529225907, |
| "grad_norm": 0.5588591694831848, |
| "learning_rate": 7.519747235387046e-06, |
| "loss": 0.4567, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.25118483412322273, |
| "grad_norm": 0.8415222764015198, |
| "learning_rate": 7.50394944707741e-06, |
| "loss": 0.4625, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.2527646129541864, |
| "grad_norm": 0.6054974794387817, |
| "learning_rate": 7.488151658767773e-06, |
| "loss": 0.3843, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.25434439178515006, |
| "grad_norm": 0.5117557644844055, |
| "learning_rate": 7.472353870458137e-06, |
| "loss": 0.3887, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.2559241706161137, |
| "grad_norm": 0.5849332213401794, |
| "learning_rate": 7.4565560821485e-06, |
| "loss": 0.4528, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.2575039494470774, |
| "grad_norm": 0.5625325441360474, |
| "learning_rate": 7.4407582938388635e-06, |
| "loss": 0.4542, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.25908372827804105, |
| "grad_norm": 0.5406492352485657, |
| "learning_rate": 7.4249605055292264e-06, |
| "loss": 0.4592, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.26066350710900477, |
| "grad_norm": 0.6318654417991638, |
| "learning_rate": 7.40916271721959e-06, |
| "loss": 0.4361, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.26224328593996843, |
| "grad_norm": 0.5719902515411377, |
| "learning_rate": 7.393364928909953e-06, |
| "loss": 0.4799, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.2638230647709321, |
| "grad_norm": 0.5211177468299866, |
| "learning_rate": 7.377567140600317e-06, |
| "loss": 0.33, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.26540284360189575, |
| "grad_norm": 0.6400920152664185, |
| "learning_rate": 7.36176935229068e-06, |
| "loss": 0.4235, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.2669826224328594, |
| "grad_norm": 0.5302186608314514, |
| "learning_rate": 7.345971563981044e-06, |
| "loss": 0.4342, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.2685624012638231, |
| "grad_norm": 0.5393325686454773, |
| "learning_rate": 7.3301737756714066e-06, |
| "loss": 0.3632, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.27014218009478674, |
| "grad_norm": 0.5409063696861267, |
| "learning_rate": 7.31437598736177e-06, |
| "loss": 0.4076, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.2717219589257504, |
| "grad_norm": 0.5056774616241455, |
| "learning_rate": 7.298578199052133e-06, |
| "loss": 0.4821, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.27330173775671407, |
| "grad_norm": 0.6061700582504272, |
| "learning_rate": 7.282780410742497e-06, |
| "loss": 0.5137, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.27488151658767773, |
| "grad_norm": 0.5524815917015076, |
| "learning_rate": 7.26698262243286e-06, |
| "loss": 0.4116, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.2764612954186414, |
| "grad_norm": 0.5045567750930786, |
| "learning_rate": 7.251184834123224e-06, |
| "loss": 0.3969, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.27804107424960506, |
| "grad_norm": 0.604505717754364, |
| "learning_rate": 7.235387045813587e-06, |
| "loss": 0.5176, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.2796208530805687, |
| "grad_norm": 0.6067575812339783, |
| "learning_rate": 7.2195892575039505e-06, |
| "loss": 0.4438, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.2812006319115324, |
| "grad_norm": 0.6412494778633118, |
| "learning_rate": 7.203791469194313e-06, |
| "loss": 0.4758, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.28278041074249605, |
| "grad_norm": 0.5432886481285095, |
| "learning_rate": 7.187993680884676e-06, |
| "loss": 0.4387, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.2843601895734597, |
| "grad_norm": 0.4622472822666168, |
| "learning_rate": 7.17219589257504e-06, |
| "loss": 0.4775, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2859399684044234, |
| "grad_norm": 0.643259584903717, |
| "learning_rate": 7.156398104265403e-06, |
| "loss": 0.4479, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.28751974723538704, |
| "grad_norm": 0.48998138308525085, |
| "learning_rate": 7.140600315955767e-06, |
| "loss": 0.399, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.2890995260663507, |
| "grad_norm": 0.5146614909172058, |
| "learning_rate": 7.12480252764613e-06, |
| "loss": 0.4475, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.29067930489731436, |
| "grad_norm": 0.5386670231819153, |
| "learning_rate": 7.1090047393364935e-06, |
| "loss": 0.3892, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.292259083728278, |
| "grad_norm": 0.5147759318351746, |
| "learning_rate": 7.0932069510268565e-06, |
| "loss": 0.3755, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.2938388625592417, |
| "grad_norm": 0.5141321420669556, |
| "learning_rate": 7.07740916271722e-06, |
| "loss": 0.355, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.29541864139020535, |
| "grad_norm": 0.9518134593963623, |
| "learning_rate": 7.061611374407583e-06, |
| "loss": 0.4021, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.296998420221169, |
| "grad_norm": 0.5844981670379639, |
| "learning_rate": 7.045813586097947e-06, |
| "loss": 0.4233, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.2985781990521327, |
| "grad_norm": 0.6381546854972839, |
| "learning_rate": 7.03001579778831e-06, |
| "loss": 0.4862, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.3001579778830964, |
| "grad_norm": 0.7311195135116577, |
| "learning_rate": 7.014218009478674e-06, |
| "loss": 0.4822, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.30173775671406006, |
| "grad_norm": 0.5827596783638, |
| "learning_rate": 6.998420221169037e-06, |
| "loss": 0.4027, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3033175355450237, |
| "grad_norm": 0.6907688975334167, |
| "learning_rate": 6.9826224328594e-06, |
| "loss": 0.4374, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3048973143759874, |
| "grad_norm": 0.5060120820999146, |
| "learning_rate": 6.966824644549763e-06, |
| "loss": 0.4226, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.30647709320695105, |
| "grad_norm": 0.41480544209480286, |
| "learning_rate": 6.951026856240127e-06, |
| "loss": 0.3766, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.3080568720379147, |
| "grad_norm": 0.5637404322624207, |
| "learning_rate": 6.93522906793049e-06, |
| "loss": 0.4365, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.30963665086887837, |
| "grad_norm": 0.6389409899711609, |
| "learning_rate": 6.919431279620854e-06, |
| "loss": 0.4186, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.31121642969984203, |
| "grad_norm": 0.48588162660598755, |
| "learning_rate": 6.903633491311217e-06, |
| "loss": 0.4023, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.3127962085308057, |
| "grad_norm": 0.6066514253616333, |
| "learning_rate": 6.8878357030015805e-06, |
| "loss": 0.4652, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.31437598736176936, |
| "grad_norm": 0.6308689117431641, |
| "learning_rate": 6.8720379146919435e-06, |
| "loss": 0.3885, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.315955766192733, |
| "grad_norm": 0.4883437752723694, |
| "learning_rate": 6.856240126382307e-06, |
| "loss": 0.4128, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3175355450236967, |
| "grad_norm": 0.720086932182312, |
| "learning_rate": 6.84044233807267e-06, |
| "loss": 0.4333, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.31911532385466035, |
| "grad_norm": 0.6698761582374573, |
| "learning_rate": 6.824644549763034e-06, |
| "loss": 0.3967, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.320695102685624, |
| "grad_norm": 0.5240082740783691, |
| "learning_rate": 6.808846761453397e-06, |
| "loss": 0.4055, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.3222748815165877, |
| "grad_norm": 0.6142946481704712, |
| "learning_rate": 6.79304897314376e-06, |
| "loss": 0.3645, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.32385466034755134, |
| "grad_norm": 0.6439379453659058, |
| "learning_rate": 6.777251184834124e-06, |
| "loss": 0.3207, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.325434439178515, |
| "grad_norm": 0.6862720847129822, |
| "learning_rate": 6.7614533965244865e-06, |
| "loss": 0.4944, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.32701421800947866, |
| "grad_norm": 0.6720433235168457, |
| "learning_rate": 6.74565560821485e-06, |
| "loss": 0.4335, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.3285939968404423, |
| "grad_norm": 0.531577467918396, |
| "learning_rate": 6.729857819905213e-06, |
| "loss": 0.5327, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.330173775671406, |
| "grad_norm": 0.5542590022087097, |
| "learning_rate": 6.714060031595577e-06, |
| "loss": 0.3629, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.33175355450236965, |
| "grad_norm": 0.5614448189735413, |
| "learning_rate": 6.69826224328594e-06, |
| "loss": 0.4097, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.7383466362953186, |
| "learning_rate": 6.682464454976304e-06, |
| "loss": 0.5031, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.334913112164297, |
| "grad_norm": 0.6345497965812683, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.5029, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.33649289099526064, |
| "grad_norm": 0.579641580581665, |
| "learning_rate": 6.6508688783570304e-06, |
| "loss": 0.4949, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.3380726698262243, |
| "grad_norm": 0.5040780305862427, |
| "learning_rate": 6.635071090047393e-06, |
| "loss": 0.4537, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.33965244865718797, |
| "grad_norm": 0.5917491316795349, |
| "learning_rate": 6.619273301737757e-06, |
| "loss": 0.3883, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.3412322274881517, |
| "grad_norm": 0.7031399011611938, |
| "learning_rate": 6.60347551342812e-06, |
| "loss": 0.4554, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.34281200631911535, |
| "grad_norm": 0.5503798127174377, |
| "learning_rate": 6.587677725118484e-06, |
| "loss": 0.352, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.344391785150079, |
| "grad_norm": 0.5412716269493103, |
| "learning_rate": 6.571879936808847e-06, |
| "loss": 0.4191, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.3459715639810427, |
| "grad_norm": 0.6272369623184204, |
| "learning_rate": 6.556082148499211e-06, |
| "loss": 0.4595, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.34755134281200634, |
| "grad_norm": 0.5309504270553589, |
| "learning_rate": 6.5402843601895735e-06, |
| "loss": 0.4095, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.34913112164297, |
| "grad_norm": 0.5687200427055359, |
| "learning_rate": 6.524486571879938e-06, |
| "loss": 0.435, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.35071090047393366, |
| "grad_norm": 0.5819438099861145, |
| "learning_rate": 6.5086887835703e-06, |
| "loss": 0.4695, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.3522906793048973, |
| "grad_norm": 0.6310110092163086, |
| "learning_rate": 6.492890995260665e-06, |
| "loss": 0.4346, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.353870458135861, |
| "grad_norm": 0.5838906168937683, |
| "learning_rate": 6.477093206951027e-06, |
| "loss": 0.47, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.35545023696682465, |
| "grad_norm": 0.6752678155899048, |
| "learning_rate": 6.4612954186413915e-06, |
| "loss": 0.3842, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.3570300157977883, |
| "grad_norm": 0.7029111981391907, |
| "learning_rate": 6.445497630331754e-06, |
| "loss": 0.4442, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.358609794628752, |
| "grad_norm": 0.511812686920166, |
| "learning_rate": 6.429699842022118e-06, |
| "loss": 0.5171, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.36018957345971564, |
| "grad_norm": 0.49457868933677673, |
| "learning_rate": 6.413902053712481e-06, |
| "loss": 0.3695, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.3617693522906793, |
| "grad_norm": 0.4521022439002991, |
| "learning_rate": 6.398104265402843e-06, |
| "loss": 0.3909, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.36334913112164297, |
| "grad_norm": 0.45229026675224304, |
| "learning_rate": 6.382306477093208e-06, |
| "loss": 0.3417, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.36492890995260663, |
| "grad_norm": 0.5070056915283203, |
| "learning_rate": 6.36650868878357e-06, |
| "loss": 0.3518, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.3665086887835703, |
| "grad_norm": 0.9325531721115112, |
| "learning_rate": 6.350710900473935e-06, |
| "loss": 0.5172, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.36808846761453395, |
| "grad_norm": 0.6027977466583252, |
| "learning_rate": 6.334913112164297e-06, |
| "loss": 0.4052, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.3696682464454976, |
| "grad_norm": 0.7251097559928894, |
| "learning_rate": 6.319115323854661e-06, |
| "loss": 0.4739, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.3712480252764613, |
| "grad_norm": 0.6470052003860474, |
| "learning_rate": 6.303317535545023e-06, |
| "loss": 0.4745, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.37282780410742494, |
| "grad_norm": 0.7177411317825317, |
| "learning_rate": 6.287519747235388e-06, |
| "loss": 0.364, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.3744075829383886, |
| "grad_norm": 0.7681677341461182, |
| "learning_rate": 6.271721958925751e-06, |
| "loss": 0.4559, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.37598736176935227, |
| "grad_norm": 0.6160128116607666, |
| "learning_rate": 6.255924170616115e-06, |
| "loss": 0.421, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.37756714060031593, |
| "grad_norm": 0.658981442451477, |
| "learning_rate": 6.240126382306478e-06, |
| "loss": 0.3979, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.3791469194312796, |
| "grad_norm": 0.9422373175621033, |
| "learning_rate": 6.2243285939968414e-06, |
| "loss": 0.3586, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3807266982622433, |
| "grad_norm": 0.5452501773834229, |
| "learning_rate": 6.208530805687204e-06, |
| "loss": 0.4209, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.382306477093207, |
| "grad_norm": 0.4912925660610199, |
| "learning_rate": 6.192733017377568e-06, |
| "loss": 0.4784, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.38388625592417064, |
| "grad_norm": 0.6575455665588379, |
| "learning_rate": 6.176935229067931e-06, |
| "loss": 0.4062, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.3854660347551343, |
| "grad_norm": 0.8840091824531555, |
| "learning_rate": 6.161137440758295e-06, |
| "loss": 0.4177, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.38704581358609796, |
| "grad_norm": 0.5949338674545288, |
| "learning_rate": 6.145339652448658e-06, |
| "loss": 0.4477, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.3886255924170616, |
| "grad_norm": 0.5938326120376587, |
| "learning_rate": 6.1295418641390216e-06, |
| "loss": 0.4155, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.3902053712480253, |
| "grad_norm": 0.5401394367218018, |
| "learning_rate": 6.1137440758293845e-06, |
| "loss": 0.3873, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.39178515007898895, |
| "grad_norm": 0.5220497846603394, |
| "learning_rate": 6.097946287519748e-06, |
| "loss": 0.3803, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.3933649289099526, |
| "grad_norm": 0.5426644086837769, |
| "learning_rate": 6.082148499210111e-06, |
| "loss": 0.3239, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.3949447077409163, |
| "grad_norm": 0.5215898156166077, |
| "learning_rate": 6.066350710900475e-06, |
| "loss": 0.4373, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.39652448657187994, |
| "grad_norm": 0.5694135427474976, |
| "learning_rate": 6.050552922590838e-06, |
| "loss": 0.4948, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.3981042654028436, |
| "grad_norm": 0.5505183339118958, |
| "learning_rate": 6.034755134281202e-06, |
| "loss": 0.4108, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.39968404423380727, |
| "grad_norm": 0.593190610408783, |
| "learning_rate": 6.018957345971565e-06, |
| "loss": 0.429, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.40126382306477093, |
| "grad_norm": 0.5409046411514282, |
| "learning_rate": 6.003159557661928e-06, |
| "loss": 0.4443, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.4028436018957346, |
| "grad_norm": 0.5520291328430176, |
| "learning_rate": 5.987361769352291e-06, |
| "loss": 0.4485, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.40442338072669826, |
| "grad_norm": 0.5622429847717285, |
| "learning_rate": 5.971563981042654e-06, |
| "loss": 0.4181, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4060031595576619, |
| "grad_norm": 0.5267983078956604, |
| "learning_rate": 5.955766192733018e-06, |
| "loss": 0.4235, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.4075829383886256, |
| "grad_norm": 0.5384082198143005, |
| "learning_rate": 5.939968404423381e-06, |
| "loss": 0.4055, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.40916271721958924, |
| "grad_norm": 0.5427289605140686, |
| "learning_rate": 5.924170616113745e-06, |
| "loss": 0.3427, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.4107424960505529, |
| "grad_norm": 0.4936423599720001, |
| "learning_rate": 5.908372827804108e-06, |
| "loss": 0.4133, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.41232227488151657, |
| "grad_norm": 0.5825520753860474, |
| "learning_rate": 5.8925750394944715e-06, |
| "loss": 0.377, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.41390205371248023, |
| "grad_norm": 0.6343340277671814, |
| "learning_rate": 5.876777251184834e-06, |
| "loss": 0.441, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.4154818325434439, |
| "grad_norm": 0.5479387044906616, |
| "learning_rate": 5.860979462875198e-06, |
| "loss": 0.4353, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.41706161137440756, |
| "grad_norm": 0.5873805284500122, |
| "learning_rate": 5.845181674565561e-06, |
| "loss": 0.4293, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.4186413902053712, |
| "grad_norm": 0.6624792218208313, |
| "learning_rate": 5.829383886255925e-06, |
| "loss": 0.5162, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.42022116903633494, |
| "grad_norm": 0.5797149538993835, |
| "learning_rate": 5.813586097946288e-06, |
| "loss": 0.3651, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.4218009478672986, |
| "grad_norm": 0.5814763903617859, |
| "learning_rate": 5.797788309636652e-06, |
| "loss": 0.3817, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.42338072669826227, |
| "grad_norm": 0.5556735992431641, |
| "learning_rate": 5.7819905213270145e-06, |
| "loss": 0.4186, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.42496050552922593, |
| "grad_norm": 0.5842727422714233, |
| "learning_rate": 5.766192733017378e-06, |
| "loss": 0.4343, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.4265402843601896, |
| "grad_norm": 0.5401722192764282, |
| "learning_rate": 5.750394944707741e-06, |
| "loss": 0.4418, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.42812006319115326, |
| "grad_norm": 0.5917039513587952, |
| "learning_rate": 5.734597156398105e-06, |
| "loss": 0.5371, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.4296998420221169, |
| "grad_norm": 0.5991331338882446, |
| "learning_rate": 5.718799368088468e-06, |
| "loss": 0.4969, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.4312796208530806, |
| "grad_norm": 0.4709448218345642, |
| "learning_rate": 5.703001579778832e-06, |
| "loss": 0.4139, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.43285939968404424, |
| "grad_norm": 0.5746496319770813, |
| "learning_rate": 5.687203791469195e-06, |
| "loss": 0.4683, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.4344391785150079, |
| "grad_norm": 0.523835301399231, |
| "learning_rate": 5.6714060031595584e-06, |
| "loss": 0.4346, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.43601895734597157, |
| "grad_norm": 0.5292810797691345, |
| "learning_rate": 5.655608214849921e-06, |
| "loss": 0.463, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.43759873617693523, |
| "grad_norm": 0.6543466448783875, |
| "learning_rate": 5.639810426540285e-06, |
| "loss": 0.427, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.4391785150078989, |
| "grad_norm": 0.5543989539146423, |
| "learning_rate": 5.624012638230648e-06, |
| "loss": 0.3902, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.44075829383886256, |
| "grad_norm": 0.5905360579490662, |
| "learning_rate": 5.608214849921012e-06, |
| "loss": 0.4266, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.4423380726698262, |
| "grad_norm": 0.5785796046257019, |
| "learning_rate": 5.592417061611375e-06, |
| "loss": 0.4521, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4439178515007899, |
| "grad_norm": 0.5580607056617737, |
| "learning_rate": 5.576619273301738e-06, |
| "loss": 0.378, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.44549763033175355, |
| "grad_norm": 0.5100966691970825, |
| "learning_rate": 5.5608214849921015e-06, |
| "loss": 0.3876, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.4470774091627172, |
| "grad_norm": 0.5704023241996765, |
| "learning_rate": 5.5450236966824644e-06, |
| "loss": 0.4694, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.4486571879936809, |
| "grad_norm": 0.5954383611679077, |
| "learning_rate": 5.529225908372828e-06, |
| "loss": 0.5049, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.45023696682464454, |
| "grad_norm": 0.5239635705947876, |
| "learning_rate": 5.513428120063191e-06, |
| "loss": 0.4182, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.4518167456556082, |
| "grad_norm": 0.6643552780151367, |
| "learning_rate": 5.497630331753555e-06, |
| "loss": 0.4434, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.45339652448657186, |
| "grad_norm": 0.6675540804862976, |
| "learning_rate": 5.481832543443918e-06, |
| "loss": 0.3745, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.4549763033175355, |
| "grad_norm": 0.5871401429176331, |
| "learning_rate": 5.466034755134282e-06, |
| "loss": 0.5527, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4565560821484992, |
| "grad_norm": 0.5936838984489441, |
| "learning_rate": 5.4502369668246446e-06, |
| "loss": 0.4857, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.45813586097946285, |
| "grad_norm": 0.5998191833496094, |
| "learning_rate": 5.434439178515008e-06, |
| "loss": 0.4395, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.4597156398104265, |
| "grad_norm": 0.5102293491363525, |
| "learning_rate": 5.418641390205371e-06, |
| "loss": 0.4496, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.46129541864139023, |
| "grad_norm": 0.6297216415405273, |
| "learning_rate": 5.402843601895735e-06, |
| "loss": 0.3555, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.4628751974723539, |
| "grad_norm": 0.6780267953872681, |
| "learning_rate": 5.387045813586098e-06, |
| "loss": 0.3295, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.46445497630331756, |
| "grad_norm": 0.5788872838020325, |
| "learning_rate": 5.371248025276462e-06, |
| "loss": 0.4293, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.4660347551342812, |
| "grad_norm": 0.5679113268852234, |
| "learning_rate": 5.355450236966825e-06, |
| "loss": 0.4274, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.4676145339652449, |
| "grad_norm": 0.5739018321037292, |
| "learning_rate": 5.3396524486571885e-06, |
| "loss": 0.3292, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.46919431279620855, |
| "grad_norm": 0.5387299060821533, |
| "learning_rate": 5.323854660347551e-06, |
| "loss": 0.36, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.4707740916271722, |
| "grad_norm": 0.4877624213695526, |
| "learning_rate": 5.308056872037915e-06, |
| "loss": 0.403, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.47235387045813587, |
| "grad_norm": 0.5668107271194458, |
| "learning_rate": 5.292259083728278e-06, |
| "loss": 0.4087, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.47393364928909953, |
| "grad_norm": 0.5592719316482544, |
| "learning_rate": 5.276461295418642e-06, |
| "loss": 0.405, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4755134281200632, |
| "grad_norm": 0.48879534006118774, |
| "learning_rate": 5.260663507109005e-06, |
| "loss": 0.3562, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.47709320695102686, |
| "grad_norm": 0.5968641042709351, |
| "learning_rate": 5.244865718799369e-06, |
| "loss": 0.4216, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.4786729857819905, |
| "grad_norm": 0.7803828120231628, |
| "learning_rate": 5.2290679304897315e-06, |
| "loss": 0.4014, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.4802527646129542, |
| "grad_norm": 0.592827558517456, |
| "learning_rate": 5.213270142180096e-06, |
| "loss": 0.2895, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.48183254344391785, |
| "grad_norm": 0.8070396184921265, |
| "learning_rate": 5.197472353870458e-06, |
| "loss": 0.3972, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.4834123222748815, |
| "grad_norm": 0.5256397724151611, |
| "learning_rate": 5.181674565560821e-06, |
| "loss": 0.4384, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.4849921011058452, |
| "grad_norm": 0.5307562947273254, |
| "learning_rate": 5.165876777251185e-06, |
| "loss": 0.3788, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.48657187993680884, |
| "grad_norm": 0.4588807225227356, |
| "learning_rate": 5.150078988941548e-06, |
| "loss": 0.3491, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.4881516587677725, |
| "grad_norm": 0.524919331073761, |
| "learning_rate": 5.134281200631912e-06, |
| "loss": 0.4375, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.48973143759873616, |
| "grad_norm": 0.6611966490745544, |
| "learning_rate": 5.118483412322275e-06, |
| "loss": 0.4399, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4913112164296998, |
| "grad_norm": 0.5597748160362244, |
| "learning_rate": 5.102685624012638e-06, |
| "loss": 0.5073, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.4928909952606635, |
| "grad_norm": 0.8958181738853455, |
| "learning_rate": 5.086887835703001e-06, |
| "loss": 0.4756, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.49447077409162715, |
| "grad_norm": 0.4875742197036743, |
| "learning_rate": 5.071090047393366e-06, |
| "loss": 0.4424, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.4960505529225908, |
| "grad_norm": 0.6110445261001587, |
| "learning_rate": 5.055292259083728e-06, |
| "loss": 0.4686, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.4976303317535545, |
| "grad_norm": 0.5900540351867676, |
| "learning_rate": 5.039494470774093e-06, |
| "loss": 0.4, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.49921011058451814, |
| "grad_norm": 0.624906599521637, |
| "learning_rate": 5.023696682464455e-06, |
| "loss": 0.3967, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.5007898894154819, |
| "grad_norm": 0.6435191631317139, |
| "learning_rate": 5.007898894154819e-06, |
| "loss": 0.5104, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.5023696682464455, |
| "grad_norm": 0.7464382648468018, |
| "learning_rate": 4.9921011058451815e-06, |
| "loss": 0.4621, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.5039494470774092, |
| "grad_norm": 0.7912509441375732, |
| "learning_rate": 4.976303317535545e-06, |
| "loss": 0.4186, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.5055292259083728, |
| "grad_norm": 0.6150445938110352, |
| "learning_rate": 4.960505529225908e-06, |
| "loss": 0.469, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5071090047393365, |
| "grad_norm": 0.5445781946182251, |
| "learning_rate": 4.944707740916272e-06, |
| "loss": 0.4111, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.5086887835703001, |
| "grad_norm": 0.5628255605697632, |
| "learning_rate": 4.928909952606635e-06, |
| "loss": 0.4884, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.5102685624012638, |
| "grad_norm": 0.5007054805755615, |
| "learning_rate": 4.913112164296999e-06, |
| "loss": 0.4315, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.5118483412322274, |
| "grad_norm": 0.6346699595451355, |
| "learning_rate": 4.8973143759873624e-06, |
| "loss": 0.4033, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.5134281200631912, |
| "grad_norm": 0.639045774936676, |
| "learning_rate": 4.881516587677725e-06, |
| "loss": 0.3748, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.5150078988941548, |
| "grad_norm": 0.5578002333641052, |
| "learning_rate": 4.865718799368089e-06, |
| "loss": 0.5055, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.5165876777251185, |
| "grad_norm": 0.5281325578689575, |
| "learning_rate": 4.849921011058452e-06, |
| "loss": 0.4307, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.5181674565560821, |
| "grad_norm": 0.6557057499885559, |
| "learning_rate": 4.834123222748816e-06, |
| "loss": 0.4085, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5197472353870458, |
| "grad_norm": 0.5667731761932373, |
| "learning_rate": 4.818325434439179e-06, |
| "loss": 0.4774, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.5213270142180095, |
| "grad_norm": 0.5362856984138489, |
| "learning_rate": 4.8025276461295426e-06, |
| "loss": 0.4316, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5229067930489731, |
| "grad_norm": 0.5326763391494751, |
| "learning_rate": 4.7867298578199055e-06, |
| "loss": 0.389, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.5244865718799369, |
| "grad_norm": 0.4922950565814972, |
| "learning_rate": 4.770932069510269e-06, |
| "loss": 0.3756, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5260663507109005, |
| "grad_norm": 0.4961477518081665, |
| "learning_rate": 4.755134281200632e-06, |
| "loss": 0.4336, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.5276461295418642, |
| "grad_norm": 0.5258511304855347, |
| "learning_rate": 4.739336492890996e-06, |
| "loss": 0.404, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.5292259083728278, |
| "grad_norm": 0.5479301810264587, |
| "learning_rate": 4.723538704581359e-06, |
| "loss": 0.3578, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.5308056872037915, |
| "grad_norm": 0.49883902072906494, |
| "learning_rate": 4.707740916271723e-06, |
| "loss": 0.3809, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.5323854660347551, |
| "grad_norm": 0.5133053660392761, |
| "learning_rate": 4.691943127962086e-06, |
| "loss": 0.4091, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.5339652448657188, |
| "grad_norm": 0.6334301829338074, |
| "learning_rate": 4.676145339652449e-06, |
| "loss": 0.4432, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.5355450236966824, |
| "grad_norm": 0.5124396085739136, |
| "learning_rate": 4.660347551342812e-06, |
| "loss": 0.3557, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.5371248025276462, |
| "grad_norm": 0.5863746404647827, |
| "learning_rate": 4.644549763033176e-06, |
| "loss": 0.4288, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5387045813586098, |
| "grad_norm": 0.6599943041801453, |
| "learning_rate": 4.628751974723539e-06, |
| "loss": 0.398, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.5402843601895735, |
| "grad_norm": 0.480027437210083, |
| "learning_rate": 4.612954186413903e-06, |
| "loss": 0.4706, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.5418641390205371, |
| "grad_norm": 0.6601845026016235, |
| "learning_rate": 4.597156398104266e-06, |
| "loss": 0.4092, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.5434439178515008, |
| "grad_norm": 0.5557224154472351, |
| "learning_rate": 4.581358609794629e-06, |
| "loss": 0.389, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.5450236966824644, |
| "grad_norm": 0.49160709977149963, |
| "learning_rate": 4.5655608214849925e-06, |
| "loss": 0.4338, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.5466034755134281, |
| "grad_norm": 0.5284649133682251, |
| "learning_rate": 4.549763033175355e-06, |
| "loss": 0.403, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.5481832543443917, |
| "grad_norm": 0.5501908659934998, |
| "learning_rate": 4.533965244865719e-06, |
| "loss": 0.4983, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.5497630331753555, |
| "grad_norm": 0.5585077404975891, |
| "learning_rate": 4.518167456556082e-06, |
| "loss": 0.4219, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.5513428120063191, |
| "grad_norm": 0.4565962255001068, |
| "learning_rate": 4.502369668246446e-06, |
| "loss": 0.3591, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.5529225908372828, |
| "grad_norm": 0.5507949590682983, |
| "learning_rate": 4.486571879936809e-06, |
| "loss": 0.4752, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5545023696682464, |
| "grad_norm": 0.5490357875823975, |
| "learning_rate": 4.470774091627173e-06, |
| "loss": 0.4291, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.5560821484992101, |
| "grad_norm": 0.5804268717765808, |
| "learning_rate": 4.4549763033175355e-06, |
| "loss": 0.3113, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5576619273301737, |
| "grad_norm": 0.4745613634586334, |
| "learning_rate": 4.439178515007899e-06, |
| "loss": 0.4196, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.5592417061611374, |
| "grad_norm": 0.6223664283752441, |
| "learning_rate": 4.423380726698262e-06, |
| "loss": 0.4592, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.5608214849921012, |
| "grad_norm": 0.8797832727432251, |
| "learning_rate": 4.407582938388626e-06, |
| "loss": 0.4448, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.5624012638230648, |
| "grad_norm": 0.5569826364517212, |
| "learning_rate": 4.391785150078989e-06, |
| "loss": 0.3873, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.5639810426540285, |
| "grad_norm": 0.4294510781764984, |
| "learning_rate": 4.375987361769353e-06, |
| "loss": 0.3407, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.5655608214849921, |
| "grad_norm": 0.5657434463500977, |
| "learning_rate": 4.360189573459716e-06, |
| "loss": 0.3345, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.5671406003159558, |
| "grad_norm": 0.5589077472686768, |
| "learning_rate": 4.3443917851500794e-06, |
| "loss": 0.5237, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.5687203791469194, |
| "grad_norm": 0.6107128858566284, |
| "learning_rate": 4.328593996840442e-06, |
| "loss": 0.4354, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5703001579778831, |
| "grad_norm": 0.5671380758285522, |
| "learning_rate": 4.312796208530806e-06, |
| "loss": 0.3712, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.5718799368088467, |
| "grad_norm": 0.508173406124115, |
| "learning_rate": 4.29699842022117e-06, |
| "loss": 0.4097, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.5734597156398105, |
| "grad_norm": 0.6139382719993591, |
| "learning_rate": 4.281200631911533e-06, |
| "loss": 0.2646, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.5750394944707741, |
| "grad_norm": 0.5677220821380615, |
| "learning_rate": 4.265402843601897e-06, |
| "loss": 0.3748, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.5766192733017378, |
| "grad_norm": 0.530708372592926, |
| "learning_rate": 4.2496050552922596e-06, |
| "loss": 0.3857, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.5781990521327014, |
| "grad_norm": 1.176272988319397, |
| "learning_rate": 4.233807266982623e-06, |
| "loss": 0.436, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.5797788309636651, |
| "grad_norm": 0.6165753602981567, |
| "learning_rate": 4.218009478672986e-06, |
| "loss": 0.3898, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.5813586097946287, |
| "grad_norm": 0.47574201226234436, |
| "learning_rate": 4.20221169036335e-06, |
| "loss": 0.3685, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.5829383886255924, |
| "grad_norm": 0.5995083451271057, |
| "learning_rate": 4.186413902053712e-06, |
| "loss": 0.4686, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.584518167456556, |
| "grad_norm": 0.5809090733528137, |
| "learning_rate": 4.170616113744076e-06, |
| "loss": 0.4514, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5860979462875198, |
| "grad_norm": 0.6154018044471741, |
| "learning_rate": 4.15481832543444e-06, |
| "loss": 0.3737, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.5876777251184834, |
| "grad_norm": 0.5799654126167297, |
| "learning_rate": 4.139020537124803e-06, |
| "loss": 0.4285, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.5892575039494471, |
| "grad_norm": 0.4476354420185089, |
| "learning_rate": 4.123222748815166e-06, |
| "loss": 0.4362, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.5908372827804107, |
| "grad_norm": 0.6266714334487915, |
| "learning_rate": 4.107424960505529e-06, |
| "loss": 0.4943, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.5924170616113744, |
| "grad_norm": 0.5103732347488403, |
| "learning_rate": 4.091627172195893e-06, |
| "loss": 0.4585, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.593996840442338, |
| "grad_norm": 0.49011877179145813, |
| "learning_rate": 4.075829383886256e-06, |
| "loss": 0.4489, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.5955766192733017, |
| "grad_norm": 0.5286844372749329, |
| "learning_rate": 4.06003159557662e-06, |
| "loss": 0.4114, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.5971563981042654, |
| "grad_norm": 0.494807630777359, |
| "learning_rate": 4.044233807266983e-06, |
| "loss": 0.3514, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.5987361769352291, |
| "grad_norm": 0.46120524406433105, |
| "learning_rate": 4.0284360189573465e-06, |
| "loss": 0.4452, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.6003159557661928, |
| "grad_norm": 0.6024404764175415, |
| "learning_rate": 4.0126382306477095e-06, |
| "loss": 0.4368, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6018957345971564, |
| "grad_norm": 0.8292664885520935, |
| "learning_rate": 3.996840442338073e-06, |
| "loss": 0.4495, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.6034755134281201, |
| "grad_norm": 0.5312369465827942, |
| "learning_rate": 3.981042654028436e-06, |
| "loss": 0.3642, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.6050552922590837, |
| "grad_norm": 0.6373758316040039, |
| "learning_rate": 3.9652448657188e-06, |
| "loss": 0.3884, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.6066350710900474, |
| "grad_norm": 0.5623313188552856, |
| "learning_rate": 3.949447077409163e-06, |
| "loss": 0.3489, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.608214849921011, |
| "grad_norm": 0.5703821778297424, |
| "learning_rate": 3.933649289099527e-06, |
| "loss": 0.5309, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.6097946287519748, |
| "grad_norm": 0.5930938720703125, |
| "learning_rate": 3.91785150078989e-06, |
| "loss": 0.4072, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.6113744075829384, |
| "grad_norm": 0.5636332631111145, |
| "learning_rate": 3.902053712480253e-06, |
| "loss": 0.3938, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.6129541864139021, |
| "grad_norm": 0.45709583163261414, |
| "learning_rate": 3.886255924170616e-06, |
| "loss": 0.4436, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.6145339652448657, |
| "grad_norm": 0.5924400687217712, |
| "learning_rate": 3.87045813586098e-06, |
| "loss": 0.2939, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.6161137440758294, |
| "grad_norm": 0.6232696175575256, |
| "learning_rate": 3.854660347551343e-06, |
| "loss": 0.4183, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.617693522906793, |
| "grad_norm": 0.5407995581626892, |
| "learning_rate": 3.838862559241707e-06, |
| "loss": 0.3925, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.6192733017377567, |
| "grad_norm": 0.524691104888916, |
| "learning_rate": 3.82306477093207e-06, |
| "loss": 0.4327, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.6208530805687204, |
| "grad_norm": 0.5206206440925598, |
| "learning_rate": 3.8072669826224335e-06, |
| "loss": 0.4203, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.6224328593996841, |
| "grad_norm": 0.6244251132011414, |
| "learning_rate": 3.7914691943127964e-06, |
| "loss": 0.4546, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.6240126382306477, |
| "grad_norm": 0.707058846950531, |
| "learning_rate": 3.77567140600316e-06, |
| "loss": 0.4015, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.6255924170616114, |
| "grad_norm": 0.5457757115364075, |
| "learning_rate": 3.759873617693523e-06, |
| "loss": 0.3962, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.627172195892575, |
| "grad_norm": 0.5757611989974976, |
| "learning_rate": 3.7440758293838865e-06, |
| "loss": 0.4299, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.6287519747235387, |
| "grad_norm": 0.5844476819038391, |
| "learning_rate": 3.72827804107425e-06, |
| "loss": 0.4674, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.6303317535545023, |
| "grad_norm": 0.6859634518623352, |
| "learning_rate": 3.7124802527646132e-06, |
| "loss": 0.4253, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.631911532385466, |
| "grad_norm": 0.5247636437416077, |
| "learning_rate": 3.6966824644549766e-06, |
| "loss": 0.4318, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6334913112164297, |
| "grad_norm": 0.6206024885177612, |
| "learning_rate": 3.68088467614534e-06, |
| "loss": 0.3759, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.6350710900473934, |
| "grad_norm": 0.6237459182739258, |
| "learning_rate": 3.6650868878357033e-06, |
| "loss": 0.3642, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.636650868878357, |
| "grad_norm": 0.8048799633979797, |
| "learning_rate": 3.6492890995260666e-06, |
| "loss": 0.514, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.6382306477093207, |
| "grad_norm": 0.4662720561027527, |
| "learning_rate": 3.63349131121643e-06, |
| "loss": 0.3654, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.6398104265402843, |
| "grad_norm": 0.5561702251434326, |
| "learning_rate": 3.6176935229067934e-06, |
| "loss": 0.3823, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.641390205371248, |
| "grad_norm": 0.6143206357955933, |
| "learning_rate": 3.6018957345971567e-06, |
| "loss": 0.3938, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.6429699842022117, |
| "grad_norm": 0.6854034662246704, |
| "learning_rate": 3.58609794628752e-06, |
| "loss": 0.4625, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.6445497630331753, |
| "grad_norm": 0.5590549111366272, |
| "learning_rate": 3.5703001579778834e-06, |
| "loss": 0.4199, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.6461295418641391, |
| "grad_norm": 0.642573356628418, |
| "learning_rate": 3.5545023696682468e-06, |
| "loss": 0.4366, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.6477093206951027, |
| "grad_norm": 0.5898130536079407, |
| "learning_rate": 3.53870458135861e-06, |
| "loss": 0.4691, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6492890995260664, |
| "grad_norm": 0.5370688438415527, |
| "learning_rate": 3.5229067930489735e-06, |
| "loss": 0.45, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.65086887835703, |
| "grad_norm": 0.6769170165061951, |
| "learning_rate": 3.507109004739337e-06, |
| "loss": 0.3962, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6524486571879937, |
| "grad_norm": 0.5891703367233276, |
| "learning_rate": 3.4913112164297e-06, |
| "loss": 0.4542, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.6540284360189573, |
| "grad_norm": 0.42204615473747253, |
| "learning_rate": 3.4755134281200636e-06, |
| "loss": 0.3368, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.655608214849921, |
| "grad_norm": 0.46033787727355957, |
| "learning_rate": 3.459715639810427e-06, |
| "loss": 0.4357, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.6571879936808847, |
| "grad_norm": 0.5509577393531799, |
| "learning_rate": 3.4439178515007903e-06, |
| "loss": 0.3939, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.6587677725118484, |
| "grad_norm": 0.5802867412567139, |
| "learning_rate": 3.4281200631911536e-06, |
| "loss": 0.4073, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.660347551342812, |
| "grad_norm": 0.6130402684211731, |
| "learning_rate": 3.412322274881517e-06, |
| "loss": 0.3452, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.6619273301737757, |
| "grad_norm": 0.6854075789451599, |
| "learning_rate": 3.39652448657188e-06, |
| "loss": 0.3551, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.6635071090047393, |
| "grad_norm": 0.5365926027297974, |
| "learning_rate": 3.3807266982622433e-06, |
| "loss": 0.4011, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.665086887835703, |
| "grad_norm": 1.0338938236236572, |
| "learning_rate": 3.3649289099526066e-06, |
| "loss": 0.4623, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.5612855553627014, |
| "learning_rate": 3.34913112164297e-06, |
| "loss": 0.3738, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.6682464454976303, |
| "grad_norm": 0.5113286375999451, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.3865, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.669826224328594, |
| "grad_norm": 0.5509905815124512, |
| "learning_rate": 3.3175355450236967e-06, |
| "loss": 0.4093, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.6714060031595577, |
| "grad_norm": 0.5425525903701782, |
| "learning_rate": 3.30173775671406e-06, |
| "loss": 0.383, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.6729857819905213, |
| "grad_norm": 0.5866172909736633, |
| "learning_rate": 3.2859399684044234e-06, |
| "loss": 0.4843, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.674565560821485, |
| "grad_norm": 1.0777703523635864, |
| "learning_rate": 3.2701421800947867e-06, |
| "loss": 0.3748, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.6761453396524486, |
| "grad_norm": 0.49126845598220825, |
| "learning_rate": 3.25434439178515e-06, |
| "loss": 0.3505, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.6777251184834123, |
| "grad_norm": 0.5471718311309814, |
| "learning_rate": 3.2385466034755135e-06, |
| "loss": 0.4755, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.6793048973143759, |
| "grad_norm": 0.5689931511878967, |
| "learning_rate": 3.222748815165877e-06, |
| "loss": 0.3956, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6808846761453397, |
| "grad_norm": 0.6496183276176453, |
| "learning_rate": 3.2069510268562406e-06, |
| "loss": 0.4598, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.6824644549763034, |
| "grad_norm": 0.47042712569236755, |
| "learning_rate": 3.191153238546604e-06, |
| "loss": 0.3756, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.684044233807267, |
| "grad_norm": 0.5819857120513916, |
| "learning_rate": 3.1753554502369673e-06, |
| "loss": 0.4803, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.6856240126382307, |
| "grad_norm": 0.5752127766609192, |
| "learning_rate": 3.1595576619273307e-06, |
| "loss": 0.3916, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.6872037914691943, |
| "grad_norm": 0.6483988761901855, |
| "learning_rate": 3.143759873617694e-06, |
| "loss": 0.4338, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.688783570300158, |
| "grad_norm": 0.7817516326904297, |
| "learning_rate": 3.1279620853080574e-06, |
| "loss": 0.3645, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6903633491311216, |
| "grad_norm": 0.4980696737766266, |
| "learning_rate": 3.1121642969984207e-06, |
| "loss": 0.3962, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.6919431279620853, |
| "grad_norm": 0.5592882037162781, |
| "learning_rate": 3.096366508688784e-06, |
| "loss": 0.3645, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.693522906793049, |
| "grad_norm": 0.6228163242340088, |
| "learning_rate": 3.0805687203791474e-06, |
| "loss": 0.3696, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.6951026856240127, |
| "grad_norm": 0.6718009114265442, |
| "learning_rate": 3.0647709320695108e-06, |
| "loss": 0.4926, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6966824644549763, |
| "grad_norm": 0.6085376143455505, |
| "learning_rate": 3.048973143759874e-06, |
| "loss": 0.418, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.69826224328594, |
| "grad_norm": 0.7716324925422668, |
| "learning_rate": 3.0331753554502375e-06, |
| "loss": 0.4038, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.6998420221169036, |
| "grad_norm": 0.7239758968353271, |
| "learning_rate": 3.017377567140601e-06, |
| "loss": 0.4596, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.7014218009478673, |
| "grad_norm": 0.6308011412620544, |
| "learning_rate": 3.001579778830964e-06, |
| "loss": 0.4082, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.7030015797788309, |
| "grad_norm": 0.515626072883606, |
| "learning_rate": 2.985781990521327e-06, |
| "loss": 0.4688, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.7045813586097947, |
| "grad_norm": 0.5395441651344299, |
| "learning_rate": 2.9699842022116905e-06, |
| "loss": 0.3448, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.7061611374407583, |
| "grad_norm": 0.5883680582046509, |
| "learning_rate": 2.954186413902054e-06, |
| "loss": 0.4546, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.707740916271722, |
| "grad_norm": 0.7300311326980591, |
| "learning_rate": 2.938388625592417e-06, |
| "loss": 0.368, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.7093206951026856, |
| "grad_norm": 0.5901307463645935, |
| "learning_rate": 2.9225908372827806e-06, |
| "loss": 0.3688, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.7109004739336493, |
| "grad_norm": 0.6521854996681213, |
| "learning_rate": 2.906793048973144e-06, |
| "loss": 0.3876, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7124802527646129, |
| "grad_norm": 0.688450038433075, |
| "learning_rate": 2.8909952606635073e-06, |
| "loss": 0.4298, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.7140600315955766, |
| "grad_norm": 0.6533556580543518, |
| "learning_rate": 2.8751974723538706e-06, |
| "loss": 0.3589, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.7156398104265402, |
| "grad_norm": 0.5261491537094116, |
| "learning_rate": 2.859399684044234e-06, |
| "loss": 0.3886, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.717219589257504, |
| "grad_norm": 0.5488421320915222, |
| "learning_rate": 2.8436018957345973e-06, |
| "loss": 0.411, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.7187993680884676, |
| "grad_norm": 0.6415657997131348, |
| "learning_rate": 2.8278041074249607e-06, |
| "loss": 0.4581, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.7203791469194313, |
| "grad_norm": 0.5058445334434509, |
| "learning_rate": 2.812006319115324e-06, |
| "loss": 0.4325, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.721958925750395, |
| "grad_norm": 0.6409322619438171, |
| "learning_rate": 2.7962085308056874e-06, |
| "loss": 0.3759, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.7235387045813586, |
| "grad_norm": 0.5578014850616455, |
| "learning_rate": 2.7804107424960508e-06, |
| "loss": 0.3947, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.7251184834123223, |
| "grad_norm": 0.6064183115959167, |
| "learning_rate": 2.764612954186414e-06, |
| "loss": 0.4766, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.7266982622432859, |
| "grad_norm": 0.6067904233932495, |
| "learning_rate": 2.7488151658767775e-06, |
| "loss": 0.4698, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7282780410742496, |
| "grad_norm": 0.526088297367096, |
| "learning_rate": 2.733017377567141e-06, |
| "loss": 0.3997, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.7298578199052133, |
| "grad_norm": 0.6290006637573242, |
| "learning_rate": 2.717219589257504e-06, |
| "loss": 0.4393, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.731437598736177, |
| "grad_norm": 0.5822445154190063, |
| "learning_rate": 2.7014218009478675e-06, |
| "loss": 0.4767, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.7330173775671406, |
| "grad_norm": 0.5798205733299255, |
| "learning_rate": 2.685624012638231e-06, |
| "loss": 0.4163, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.7345971563981043, |
| "grad_norm": 0.6234124898910522, |
| "learning_rate": 2.6698262243285942e-06, |
| "loss": 0.387, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.7361769352290679, |
| "grad_norm": 0.5226984620094299, |
| "learning_rate": 2.6540284360189576e-06, |
| "loss": 0.4144, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.7377567140600316, |
| "grad_norm": 0.529303789138794, |
| "learning_rate": 2.638230647709321e-06, |
| "loss": 0.4689, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.7393364928909952, |
| "grad_norm": 0.6620000004768372, |
| "learning_rate": 2.6224328593996843e-06, |
| "loss": 0.4358, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.740916271721959, |
| "grad_norm": 0.8560294508934021, |
| "learning_rate": 2.606635071090048e-06, |
| "loss": 0.422, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.7424960505529226, |
| "grad_norm": 0.47033989429473877, |
| "learning_rate": 2.5908372827804106e-06, |
| "loss": 0.4462, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7440758293838863, |
| "grad_norm": 0.5476656556129456, |
| "learning_rate": 2.575039494470774e-06, |
| "loss": 0.3818, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.7456556082148499, |
| "grad_norm": 0.5771902203559875, |
| "learning_rate": 2.5592417061611373e-06, |
| "loss": 0.3835, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.7472353870458136, |
| "grad_norm": 0.6452733278274536, |
| "learning_rate": 2.5434439178515007e-06, |
| "loss": 0.4224, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.7488151658767772, |
| "grad_norm": 0.5318686962127686, |
| "learning_rate": 2.527646129541864e-06, |
| "loss": 0.4812, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.7503949447077409, |
| "grad_norm": 0.6591460108757019, |
| "learning_rate": 2.5118483412322274e-06, |
| "loss": 0.4546, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.7519747235387045, |
| "grad_norm": 0.5857440829277039, |
| "learning_rate": 2.4960505529225907e-06, |
| "loss": 0.4008, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.7535545023696683, |
| "grad_norm": 0.6430768370628357, |
| "learning_rate": 2.480252764612954e-06, |
| "loss": 0.3191, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.7551342812006319, |
| "grad_norm": 0.7442892789840698, |
| "learning_rate": 2.4644549763033174e-06, |
| "loss": 0.4171, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.7567140600315956, |
| "grad_norm": 0.6390454173088074, |
| "learning_rate": 2.4486571879936812e-06, |
| "loss": 0.5381, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.7582938388625592, |
| "grad_norm": 0.6277416348457336, |
| "learning_rate": 2.4328593996840446e-06, |
| "loss": 0.4824, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7598736176935229, |
| "grad_norm": 0.6043097972869873, |
| "learning_rate": 2.417061611374408e-06, |
| "loss": 0.4266, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.7614533965244866, |
| "grad_norm": 0.6095964312553406, |
| "learning_rate": 2.4012638230647713e-06, |
| "loss": 0.4258, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.7630331753554502, |
| "grad_norm": 0.5433639287948608, |
| "learning_rate": 2.3854660347551346e-06, |
| "loss": 0.4873, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.764612954186414, |
| "grad_norm": 0.49287649989128113, |
| "learning_rate": 2.369668246445498e-06, |
| "loss": 0.4814, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.7661927330173776, |
| "grad_norm": 0.5905902981758118, |
| "learning_rate": 2.3538704581358613e-06, |
| "loss": 0.4519, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.7677725118483413, |
| "grad_norm": 0.6697285771369934, |
| "learning_rate": 2.3380726698262247e-06, |
| "loss": 0.4686, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.7693522906793049, |
| "grad_norm": 0.5338664650917053, |
| "learning_rate": 2.322274881516588e-06, |
| "loss": 0.401, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.7709320695102686, |
| "grad_norm": 0.5338428616523743, |
| "learning_rate": 2.3064770932069514e-06, |
| "loss": 0.4045, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.7725118483412322, |
| "grad_norm": 0.6102830171585083, |
| "learning_rate": 2.2906793048973143e-06, |
| "loss": 0.3785, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.7740916271721959, |
| "grad_norm": 0.5787335634231567, |
| "learning_rate": 2.2748815165876777e-06, |
| "loss": 0.42, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7756714060031595, |
| "grad_norm": 0.7426438331604004, |
| "learning_rate": 2.259083728278041e-06, |
| "loss": 0.4676, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.7772511848341233, |
| "grad_norm": 0.5988475680351257, |
| "learning_rate": 2.2432859399684044e-06, |
| "loss": 0.5404, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.7788309636650869, |
| "grad_norm": 0.6289830803871155, |
| "learning_rate": 2.2274881516587678e-06, |
| "loss": 0.396, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.7804107424960506, |
| "grad_norm": 0.6077900528907776, |
| "learning_rate": 2.211690363349131e-06, |
| "loss": 0.4016, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.7819905213270142, |
| "grad_norm": 0.8171889781951904, |
| "learning_rate": 2.1958925750394945e-06, |
| "loss": 0.3638, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.7835703001579779, |
| "grad_norm": 0.6225026845932007, |
| "learning_rate": 2.180094786729858e-06, |
| "loss": 0.4088, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.7851500789889415, |
| "grad_norm": 0.6262929439544678, |
| "learning_rate": 2.164296998420221e-06, |
| "loss": 0.3311, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.7867298578199052, |
| "grad_norm": 0.662129282951355, |
| "learning_rate": 2.148499210110585e-06, |
| "loss": 0.4434, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.7883096366508688, |
| "grad_norm": 0.5046777725219727, |
| "learning_rate": 2.1327014218009483e-06, |
| "loss": 0.5042, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.7898894154818326, |
| "grad_norm": 0.6273382306098938, |
| "learning_rate": 2.1169036334913117e-06, |
| "loss": 0.345, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7914691943127962, |
| "grad_norm": 0.5484871864318848, |
| "learning_rate": 2.101105845181675e-06, |
| "loss": 0.3476, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.7930489731437599, |
| "grad_norm": 0.6779518723487854, |
| "learning_rate": 2.085308056872038e-06, |
| "loss": 0.4062, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.7946287519747235, |
| "grad_norm": 0.4969736635684967, |
| "learning_rate": 2.0695102685624013e-06, |
| "loss": 0.3615, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.7962085308056872, |
| "grad_norm": 0.5542388558387756, |
| "learning_rate": 2.0537124802527647e-06, |
| "loss": 0.39, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.7977883096366508, |
| "grad_norm": 0.8587651252746582, |
| "learning_rate": 2.037914691943128e-06, |
| "loss": 0.423, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.7993680884676145, |
| "grad_norm": 0.6399357318878174, |
| "learning_rate": 2.0221169036334914e-06, |
| "loss": 0.4645, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.8009478672985783, |
| "grad_norm": 0.5677849650382996, |
| "learning_rate": 2.0063191153238547e-06, |
| "loss": 0.3749, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.8025276461295419, |
| "grad_norm": 0.5609621405601501, |
| "learning_rate": 1.990521327014218e-06, |
| "loss": 0.4727, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.8041074249605056, |
| "grad_norm": 0.615185558795929, |
| "learning_rate": 1.9747235387045814e-06, |
| "loss": 0.4349, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.8056872037914692, |
| "grad_norm": 0.5093739032745361, |
| "learning_rate": 1.958925750394945e-06, |
| "loss": 0.3502, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.8072669826224329, |
| "grad_norm": 0.8513323068618774, |
| "learning_rate": 1.943127962085308e-06, |
| "loss": 0.3902, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.8088467614533965, |
| "grad_norm": 0.6797610521316528, |
| "learning_rate": 1.9273301737756715e-06, |
| "loss": 0.4987, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.8104265402843602, |
| "grad_norm": 0.5715585947036743, |
| "learning_rate": 1.911532385466035e-06, |
| "loss": 0.3965, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.8120063191153238, |
| "grad_norm": 0.5537532567977905, |
| "learning_rate": 1.8957345971563982e-06, |
| "loss": 0.3832, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.8135860979462876, |
| "grad_norm": 0.5337470173835754, |
| "learning_rate": 1.8799368088467616e-06, |
| "loss": 0.4136, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.8151658767772512, |
| "grad_norm": 0.5929555892944336, |
| "learning_rate": 1.864139020537125e-06, |
| "loss": 0.3901, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.8167456556082149, |
| "grad_norm": 0.6738921403884888, |
| "learning_rate": 1.8483412322274883e-06, |
| "loss": 0.4128, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.8183254344391785, |
| "grad_norm": 0.598659098148346, |
| "learning_rate": 1.8325434439178516e-06, |
| "loss": 0.3707, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.8199052132701422, |
| "grad_norm": 0.5679790377616882, |
| "learning_rate": 1.816745655608215e-06, |
| "loss": 0.457, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.8214849921011058, |
| "grad_norm": 0.5459115505218506, |
| "learning_rate": 1.8009478672985784e-06, |
| "loss": 0.3613, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.8230647709320695, |
| "grad_norm": 0.5752125978469849, |
| "learning_rate": 1.7851500789889417e-06, |
| "loss": 0.479, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.8246445497630331, |
| "grad_norm": 0.5184637904167175, |
| "learning_rate": 1.769352290679305e-06, |
| "loss": 0.4126, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.8262243285939969, |
| "grad_norm": 0.6329041123390198, |
| "learning_rate": 1.7535545023696684e-06, |
| "loss": 0.4221, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.8278041074249605, |
| "grad_norm": 0.5233784317970276, |
| "learning_rate": 1.7377567140600318e-06, |
| "loss": 0.4375, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.8293838862559242, |
| "grad_norm": 0.5424541234970093, |
| "learning_rate": 1.7219589257503951e-06, |
| "loss": 0.4447, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.8309636650868878, |
| "grad_norm": 0.5534167885780334, |
| "learning_rate": 1.7061611374407585e-06, |
| "loss": 0.3672, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.8325434439178515, |
| "grad_norm": 0.605102002620697, |
| "learning_rate": 1.6903633491311216e-06, |
| "loss": 0.4319, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.8341232227488151, |
| "grad_norm": 0.5609396696090698, |
| "learning_rate": 1.674565560821485e-06, |
| "loss": 0.3984, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.8357030015797788, |
| "grad_norm": 0.7964479923248291, |
| "learning_rate": 1.6587677725118483e-06, |
| "loss": 0.407, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.8372827804107424, |
| "grad_norm": 0.4886048436164856, |
| "learning_rate": 1.6429699842022117e-06, |
| "loss": 0.4506, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.8388625592417062, |
| "grad_norm": 0.543812096118927, |
| "learning_rate": 1.627172195892575e-06, |
| "loss": 0.3141, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.8404423380726699, |
| "grad_norm": 0.5370059609413147, |
| "learning_rate": 1.6113744075829384e-06, |
| "loss": 0.3712, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.8420221169036335, |
| "grad_norm": 0.7402203679084778, |
| "learning_rate": 1.595576619273302e-06, |
| "loss": 0.4136, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.8436018957345972, |
| "grad_norm": 0.6814244985580444, |
| "learning_rate": 1.5797788309636653e-06, |
| "loss": 0.4634, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.8451816745655608, |
| "grad_norm": 0.5919080972671509, |
| "learning_rate": 1.5639810426540287e-06, |
| "loss": 0.4238, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.8467614533965245, |
| "grad_norm": 0.617522120475769, |
| "learning_rate": 1.548183254344392e-06, |
| "loss": 0.3431, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.8483412322274881, |
| "grad_norm": 0.49482643604278564, |
| "learning_rate": 1.5323854660347554e-06, |
| "loss": 0.3882, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.8499210110584519, |
| "grad_norm": 0.5525531768798828, |
| "learning_rate": 1.5165876777251187e-06, |
| "loss": 0.4053, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.8515007898894155, |
| "grad_norm": 0.6634103655815125, |
| "learning_rate": 1.500789889415482e-06, |
| "loss": 0.4624, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.8530805687203792, |
| "grad_norm": 0.45309382677078247, |
| "learning_rate": 1.4849921011058452e-06, |
| "loss": 0.3486, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.8546603475513428, |
| "grad_norm": 0.778338611125946, |
| "learning_rate": 1.4691943127962086e-06, |
| "loss": 0.3984, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.8562401263823065, |
| "grad_norm": 0.6093356609344482, |
| "learning_rate": 1.453396524486572e-06, |
| "loss": 0.333, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.8578199052132701, |
| "grad_norm": 0.49551188945770264, |
| "learning_rate": 1.4375987361769353e-06, |
| "loss": 0.3915, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.8593996840442338, |
| "grad_norm": 0.5423188209533691, |
| "learning_rate": 1.4218009478672987e-06, |
| "loss": 0.4192, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.8609794628751974, |
| "grad_norm": 0.8111097812652588, |
| "learning_rate": 1.406003159557662e-06, |
| "loss": 0.473, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.8625592417061612, |
| "grad_norm": 0.6064862012863159, |
| "learning_rate": 1.3902053712480254e-06, |
| "loss": 0.4164, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.8641390205371248, |
| "grad_norm": 0.6180470585823059, |
| "learning_rate": 1.3744075829383887e-06, |
| "loss": 0.4351, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.8657187993680885, |
| "grad_norm": 0.5101069808006287, |
| "learning_rate": 1.358609794628752e-06, |
| "loss": 0.3806, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.8672985781990521, |
| "grad_norm": 0.6269749402999878, |
| "learning_rate": 1.3428120063191154e-06, |
| "loss": 0.4028, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.8688783570300158, |
| "grad_norm": 0.6344918608665466, |
| "learning_rate": 1.3270142180094788e-06, |
| "loss": 0.3206, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8704581358609794, |
| "grad_norm": 0.7053835988044739, |
| "learning_rate": 1.3112164296998422e-06, |
| "loss": 0.4404, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.8720379146919431, |
| "grad_norm": 0.4780917465686798, |
| "learning_rate": 1.2954186413902053e-06, |
| "loss": 0.4089, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.8736176935229067, |
| "grad_norm": 0.5235942006111145, |
| "learning_rate": 1.2796208530805687e-06, |
| "loss": 0.3992, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.8751974723538705, |
| "grad_norm": 0.5037370324134827, |
| "learning_rate": 1.263823064770932e-06, |
| "loss": 0.3727, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.8767772511848341, |
| "grad_norm": 0.5422868132591248, |
| "learning_rate": 1.2480252764612954e-06, |
| "loss": 0.4524, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.8783570300157978, |
| "grad_norm": 0.5287191271781921, |
| "learning_rate": 1.2322274881516587e-06, |
| "loss": 0.3445, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.8799368088467614, |
| "grad_norm": 0.49679964780807495, |
| "learning_rate": 1.2164296998420223e-06, |
| "loss": 0.3357, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.8815165876777251, |
| "grad_norm": 0.5391539931297302, |
| "learning_rate": 1.2006319115323856e-06, |
| "loss": 0.4645, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.8830963665086888, |
| "grad_norm": 0.5474575757980347, |
| "learning_rate": 1.184834123222749e-06, |
| "loss": 0.4109, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.8846761453396524, |
| "grad_norm": 0.5920886993408203, |
| "learning_rate": 1.1690363349131124e-06, |
| "loss": 0.4034, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8862559241706162, |
| "grad_norm": 0.5637263655662537, |
| "learning_rate": 1.1532385466034757e-06, |
| "loss": 0.392, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.8878357030015798, |
| "grad_norm": 0.6719076037406921, |
| "learning_rate": 1.1374407582938388e-06, |
| "loss": 0.3798, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.8894154818325435, |
| "grad_norm": 0.5554001927375793, |
| "learning_rate": 1.1216429699842022e-06, |
| "loss": 0.3901, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.8909952606635071, |
| "grad_norm": 0.6078475713729858, |
| "learning_rate": 1.1058451816745656e-06, |
| "loss": 0.3574, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.8925750394944708, |
| "grad_norm": 0.9478325843811035, |
| "learning_rate": 1.090047393364929e-06, |
| "loss": 0.3831, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.8941548183254344, |
| "grad_norm": 0.5259877443313599, |
| "learning_rate": 1.0742496050552925e-06, |
| "loss": 0.4003, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.8957345971563981, |
| "grad_norm": 0.5395880937576294, |
| "learning_rate": 1.0584518167456558e-06, |
| "loss": 0.3513, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.8973143759873617, |
| "grad_norm": 0.5458592772483826, |
| "learning_rate": 1.042654028436019e-06, |
| "loss": 0.49, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.8988941548183255, |
| "grad_norm": 0.5552616715431213, |
| "learning_rate": 1.0268562401263823e-06, |
| "loss": 0.3905, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.9004739336492891, |
| "grad_norm": 0.551466166973114, |
| "learning_rate": 1.0110584518167457e-06, |
| "loss": 0.4241, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.9020537124802528, |
| "grad_norm": 0.7195900082588196, |
| "learning_rate": 9.95260663507109e-07, |
| "loss": 0.3912, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.9036334913112164, |
| "grad_norm": 0.5951517820358276, |
| "learning_rate": 9.794628751974724e-07, |
| "loss": 0.4267, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.9052132701421801, |
| "grad_norm": 0.7582541108131409, |
| "learning_rate": 9.636650868878358e-07, |
| "loss": 0.4024, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.9067930489731437, |
| "grad_norm": 0.6346389651298523, |
| "learning_rate": 9.478672985781991e-07, |
| "loss": 0.4677, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.9083728278041074, |
| "grad_norm": 0.7323048710823059, |
| "learning_rate": 9.320695102685625e-07, |
| "loss": 0.4332, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.909952606635071, |
| "grad_norm": 0.5796726942062378, |
| "learning_rate": 9.162717219589258e-07, |
| "loss": 0.3514, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.9115323854660348, |
| "grad_norm": 0.7424004673957825, |
| "learning_rate": 9.004739336492892e-07, |
| "loss": 0.4178, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.9131121642969984, |
| "grad_norm": 0.525142252445221, |
| "learning_rate": 8.846761453396525e-07, |
| "loss": 0.4498, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.9146919431279621, |
| "grad_norm": 0.5565955638885498, |
| "learning_rate": 8.688783570300159e-07, |
| "loss": 0.4532, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.9162717219589257, |
| "grad_norm": 0.540267288684845, |
| "learning_rate": 8.530805687203792e-07, |
| "loss": 0.4828, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.9178515007898894, |
| "grad_norm": 0.5061677694320679, |
| "learning_rate": 8.372827804107425e-07, |
| "loss": 0.3505, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.919431279620853, |
| "grad_norm": 0.5490908622741699, |
| "learning_rate": 8.214849921011058e-07, |
| "loss": 0.4402, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.9210110584518167, |
| "grad_norm": 0.5788997411727905, |
| "learning_rate": 8.056872037914692e-07, |
| "loss": 0.3256, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.9225908372827805, |
| "grad_norm": 0.5741492509841919, |
| "learning_rate": 7.898894154818327e-07, |
| "loss": 0.451, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.9241706161137441, |
| "grad_norm": 0.5012090802192688, |
| "learning_rate": 7.74091627172196e-07, |
| "loss": 0.3513, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.9257503949447078, |
| "grad_norm": 0.5613192915916443, |
| "learning_rate": 7.582938388625594e-07, |
| "loss": 0.3499, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.9273301737756714, |
| "grad_norm": 0.5941815376281738, |
| "learning_rate": 7.424960505529226e-07, |
| "loss": 0.4133, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.9289099526066351, |
| "grad_norm": 0.7772453427314758, |
| "learning_rate": 7.26698262243286e-07, |
| "loss": 0.3818, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.9304897314375987, |
| "grad_norm": 0.5977700352668762, |
| "learning_rate": 7.109004739336493e-07, |
| "loss": 0.4099, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.9320695102685624, |
| "grad_norm": 0.7777069807052612, |
| "learning_rate": 6.951026856240127e-07, |
| "loss": 0.4341, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.933649289099526, |
| "grad_norm": 0.5362728834152222, |
| "learning_rate": 6.79304897314376e-07, |
| "loss": 0.4431, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.9352290679304898, |
| "grad_norm": 0.5126134157180786, |
| "learning_rate": 6.635071090047394e-07, |
| "loss": 0.3713, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.9368088467614534, |
| "grad_norm": 0.5886785984039307, |
| "learning_rate": 6.477093206951026e-07, |
| "loss": 0.405, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.9383886255924171, |
| "grad_norm": 0.5328089594841003, |
| "learning_rate": 6.31911532385466e-07, |
| "loss": 0.3952, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.9399684044233807, |
| "grad_norm": 0.7170501351356506, |
| "learning_rate": 6.161137440758294e-07, |
| "loss": 0.3979, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.9415481832543444, |
| "grad_norm": 0.6048548817634583, |
| "learning_rate": 6.003159557661928e-07, |
| "loss": 0.3425, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.943127962085308, |
| "grad_norm": 0.5635291337966919, |
| "learning_rate": 5.845181674565562e-07, |
| "loss": 0.3008, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.9447077409162717, |
| "grad_norm": 0.6890112161636353, |
| "learning_rate": 5.687203791469194e-07, |
| "loss": 0.4205, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.9462875197472354, |
| "grad_norm": 0.5197014212608337, |
| "learning_rate": 5.529225908372828e-07, |
| "loss": 0.4589, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.9478672985781991, |
| "grad_norm": 0.5197718143463135, |
| "learning_rate": 5.371248025276462e-07, |
| "loss": 0.2678, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.9494470774091627, |
| "grad_norm": 0.44931474328041077, |
| "learning_rate": 5.213270142180095e-07, |
| "loss": 0.4351, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.9510268562401264, |
| "grad_norm": 0.47795984148979187, |
| "learning_rate": 5.055292259083728e-07, |
| "loss": 0.4392, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.95260663507109, |
| "grad_norm": 0.6027578115463257, |
| "learning_rate": 4.897314375987362e-07, |
| "loss": 0.4499, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.9541864139020537, |
| "grad_norm": 0.6160722374916077, |
| "learning_rate": 4.7393364928909956e-07, |
| "loss": 0.434, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.9557661927330173, |
| "grad_norm": 0.8371343612670898, |
| "learning_rate": 4.581358609794629e-07, |
| "loss": 0.3911, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.957345971563981, |
| "grad_norm": 0.5282484292984009, |
| "learning_rate": 4.4233807266982627e-07, |
| "loss": 0.4445, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.9589257503949447, |
| "grad_norm": 0.5557743310928345, |
| "learning_rate": 4.265402843601896e-07, |
| "loss": 0.4103, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.9605055292259084, |
| "grad_norm": 0.6362637281417847, |
| "learning_rate": 4.107424960505529e-07, |
| "loss": 0.3856, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.9620853080568721, |
| "grad_norm": 0.745617151260376, |
| "learning_rate": 3.9494470774091633e-07, |
| "loss": 0.4179, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.9636650868878357, |
| "grad_norm": 0.659038782119751, |
| "learning_rate": 3.791469194312797e-07, |
| "loss": 0.4027, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.9652448657187994, |
| "grad_norm": 0.645199716091156, |
| "learning_rate": 3.63349131121643e-07, |
| "loss": 0.3501, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.966824644549763, |
| "grad_norm": 0.4868941605091095, |
| "learning_rate": 3.4755134281200634e-07, |
| "loss": 0.3385, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.9684044233807267, |
| "grad_norm": 0.5993934273719788, |
| "learning_rate": 3.317535545023697e-07, |
| "loss": 0.369, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.9699842022116903, |
| "grad_norm": 0.6094574928283691, |
| "learning_rate": 3.15955766192733e-07, |
| "loss": 0.4899, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.9715639810426541, |
| "grad_norm": 0.6989656686782837, |
| "learning_rate": 3.001579778830964e-07, |
| "loss": 0.4346, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.9731437598736177, |
| "grad_norm": 0.5412940382957458, |
| "learning_rate": 2.843601895734597e-07, |
| "loss": 0.4515, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.9747235387045814, |
| "grad_norm": 0.507622241973877, |
| "learning_rate": 2.685624012638231e-07, |
| "loss": 0.4171, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.976303317535545, |
| "grad_norm": 0.4564089775085449, |
| "learning_rate": 2.527646129541864e-07, |
| "loss": 0.3452, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.9778830963665087, |
| "grad_norm": 0.48170286417007446, |
| "learning_rate": 2.3696682464454978e-07, |
| "loss": 0.3866, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.9794628751974723, |
| "grad_norm": 0.47774481773376465, |
| "learning_rate": 2.2116903633491313e-07, |
| "loss": 0.4425, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.981042654028436, |
| "grad_norm": 0.4460739493370056, |
| "learning_rate": 2.0537124802527646e-07, |
| "loss": 0.3991, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.9826224328593997, |
| "grad_norm": 0.536359965801239, |
| "learning_rate": 1.8957345971563984e-07, |
| "loss": 0.327, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.9842022116903634, |
| "grad_norm": 0.5439571738243103, |
| "learning_rate": 1.7377567140600317e-07, |
| "loss": 0.408, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.985781990521327, |
| "grad_norm": 0.8827345967292786, |
| "learning_rate": 1.579778830963665e-07, |
| "loss": 0.4924, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.9873617693522907, |
| "grad_norm": 0.4992835521697998, |
| "learning_rate": 1.4218009478672986e-07, |
| "loss": 0.3921, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.9889415481832543, |
| "grad_norm": 0.7306237816810608, |
| "learning_rate": 1.263823064770932e-07, |
| "loss": 0.5063, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.990521327014218, |
| "grad_norm": 0.5200903415679932, |
| "learning_rate": 1.1058451816745657e-07, |
| "loss": 0.358, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.9921011058451816, |
| "grad_norm": 0.42708104848861694, |
| "learning_rate": 9.478672985781992e-08, |
| "loss": 0.3361, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.9936808846761453, |
| "grad_norm": 0.5993225574493408, |
| "learning_rate": 7.898894154818325e-08, |
| "loss": 0.3625, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.995260663507109, |
| "grad_norm": 0.49995774030685425, |
| "learning_rate": 6.31911532385466e-08, |
| "loss": 0.3746, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.9968404423380727, |
| "grad_norm": 0.5806180238723755, |
| "learning_rate": 4.739336492890996e-08, |
| "loss": 0.3727, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.9984202211690363, |
| "grad_norm": 0.5514349341392517, |
| "learning_rate": 3.15955766192733e-08, |
| "loss": 0.4634, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.4094119668006897, |
| "learning_rate": 1.579778830963665e-08, |
| "loss": 0.2044, |
| "step": 633 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 633, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.9805266972408545e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|