| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 8.0, |
| "eval_steps": 500, |
| "global_step": 1431, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011180992313067784, |
| "grad_norm": 0.0, |
| "learning_rate": 5e-06, |
| "loss": 1.2878, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.02236198462613557, |
| "grad_norm": 0.0, |
| "learning_rate": 5e-06, |
| "loss": 1.392, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.033542976939203356, |
| "grad_norm": 0.0, |
| "learning_rate": 5e-06, |
| "loss": 1.3594, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.04472396925227114, |
| "grad_norm": 0.0, |
| "learning_rate": 5e-06, |
| "loss": 1.2958, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.055904961565338925, |
| "grad_norm": 0.0, |
| "learning_rate": 5e-06, |
| "loss": 1.3475, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06708595387840671, |
| "grad_norm": 0.0, |
| "learning_rate": 5e-06, |
| "loss": 1.2303, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.07826694619147449, |
| "grad_norm": 0.0, |
| "learning_rate": 5e-06, |
| "loss": 1.1964, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.08944793850454227, |
| "grad_norm": 0.0, |
| "learning_rate": 5e-06, |
| "loss": 1.3328, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.10062893081761007, |
| "grad_norm": 6.944002672340658, |
| "learning_rate": 4.999996106235862e-06, |
| "loss": 1.3134, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.11180992313067785, |
| "grad_norm": 7.800497498064014, |
| "learning_rate": 4.999964956195521e-06, |
| "loss": 1.1147, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12299091544374563, |
| "grad_norm": 4.4662495771497355, |
| "learning_rate": 4.999902656502973e-06, |
| "loss": 1.025, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.13417190775681342, |
| "grad_norm": 4.026851738528776, |
| "learning_rate": 4.999809207934472e-06, |
| "loss": 1.0448, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.1453529000698812, |
| "grad_norm": 5.658278761851693, |
| "learning_rate": 4.999684611654392e-06, |
| "loss": 0.9826, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.15653389238294899, |
| "grad_norm": 3.9275877006609505, |
| "learning_rate": 4.9995288692152046e-06, |
| "loss": 0.9627, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.16771488469601678, |
| "grad_norm": 3.634771950296262, |
| "learning_rate": 4.9993419825574686e-06, |
| "loss": 0.9476, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.17889587700908455, |
| "grad_norm": 4.604406424526374, |
| "learning_rate": 4.9992368608591775e-06, |
| "loss": 0.9414, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.19007686932215234, |
| "grad_norm": 5.708200502114745, |
| "learning_rate": 4.999003262361029e-06, |
| "loss": 0.9572, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.20125786163522014, |
| "grad_norm": 5.020134712294459, |
| "learning_rate": 4.998738526193412e-06, |
| "loss": 0.9544, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.2124388539482879, |
| "grad_norm": 4.643332496496484, |
| "learning_rate": 4.998442655654946e-06, |
| "loss": 0.8504, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.2236198462613557, |
| "grad_norm": 4.7843514072232125, |
| "learning_rate": 4.998115654432191e-06, |
| "loss": 0.914, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2348008385744235, |
| "grad_norm": 3.973113705087721, |
| "learning_rate": 4.997757526599592e-06, |
| "loss": 0.8303, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.24598183088749126, |
| "grad_norm": 5.753323652117126, |
| "learning_rate": 4.9973682766194355e-06, |
| "loss": 0.8916, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.25716282320055905, |
| "grad_norm": 4.00607759948128, |
| "learning_rate": 4.996947909341789e-06, |
| "loss": 0.9391, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.26834381551362685, |
| "grad_norm": 4.73751358896988, |
| "learning_rate": 4.996496430004446e-06, |
| "loss": 0.8445, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.27952480782669464, |
| "grad_norm": 3.801634673248135, |
| "learning_rate": 4.9960138442328535e-06, |
| "loss": 0.8354, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2907058001397624, |
| "grad_norm": 4.998706656181077, |
| "learning_rate": 4.9955001580400475e-06, |
| "loss": 0.8556, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.3018867924528302, |
| "grad_norm": 5.235396952388322, |
| "learning_rate": 4.994955377826577e-06, |
| "loss": 0.8821, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.31306778476589797, |
| "grad_norm": 4.593843550283633, |
| "learning_rate": 4.994379510380421e-06, |
| "loss": 0.7965, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.32424877707896577, |
| "grad_norm": 4.636040406542864, |
| "learning_rate": 4.993772562876909e-06, |
| "loss": 0.8576, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.33542976939203356, |
| "grad_norm": 4.422458900120915, |
| "learning_rate": 4.993134542878631e-06, |
| "loss": 0.8388, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3466107617051013, |
| "grad_norm": 4.88515796654498, |
| "learning_rate": 4.992465458335335e-06, |
| "loss": 0.8427, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.3577917540181691, |
| "grad_norm": 4.620642626620232, |
| "learning_rate": 4.991765317583841e-06, |
| "loss": 0.8088, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.3689727463312369, |
| "grad_norm": 3.0164501013815146, |
| "learning_rate": 4.991034129347927e-06, |
| "loss": 0.7643, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.3801537386443047, |
| "grad_norm": 4.0807085306410915, |
| "learning_rate": 4.990271902738223e-06, |
| "loss": 0.8304, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.3913347309573725, |
| "grad_norm": 4.913983348963418, |
| "learning_rate": 4.989478647252101e-06, |
| "loss": 0.8694, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4025157232704403, |
| "grad_norm": 5.427166275548586, |
| "learning_rate": 4.988654372773552e-06, |
| "loss": 0.8031, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.413696715583508, |
| "grad_norm": 4.976699288607289, |
| "learning_rate": 4.987799089573066e-06, |
| "loss": 0.7548, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.4248777078965758, |
| "grad_norm": 5.035712861337141, |
| "learning_rate": 4.986912808307502e-06, |
| "loss": 0.7769, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.4360587002096436, |
| "grad_norm": 5.703104314189732, |
| "learning_rate": 4.985995540019956e-06, |
| "loss": 0.7744, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.4472396925227114, |
| "grad_norm": 3.6174332203212938, |
| "learning_rate": 4.985047296139622e-06, |
| "loss": 0.7215, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4584206848357792, |
| "grad_norm": 5.084461038739496, |
| "learning_rate": 4.984068088481654e-06, |
| "loss": 0.7462, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.469601677148847, |
| "grad_norm": 5.500722673783384, |
| "learning_rate": 4.983057929247014e-06, |
| "loss": 0.7937, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.4807826694619147, |
| "grad_norm": 5.76928743736382, |
| "learning_rate": 4.9820168310223215e-06, |
| "loss": 0.7701, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.4919636617749825, |
| "grad_norm": 4.3638410984754366, |
| "learning_rate": 4.980944806779698e-06, |
| "loss": 0.7063, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.5031446540880503, |
| "grad_norm": 6.6022312070502664, |
| "learning_rate": 4.979841869876603e-06, |
| "loss": 0.7829, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5143256464011181, |
| "grad_norm": 5.114853414480892, |
| "learning_rate": 4.97870803405567e-06, |
| "loss": 0.7419, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.5255066387141859, |
| "grad_norm": 5.450293615821356, |
| "learning_rate": 4.977543313444534e-06, |
| "loss": 0.7428, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5366876310272537, |
| "grad_norm": 3.888671786201343, |
| "learning_rate": 4.976347722555655e-06, |
| "loss": 0.763, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5478686233403215, |
| "grad_norm": 5.580018062591517, |
| "learning_rate": 4.975121276286136e-06, |
| "loss": 0.7451, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.5590496156533893, |
| "grad_norm": 5.244409209125885, |
| "learning_rate": 4.973863989917545e-06, |
| "loss": 0.6658, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.570230607966457, |
| "grad_norm": 6.341201782490113, |
| "learning_rate": 4.9725758791157105e-06, |
| "loss": 0.7042, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.5814116002795248, |
| "grad_norm": 3.63864440598579, |
| "learning_rate": 4.9712569599305415e-06, |
| "loss": 0.6859, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "grad_norm": 5.643540415249962, |
| "learning_rate": 4.9699072487958185e-06, |
| "loss": 0.7072, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.6037735849056604, |
| "grad_norm": 4.518214836889502, |
| "learning_rate": 4.968526762528988e-06, |
| "loss": 0.6989, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.6149545772187281, |
| "grad_norm": 4.813780988459217, |
| "learning_rate": 4.96711551833096e-06, |
| "loss": 0.6213, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6261355695317959, |
| "grad_norm": 6.534716960952802, |
| "learning_rate": 4.965673533785887e-06, |
| "loss": 0.6603, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.6373165618448637, |
| "grad_norm": 4.694700268634709, |
| "learning_rate": 4.9642008268609455e-06, |
| "loss": 0.6458, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.6484975541579315, |
| "grad_norm": 3.797163997052886, |
| "learning_rate": 4.962697415906118e-06, |
| "loss": 0.6208, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.6596785464709993, |
| "grad_norm": 5.303604758140139, |
| "learning_rate": 4.961163319653959e-06, |
| "loss": 0.6175, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.6708595387840671, |
| "grad_norm": 3.8308857949946398, |
| "learning_rate": 4.959598557219361e-06, |
| "loss": 0.6178, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6820405310971349, |
| "grad_norm": 5.611339241664303, |
| "learning_rate": 4.95800314809932e-06, |
| "loss": 0.617, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.6932215234102026, |
| "grad_norm": 5.234511261826922, |
| "learning_rate": 4.956377112172691e-06, |
| "loss": 0.6557, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.7044025157232704, |
| "grad_norm": 4.381066733905507, |
| "learning_rate": 4.954720469699939e-06, |
| "loss": 0.6343, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.7155835080363382, |
| "grad_norm": 5.113989443684452, |
| "learning_rate": 4.953033241322887e-06, |
| "loss": 0.6135, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.726764500349406, |
| "grad_norm": 5.138987950069777, |
| "learning_rate": 4.951315448064462e-06, |
| "loss": 0.6403, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7379454926624738, |
| "grad_norm": 4.43583718290579, |
| "learning_rate": 4.949567111328428e-06, |
| "loss": 0.6226, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.7491264849755416, |
| "grad_norm": 4.391597448273059, |
| "learning_rate": 4.947788252899124e-06, |
| "loss": 0.6333, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.7603074772886094, |
| "grad_norm": 4.193385817962468, |
| "learning_rate": 4.945978894941189e-06, |
| "loss": 0.6884, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.7714884696016772, |
| "grad_norm": 5.03154779607414, |
| "learning_rate": 4.944139059999286e-06, |
| "loss": 0.5783, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.782669461914745, |
| "grad_norm": 6.345004441163444, |
| "learning_rate": 4.942268770997825e-06, |
| "loss": 0.5314, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7938504542278128, |
| "grad_norm": 4.800013540838224, |
| "learning_rate": 4.940368051240675e-06, |
| "loss": 0.5876, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.8050314465408805, |
| "grad_norm": 5.229387760297341, |
| "learning_rate": 4.938436924410869e-06, |
| "loss": 0.6266, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.8162124388539483, |
| "grad_norm": 5.663117027843187, |
| "learning_rate": 4.936475414570317e-06, |
| "loss": 0.5407, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.827393431167016, |
| "grad_norm": 4.355698674662869, |
| "learning_rate": 4.9344835461595016e-06, |
| "loss": 0.5757, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.8385744234800838, |
| "grad_norm": 3.73012661577406, |
| "learning_rate": 4.932461343997174e-06, |
| "loss": 0.5671, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8497554157931516, |
| "grad_norm": 5.17610307953933, |
| "learning_rate": 4.930408833280044e-06, |
| "loss": 0.5552, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.8609364081062194, |
| "grad_norm": 4.8108290286110575, |
| "learning_rate": 4.928326039582468e-06, |
| "loss": 0.5455, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.8721174004192872, |
| "grad_norm": 4.143977047297293, |
| "learning_rate": 4.926212988856131e-06, |
| "loss": 0.5865, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.883298392732355, |
| "grad_norm": 4.809016102192773, |
| "learning_rate": 4.9240697074297205e-06, |
| "loss": 0.5904, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.8944793850454228, |
| "grad_norm": 4.329310274878485, |
| "learning_rate": 4.921896222008598e-06, |
| "loss": 0.5213, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9056603773584906, |
| "grad_norm": 6.082276125346202, |
| "learning_rate": 4.919692559674469e-06, |
| "loss": 0.5321, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.9168413696715584, |
| "grad_norm": 3.595682377289556, |
| "learning_rate": 4.917458747885045e-06, |
| "loss": 0.5589, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.9280223619846262, |
| "grad_norm": 4.759398027424621, |
| "learning_rate": 4.9151948144737e-06, |
| "loss": 0.5252, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.939203354297694, |
| "grad_norm": 4.925856740501272, |
| "learning_rate": 4.912900787649124e-06, |
| "loss": 0.5688, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.9503843466107617, |
| "grad_norm": 4.9751554778931695, |
| "learning_rate": 4.910576695994976e-06, |
| "loss": 0.49, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9615653389238294, |
| "grad_norm": 4.404002437196143, |
| "learning_rate": 4.908222568469516e-06, |
| "loss": 0.5031, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.9727463312368972, |
| "grad_norm": 4.438458089119356, |
| "learning_rate": 4.905838434405259e-06, |
| "loss": 0.5015, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.983927323549965, |
| "grad_norm": 3.7675300141289205, |
| "learning_rate": 4.903424323508601e-06, |
| "loss": 0.5133, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.9951083158630328, |
| "grad_norm": 5.557474516168906, |
| "learning_rate": 4.900980265859449e-06, |
| "loss": 0.4913, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.0062893081761006, |
| "grad_norm": 4.4806858821540585, |
| "learning_rate": 4.898506291910847e-06, |
| "loss": 0.4446, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.0174703004891683, |
| "grad_norm": 4.605929975666356, |
| "learning_rate": 4.896002432488599e-06, |
| "loss": 0.3632, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.0286512928022362, |
| "grad_norm": 4.9794341930411665, |
| "learning_rate": 4.893468718790883e-06, |
| "loss": 0.3868, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.039832285115304, |
| "grad_norm": 3.5317296745452733, |
| "learning_rate": 4.890905182387862e-06, |
| "loss": 0.4334, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.0510132774283718, |
| "grad_norm": 4.568181420141649, |
| "learning_rate": 4.88831185522129e-06, |
| "loss": 0.456, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.0621942697414395, |
| "grad_norm": 3.570260813698039, |
| "learning_rate": 4.885688769604115e-06, |
| "loss": 0.3846, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0733752620545074, |
| "grad_norm": 3.639759353451614, |
| "learning_rate": 4.883035958220077e-06, |
| "loss": 0.4363, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.084556254367575, |
| "grad_norm": 4.074741691986429, |
| "learning_rate": 4.8803534541233016e-06, |
| "loss": 0.3782, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.095737246680643, |
| "grad_norm": 4.875221867832197, |
| "learning_rate": 4.8776412907378845e-06, |
| "loss": 0.3815, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.1069182389937107, |
| "grad_norm": 3.575182053435755, |
| "learning_rate": 4.874899501857477e-06, |
| "loss": 0.4023, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.1180992313067786, |
| "grad_norm": 3.984785984285916, |
| "learning_rate": 4.8721281216448675e-06, |
| "loss": 0.305, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.1292802236198463, |
| "grad_norm": 3.997235184408756, |
| "learning_rate": 4.869327184631552e-06, |
| "loss": 0.3896, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.140461215932914, |
| "grad_norm": 3.403723018382878, |
| "learning_rate": 4.866496725717304e-06, |
| "loss": 0.3332, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.1516422082459818, |
| "grad_norm": 3.5740869992425917, |
| "learning_rate": 4.8636367801697415e-06, |
| "loss": 0.3299, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.1628232005590495, |
| "grad_norm": 3.8789874672120033, |
| "learning_rate": 4.860747383623889e-06, |
| "loss": 0.4145, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.1740041928721174, |
| "grad_norm": 3.8038820435820084, |
| "learning_rate": 4.857828572081731e-06, |
| "loss": 0.3171, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.1851851851851851, |
| "grad_norm": 3.260333619392394, |
| "learning_rate": 4.854880381911762e-06, |
| "loss": 0.3474, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.196366177498253, |
| "grad_norm": 2.8989963280714925, |
| "learning_rate": 4.851902849848536e-06, |
| "loss": 0.3931, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.2075471698113207, |
| "grad_norm": 3.6383247911373773, |
| "learning_rate": 4.848896012992208e-06, |
| "loss": 0.3822, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.2187281621243886, |
| "grad_norm": 3.0864181531286734, |
| "learning_rate": 4.845859908808074e-06, |
| "loss": 0.378, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.2299091544374563, |
| "grad_norm": 2.494513481207721, |
| "learning_rate": 4.842794575126099e-06, |
| "loss": 0.3655, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.2410901467505242, |
| "grad_norm": 2.6074910342756334, |
| "learning_rate": 4.839700050140448e-06, |
| "loss": 0.3973, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.2522711390635919, |
| "grad_norm": 2.2421870374103285, |
| "learning_rate": 4.836576372409015e-06, |
| "loss": 0.3784, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.2634521313766598, |
| "grad_norm": 2.451559449193117, |
| "learning_rate": 4.833423580852933e-06, |
| "loss": 0.3805, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.2746331236897275, |
| "grad_norm": 2.5374184019501285, |
| "learning_rate": 4.830241714756099e-06, |
| "loss": 0.293, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.2858141160027952, |
| "grad_norm": 2.525807489259318, |
| "learning_rate": 4.827030813764677e-06, |
| "loss": 0.2665, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.296995108315863, |
| "grad_norm": 2.3755504317471523, |
| "learning_rate": 4.8237909178866075e-06, |
| "loss": 0.4108, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.3081761006289307, |
| "grad_norm": 2.7662660096000793, |
| "learning_rate": 4.8205220674911075e-06, |
| "loss": 0.3928, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.3193570929419987, |
| "grad_norm": 2.245517906271987, |
| "learning_rate": 4.81722430330817e-06, |
| "loss": 0.355, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.3305380852550663, |
| "grad_norm": 2.684087860818518, |
| "learning_rate": 4.813897666428054e-06, |
| "loss": 0.3624, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.3417190775681342, |
| "grad_norm": 2.5507370157459865, |
| "learning_rate": 4.810542198300772e-06, |
| "loss": 0.3494, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.352900069881202, |
| "grad_norm": 2.157612559104276, |
| "learning_rate": 4.807157940735577e-06, |
| "loss": 0.3064, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.3640810621942698, |
| "grad_norm": 1.9389355017962189, |
| "learning_rate": 4.803744935900439e-06, |
| "loss": 0.3331, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.3752620545073375, |
| "grad_norm": 2.3147558047608867, |
| "learning_rate": 4.8003032263215185e-06, |
| "loss": 0.3538, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.3864430468204052, |
| "grad_norm": 2.414181223767401, |
| "learning_rate": 4.79683285488264e-06, |
| "loss": 0.3237, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.397624039133473, |
| "grad_norm": 2.0498128676624368, |
| "learning_rate": 4.793333864824756e-06, |
| "loss": 0.3742, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.408805031446541, |
| "grad_norm": 2.2294049255917416, |
| "learning_rate": 4.789806299745405e-06, |
| "loss": 0.2948, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.4199860237596087, |
| "grad_norm": 2.2210196470155923, |
| "learning_rate": 4.786250203598174e-06, |
| "loss": 0.28, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.4311670160726764, |
| "grad_norm": 2.6896787603814816, |
| "learning_rate": 4.782665620692147e-06, |
| "loss": 0.3513, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.4423480083857443, |
| "grad_norm": 2.1151921249556644, |
| "learning_rate": 4.779052595691355e-06, |
| "loss": 0.3598, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.453529000698812, |
| "grad_norm": 2.6404538176276047, |
| "learning_rate": 4.775411173614218e-06, |
| "loss": 0.3075, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.4647099930118799, |
| "grad_norm": 1.9888888421343762, |
| "learning_rate": 4.771741399832984e-06, |
| "loss": 0.356, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.4758909853249476, |
| "grad_norm": 2.284642426340359, |
| "learning_rate": 4.768043320073165e-06, |
| "loss": 0.2765, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.4870719776380152, |
| "grad_norm": 2.135563450656965, |
| "learning_rate": 4.764316980412966e-06, |
| "loss": 0.2825, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.4982529699510831, |
| "grad_norm": 1.8267552790003188, |
| "learning_rate": 4.7605624272827125e-06, |
| "loss": 0.3915, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.509433962264151, |
| "grad_norm": 2.26569092336033, |
| "learning_rate": 4.75677970746427e-06, |
| "loss": 0.3859, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.5206149545772187, |
| "grad_norm": 2.3510908940666346, |
| "learning_rate": 4.75296886809046e-06, |
| "loss": 0.312, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.5317959468902864, |
| "grad_norm": 2.1562478846600883, |
| "learning_rate": 4.749129956644477e-06, |
| "loss": 0.4398, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.5429769392033543, |
| "grad_norm": 2.1811966726037655, |
| "learning_rate": 4.745263020959296e-06, |
| "loss": 0.3221, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.5541579315164222, |
| "grad_norm": 2.035643810106488, |
| "learning_rate": 4.741368109217072e-06, |
| "loss": 0.3317, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.56533892382949, |
| "grad_norm": 2.0722038381676824, |
| "learning_rate": 4.737445269948543e-06, |
| "loss": 0.4627, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.5765199161425576, |
| "grad_norm": 2.2584403073433212, |
| "learning_rate": 4.733494552032426e-06, |
| "loss": 0.352, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.5877009084556253, |
| "grad_norm": 3.1127410509937783, |
| "learning_rate": 4.729516004694808e-06, |
| "loss": 0.3109, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.5988819007686932, |
| "grad_norm": 1.6930738402579835, |
| "learning_rate": 4.725509677508528e-06, |
| "loss": 0.3723, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.610062893081761, |
| "grad_norm": 2.6225330496610573, |
| "learning_rate": 4.721475620392567e-06, |
| "loss": 0.2853, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.6212438853948288, |
| "grad_norm": 1.998954970455011, |
| "learning_rate": 4.71741388361142e-06, |
| "loss": 0.323, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.6324248777078965, |
| "grad_norm": 2.3952745413220677, |
| "learning_rate": 4.713324517774471e-06, |
| "loss": 0.4057, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.6436058700209644, |
| "grad_norm": 1.7339961999135642, |
| "learning_rate": 4.7092075738353625e-06, |
| "loss": 0.2855, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.6547868623340323, |
| "grad_norm": 2.3672466509243075, |
| "learning_rate": 4.705063103091365e-06, |
| "loss": 0.277, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.6659678546471, |
| "grad_norm": 1.92096238087282, |
| "learning_rate": 4.700891157182729e-06, |
| "loss": 0.2699, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.6771488469601676, |
| "grad_norm": 1.6478187267877538, |
| "learning_rate": 4.696691788092049e-06, |
| "loss": 0.2875, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.6883298392732355, |
| "grad_norm": 2.6637144089516545, |
| "learning_rate": 4.692465048143615e-06, |
| "loss": 0.3229, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.6995108315863034, |
| "grad_norm": 2.0530281428374084, |
| "learning_rate": 4.688210990002755e-06, |
| "loss": 0.3546, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.7106918238993711, |
| "grad_norm": 2.150198399781322, |
| "learning_rate": 4.683929666675185e-06, |
| "loss": 0.4021, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.7218728162124388, |
| "grad_norm": 2.1752313572704542, |
| "learning_rate": 4.679621131506347e-06, |
| "loss": 0.3299, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.7330538085255065, |
| "grad_norm": 1.9055889494341978, |
| "learning_rate": 4.6752854381807414e-06, |
| "loss": 0.2514, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.7442348008385744, |
| "grad_norm": 2.469483649303522, |
| "learning_rate": 4.670922640721261e-06, |
| "loss": 0.332, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.7554157931516423, |
| "grad_norm": 2.327049750502898, |
| "learning_rate": 4.666532793488518e-06, |
| "loss": 0.3482, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.76659678546471, |
| "grad_norm": 2.0224582609864674, |
| "learning_rate": 4.662115951180164e-06, |
| "loss": 0.3192, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 1.9568416201882894, |
| "learning_rate": 4.657672168830211e-06, |
| "loss": 0.2682, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.7889587700908456, |
| "grad_norm": 1.919410926201314, |
| "learning_rate": 4.653201501808346e-06, |
| "loss": 0.3602, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.8001397624039135, |
| "grad_norm": 2.239752835185363, |
| "learning_rate": 4.6487040058192385e-06, |
| "loss": 0.346, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.8113207547169812, |
| "grad_norm": 2.3820790461811643, |
| "learning_rate": 4.644179736901848e-06, |
| "loss": 0.393, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.8225017470300489, |
| "grad_norm": 2.100652056063807, |
| "learning_rate": 4.639628751428728e-06, |
| "loss": 0.3348, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.8336827393431165, |
| "grad_norm": 1.839587786014522, |
| "learning_rate": 4.635051106105316e-06, |
| "loss": 0.297, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.8448637316561844, |
| "grad_norm": 1.460937373317575, |
| "learning_rate": 4.630446857969238e-06, |
| "loss": 0.3291, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.8560447239692524, |
| "grad_norm": 3.066440662132836, |
| "learning_rate": 4.625816064389589e-06, |
| "loss": 0.2752, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.86722571628232, |
| "grad_norm": 1.9596525632755366, |
| "learning_rate": 4.62115878306622e-06, |
| "loss": 0.3444, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.8784067085953877, |
| "grad_norm": 2.2835299782118335, |
| "learning_rate": 4.616475072029024e-06, |
| "loss": 0.3013, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.8895877009084556, |
| "grad_norm": 2.1330589159921756, |
| "learning_rate": 4.6117649896372055e-06, |
| "loss": 0.3811, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.9007686932215235, |
| "grad_norm": 2.28792058261577, |
| "learning_rate": 4.607028594578559e-06, |
| "loss": 0.304, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.9119496855345912, |
| "grad_norm": 1.8457539990364031, |
| "learning_rate": 4.602265945868735e-06, |
| "loss": 0.2817, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.923130677847659, |
| "grad_norm": 1.7860630390403116, |
| "learning_rate": 4.597477102850506e-06, |
| "loss": 0.3166, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.9343116701607268, |
| "grad_norm": 1.988441202911347, |
| "learning_rate": 4.592662125193027e-06, |
| "loss": 0.2881, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.9454926624737947, |
| "grad_norm": 1.7341207391896365, |
| "learning_rate": 4.587821072891089e-06, |
| "loss": 0.3126, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.9566736547868624, |
| "grad_norm": 1.8960045369195677, |
| "learning_rate": 4.582954006264377e-06, |
| "loss": 0.32, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.96785464709993, |
| "grad_norm": 1.8028316706058551, |
| "learning_rate": 4.578060985956714e-06, |
| "loss": 0.3308, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.9790356394129978, |
| "grad_norm": 1.7537644172052635, |
| "learning_rate": 4.573142072935307e-06, |
| "loss": 0.325, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.9902166317260657, |
| "grad_norm": 1.5291097261080726, |
| "learning_rate": 4.568197328489986e-06, |
| "loss": 0.3418, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.0013976240391336, |
| "grad_norm": 2.703429613422267, |
| "learning_rate": 4.563226814232444e-06, |
| "loss": 0.316, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.0125786163522013, |
| "grad_norm": 1.6677019482039983, |
| "learning_rate": 4.558230592095465e-06, |
| "loss": 0.2242, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.023759608665269, |
| "grad_norm": 2.1855279147060527, |
| "learning_rate": 4.5532087243321536e-06, |
| "loss": 0.1706, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.0349406009783366, |
| "grad_norm": 1.433260386596143, |
| "learning_rate": 4.548161273515161e-06, |
| "loss": 0.2597, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.0461215932914047, |
| "grad_norm": 1.9528007044032762, |
| "learning_rate": 4.543088302535903e-06, |
| "loss": 0.2321, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.0573025856044724, |
| "grad_norm": 1.508509476663671, |
| "learning_rate": 4.53798987460378e-06, |
| "loss": 0.1975, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.06848357791754, |
| "grad_norm": 1.4870411030447606, |
| "learning_rate": 4.532866053245385e-06, |
| "loss": 0.218, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.079664570230608, |
| "grad_norm": 1.984299603467917, |
| "learning_rate": 4.527716902303713e-06, |
| "loss": 0.1866, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.090845562543676, |
| "grad_norm": 1.7502708144873231, |
| "learning_rate": 4.522542485937369e-06, |
| "loss": 0.2128, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.1020265548567436, |
| "grad_norm": 1.131006072907252, |
| "learning_rate": 4.517342868619764e-06, |
| "loss": 0.2418, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.1132075471698113, |
| "grad_norm": 2.365723778930082, |
| "learning_rate": 4.512118115138315e-06, |
| "loss": 0.2249, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.124388539482879, |
| "grad_norm": 1.7739738087900154, |
| "learning_rate": 4.506868290593635e-06, |
| "loss": 0.225, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.135569531795947, |
| "grad_norm": 2.3920039733015197, |
| "learning_rate": 4.501593460398726e-06, |
| "loss": 0.207, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.146750524109015, |
| "grad_norm": 1.3961875749075527, |
| "learning_rate": 4.49629369027816e-06, |
| "loss": 0.1847, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.1579315164220825, |
| "grad_norm": 1.740079266616333, |
| "learning_rate": 4.490969046267258e-06, |
| "loss": 0.2092, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.16911250873515, |
| "grad_norm": 1.716849109423316, |
| "learning_rate": 4.485619594711278e-06, |
| "loss": 0.2512, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.180293501048218, |
| "grad_norm": 2.2256205473256836, |
| "learning_rate": 4.4802454022645725e-06, |
| "loss": 0.2212, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.191474493361286, |
| "grad_norm": 1.5080548485099736, |
| "learning_rate": 4.474846535889773e-06, |
| "loss": 0.2577, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.2026554856743537, |
| "grad_norm": 1.849350001917602, |
| "learning_rate": 4.469423062856946e-06, |
| "loss": 0.2518, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.2138364779874213, |
| "grad_norm": 2.0456903454646937, |
| "learning_rate": 4.463975050742757e-06, |
| "loss": 0.2666, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.225017470300489, |
| "grad_norm": 2.1576955140860172, |
| "learning_rate": 4.4585025674296315e-06, |
| "loss": 0.1881, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.236198462613557, |
| "grad_norm": 1.959825305986428, |
| "learning_rate": 4.453005681104906e-06, |
| "loss": 0.1912, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.247379454926625, |
| "grad_norm": 1.8263078605633967, |
| "learning_rate": 4.44748446025998e-06, |
| "loss": 0.177, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.2585604472396925, |
| "grad_norm": 1.3737693376807456, |
| "learning_rate": 4.44193897368946e-06, |
| "loss": 0.2083, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.26974143955276, |
| "grad_norm": 1.9216745648550881, |
| "learning_rate": 4.436369290490307e-06, |
| "loss": 0.269, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.280922431865828, |
| "grad_norm": 1.5225068983698562, |
| "learning_rate": 4.430775480060973e-06, |
| "loss": 0.2043, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.292103424178896, |
| "grad_norm": 1.958524495155971, |
| "learning_rate": 4.425157612100531e-06, |
| "loss": 0.2735, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.3032844164919637, |
| "grad_norm": 2.020109840115744, |
| "learning_rate": 4.419515756607819e-06, |
| "loss": 0.2623, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.3144654088050314, |
| "grad_norm": 1.6832635446278787, |
| "learning_rate": 4.413849983880554e-06, |
| "loss": 0.2122, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.325646401118099, |
| "grad_norm": 1.8238819367042174, |
| "learning_rate": 4.4081603645144685e-06, |
| "loss": 0.2141, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.336827393431167, |
| "grad_norm": 1.636664838162331, |
| "learning_rate": 4.4024469694024194e-06, |
| "loss": 0.2159, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.348008385744235, |
| "grad_norm": 1.563361723149053, |
| "learning_rate": 4.396709869733515e-06, |
| "loss": 0.2636, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.3591893780573026, |
| "grad_norm": 1.7104549540666967, |
| "learning_rate": 4.39094913699222e-06, |
| "loss": 0.2059, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.3703703703703702, |
| "grad_norm": 1.7448299629844894, |
| "learning_rate": 4.385164842957469e-06, |
| "loss": 0.2076, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.381551362683438, |
| "grad_norm": 2.0760771369111812, |
| "learning_rate": 4.379357059701771e-06, |
| "loss": 0.2241, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.392732354996506, |
| "grad_norm": 1.4610379659131663, |
| "learning_rate": 4.373525859590313e-06, |
| "loss": 0.2135, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.4039133473095737, |
| "grad_norm": 1.9763200369365506, |
| "learning_rate": 4.367671315280055e-06, |
| "loss": 0.2225, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.4150943396226414, |
| "grad_norm": 2.138415914668256, |
| "learning_rate": 4.3617934997188274e-06, |
| "loss": 0.2618, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.426275331935709, |
| "grad_norm": 1.6842725394389781, |
| "learning_rate": 4.355892486144419e-06, |
| "loss": 0.1691, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.4374563242487772, |
| "grad_norm": 2.056626946764254, |
| "learning_rate": 4.349968348083673e-06, |
| "loss": 0.1922, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.448637316561845, |
| "grad_norm": 1.2423274511146358, |
| "learning_rate": 4.3440211593515556e-06, |
| "loss": 0.2061, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.4598183088749126, |
| "grad_norm": 1.465237522133527, |
| "learning_rate": 4.338050994050253e-06, |
| "loss": 0.1996, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.4709993011879803, |
| "grad_norm": 2.1451900105983315, |
| "learning_rate": 4.332057926568235e-06, |
| "loss": 0.2441, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.4821802935010484, |
| "grad_norm": 1.5259606296511572, |
| "learning_rate": 4.326042031579337e-06, |
| "loss": 0.2066, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.493361285814116, |
| "grad_norm": 2.4163109674867784, |
| "learning_rate": 4.320003384041823e-06, |
| "loss": 0.2393, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.5045422781271838, |
| "grad_norm": 2.1518283309231907, |
| "learning_rate": 4.313942059197457e-06, |
| "loss": 0.2467, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.5157232704402515, |
| "grad_norm": 1.6715387204280183, |
| "learning_rate": 4.3078581325705614e-06, |
| "loss": 0.2495, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.5269042627533196, |
| "grad_norm": 1.7729216990478125, |
| "learning_rate": 4.3017516799670785e-06, |
| "loss": 0.1586, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.5380852550663873, |
| "grad_norm": 1.7853923740535589, |
| "learning_rate": 4.295622777473625e-06, |
| "loss": 0.2216, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.549266247379455, |
| "grad_norm": 1.7001940457803237, |
| "learning_rate": 4.289471501456543e-06, |
| "loss": 0.2288, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.5604472396925226, |
| "grad_norm": 2.5868877625212354, |
| "learning_rate": 4.283297928560951e-06, |
| "loss": 0.2075, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.5716282320055903, |
| "grad_norm": 2.1990912649669823, |
| "learning_rate": 4.277102135709786e-06, |
| "loss": 0.2017, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.582809224318658, |
| "grad_norm": 2.2627396419665273, |
| "learning_rate": 4.270884200102848e-06, |
| "loss": 0.2144, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.593990216631726, |
| "grad_norm": 2.2283930780278505, |
| "learning_rate": 4.2646441992158356e-06, |
| "loss": 0.3, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.605171208944794, |
| "grad_norm": 2.6765537923336087, |
| "learning_rate": 4.258382210799381e-06, |
| "loss": 0.2441, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.6163522012578615, |
| "grad_norm": 2.0124117535310706, |
| "learning_rate": 4.252098312878083e-06, |
| "loss": 0.2667, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.6275331935709296, |
| "grad_norm": 2.0622543839995586, |
| "learning_rate": 4.245792583749533e-06, |
| "loss": 0.2209, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.6387141858839973, |
| "grad_norm": 1.7479329049755916, |
| "learning_rate": 4.2394651019833385e-06, |
| "loss": 0.2045, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.649895178197065, |
| "grad_norm": 2.223724201139868, |
| "learning_rate": 4.23311594642015e-06, |
| "loss": 0.2283, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.6610761705101327, |
| "grad_norm": 1.8280919056271019, |
| "learning_rate": 4.226745196170669e-06, |
| "loss": 0.2319, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.6722571628232004, |
| "grad_norm": 1.6911807333452673, |
| "learning_rate": 4.220352930614672e-06, |
| "loss": 0.232, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.6834381551362685, |
| "grad_norm": 1.9242468593637576, |
| "learning_rate": 4.213939229400014e-06, |
| "loss": 0.2733, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.694619147449336, |
| "grad_norm": 2.1223012349945254, |
| "learning_rate": 4.20750417244164e-06, |
| "loss": 0.2529, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.705800139762404, |
| "grad_norm": 2.1921742273194313, |
| "learning_rate": 4.201047839920589e-06, |
| "loss": 0.257, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.7169811320754715, |
| "grad_norm": 2.118251084662083, |
| "learning_rate": 4.194570312282993e-06, |
| "loss": 0.235, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.7281621243885397, |
| "grad_norm": 1.9816644323530734, |
| "learning_rate": 4.1880716702390764e-06, |
| "loss": 0.1839, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.7393431167016074, |
| "grad_norm": 1.8891363830208663, |
| "learning_rate": 4.181551994762151e-06, |
| "loss": 0.2301, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.750524109014675, |
| "grad_norm": 1.7502840233703516, |
| "learning_rate": 4.1750113670876045e-06, |
| "loss": 0.1883, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.7617051013277427, |
| "grad_norm": 1.5627429248705165, |
| "learning_rate": 4.16844986871189e-06, |
| "loss": 0.2042, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.7728860936408104, |
| "grad_norm": 1.8631447011251083, |
| "learning_rate": 4.161867581391511e-06, |
| "loss": 0.2018, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.7840670859538785, |
| "grad_norm": 2.0906363974353765, |
| "learning_rate": 4.155264587142002e-06, |
| "loss": 0.2319, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.795248078266946, |
| "grad_norm": 1.7819164584799931, |
| "learning_rate": 4.148640968236903e-06, |
| "loss": 0.1703, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.806429070580014, |
| "grad_norm": 1.7607086842324982, |
| "learning_rate": 4.141996807206745e-06, |
| "loss": 0.2264, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.817610062893082, |
| "grad_norm": 1.5277530729360727, |
| "learning_rate": 4.135332186838008e-06, |
| "loss": 0.2134, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.8287910552061497, |
| "grad_norm": 1.739277840645659, |
| "learning_rate": 4.128647190172099e-06, |
| "loss": 0.1952, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.8399720475192174, |
| "grad_norm": 1.9987218712547774, |
| "learning_rate": 4.121941900504316e-06, |
| "loss": 0.2364, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.851153039832285, |
| "grad_norm": 2.2244662318443225, |
| "learning_rate": 4.1152164013828035e-06, |
| "loss": 0.2072, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.8623340321453528, |
| "grad_norm": 1.526547678145968, |
| "learning_rate": 4.108470776607521e-06, |
| "loss": 0.2047, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.8735150244584204, |
| "grad_norm": 2.005093613185987, |
| "learning_rate": 4.1017051102291946e-06, |
| "loss": 0.2789, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.8846960167714886, |
| "grad_norm": 2.2990829029486624, |
| "learning_rate": 4.094919486548266e-06, |
| "loss": 0.2414, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.8958770090845563, |
| "grad_norm": 2.13743283403912, |
| "learning_rate": 4.088113990113846e-06, |
| "loss": 0.2029, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.907058001397624, |
| "grad_norm": 1.9027626030017704, |
| "learning_rate": 4.081288705722666e-06, |
| "loss": 0.2229, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.918238993710692, |
| "grad_norm": 2.0076859155071745, |
| "learning_rate": 4.074443718418009e-06, |
| "loss": 0.1995, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.9294199860237597, |
| "grad_norm": 1.7985240007466619, |
| "learning_rate": 4.067579113488661e-06, |
| "loss": 0.1807, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.9406009783368274, |
| "grad_norm": 2.140934337000471, |
| "learning_rate": 4.060694976467844e-06, |
| "loss": 0.2532, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.951781970649895, |
| "grad_norm": 2.323003193893417, |
| "learning_rate": 4.0537913931321495e-06, |
| "loss": 0.2421, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.962962962962963, |
| "grad_norm": 1.4532319163010707, |
| "learning_rate": 4.04686844950047e-06, |
| "loss": 0.2267, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.9741439552760305, |
| "grad_norm": 2.0854922336923023, |
| "learning_rate": 4.039926231832931e-06, |
| "loss": 0.266, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.9853249475890986, |
| "grad_norm": 2.882533995321225, |
| "learning_rate": 4.032964826629811e-06, |
| "loss": 0.2079, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.9965059399021663, |
| "grad_norm": 2.7236955724192873, |
| "learning_rate": 4.025984320630465e-06, |
| "loss": 0.1657, |
| "step": 536 |
| }, |
| { |
| "epoch": 3.007686932215234, |
| "grad_norm": 1.8432900490614266, |
| "learning_rate": 4.018984800812248e-06, |
| "loss": 0.1354, |
| "step": 538 |
| }, |
| { |
| "epoch": 3.018867924528302, |
| "grad_norm": 2.0142515580054017, |
| "learning_rate": 4.011966354389424e-06, |
| "loss": 0.1542, |
| "step": 540 |
| }, |
| { |
| "epoch": 3.03004891684137, |
| "grad_norm": 2.756352182005047, |
| "learning_rate": 4.004929068812086e-06, |
| "loss": 0.1638, |
| "step": 542 |
| }, |
| { |
| "epoch": 3.0412299091544375, |
| "grad_norm": 2.048077691313813, |
| "learning_rate": 3.997873031765061e-06, |
| "loss": 0.156, |
| "step": 544 |
| }, |
| { |
| "epoch": 3.052410901467505, |
| "grad_norm": 1.7442233155652336, |
| "learning_rate": 3.990798331166822e-06, |
| "loss": 0.1095, |
| "step": 546 |
| }, |
| { |
| "epoch": 3.063591893780573, |
| "grad_norm": 1.826861973142375, |
| "learning_rate": 3.983705055168391e-06, |
| "loss": 0.1195, |
| "step": 548 |
| }, |
| { |
| "epoch": 3.074772886093641, |
| "grad_norm": 1.943175517862748, |
| "learning_rate": 3.976593292152238e-06, |
| "loss": 0.1638, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.0859538784067087, |
| "grad_norm": 1.5477727978546996, |
| "learning_rate": 3.969463130731183e-06, |
| "loss": 0.1291, |
| "step": 552 |
| }, |
| { |
| "epoch": 3.0971348707197763, |
| "grad_norm": 2.3918080397656034, |
| "learning_rate": 3.9623146597472915e-06, |
| "loss": 0.1333, |
| "step": 554 |
| }, |
| { |
| "epoch": 3.108315863032844, |
| "grad_norm": 2.0592865934704, |
| "learning_rate": 3.955147968270764e-06, |
| "loss": 0.1692, |
| "step": 556 |
| }, |
| { |
| "epoch": 3.119496855345912, |
| "grad_norm": 1.280306245998938, |
| "learning_rate": 3.947963145598833e-06, |
| "loss": 0.1695, |
| "step": 558 |
| }, |
| { |
| "epoch": 3.13067784765898, |
| "grad_norm": 1.5568837418874426, |
| "learning_rate": 3.940760281254645e-06, |
| "loss": 0.1614, |
| "step": 560 |
| }, |
| { |
| "epoch": 3.1418588399720475, |
| "grad_norm": 1.6248982612645957, |
| "learning_rate": 3.933539464986143e-06, |
| "loss": 0.1184, |
| "step": 562 |
| }, |
| { |
| "epoch": 3.153039832285115, |
| "grad_norm": 1.657284019650329, |
| "learning_rate": 3.926300786764957e-06, |
| "loss": 0.1523, |
| "step": 564 |
| }, |
| { |
| "epoch": 3.164220824598183, |
| "grad_norm": 1.9315037734198213, |
| "learning_rate": 3.919044336785274e-06, |
| "loss": 0.1411, |
| "step": 566 |
| }, |
| { |
| "epoch": 3.175401816911251, |
| "grad_norm": 1.7456382044347782, |
| "learning_rate": 3.911770205462717e-06, |
| "loss": 0.1764, |
| "step": 568 |
| }, |
| { |
| "epoch": 3.1865828092243187, |
| "grad_norm": 1.4045398532057205, |
| "learning_rate": 3.904478483433223e-06, |
| "loss": 0.1241, |
| "step": 570 |
| }, |
| { |
| "epoch": 3.1977638015373864, |
| "grad_norm": 2.0886459168414895, |
| "learning_rate": 3.897169261551907e-06, |
| "loss": 0.1475, |
| "step": 572 |
| }, |
| { |
| "epoch": 3.208944793850454, |
| "grad_norm": 1.9098750157027404, |
| "learning_rate": 3.889842630891934e-06, |
| "loss": 0.138, |
| "step": 574 |
| }, |
| { |
| "epoch": 3.220125786163522, |
| "grad_norm": 2.184899827108709, |
| "learning_rate": 3.8824986827433804e-06, |
| "loss": 0.1315, |
| "step": 576 |
| }, |
| { |
| "epoch": 3.23130677847659, |
| "grad_norm": 1.528868394326383, |
| "learning_rate": 3.875137508612104e-06, |
| "loss": 0.1447, |
| "step": 578 |
| }, |
| { |
| "epoch": 3.2424877707896576, |
| "grad_norm": 1.6893708687857107, |
| "learning_rate": 3.867759200218594e-06, |
| "loss": 0.1746, |
| "step": 580 |
| }, |
| { |
| "epoch": 3.2536687631027252, |
| "grad_norm": 1.2610411246909474, |
| "learning_rate": 3.860363849496836e-06, |
| "loss": 0.1301, |
| "step": 582 |
| }, |
| { |
| "epoch": 3.264849755415793, |
| "grad_norm": 1.397542140556738, |
| "learning_rate": 3.852951548593161e-06, |
| "loss": 0.1373, |
| "step": 584 |
| }, |
| { |
| "epoch": 3.276030747728861, |
| "grad_norm": 1.9903353672741917, |
| "learning_rate": 3.845522389865106e-06, |
| "loss": 0.1609, |
| "step": 586 |
| }, |
| { |
| "epoch": 3.2872117400419287, |
| "grad_norm": 1.8370941337314268, |
| "learning_rate": 3.838076465880248e-06, |
| "loss": 0.148, |
| "step": 588 |
| }, |
| { |
| "epoch": 3.2983927323549964, |
| "grad_norm": 2.058865100613852, |
| "learning_rate": 3.830613869415069e-06, |
| "loss": 0.1483, |
| "step": 590 |
| }, |
| { |
| "epoch": 3.309573724668064, |
| "grad_norm": 1.5232253694216566, |
| "learning_rate": 3.823134693453782e-06, |
| "loss": 0.1621, |
| "step": 592 |
| }, |
| { |
| "epoch": 3.3207547169811322, |
| "grad_norm": 1.4993049111722665, |
| "learning_rate": 3.8156390311871885e-06, |
| "loss": 0.1433, |
| "step": 594 |
| }, |
| { |
| "epoch": 3.3319357092942, |
| "grad_norm": 1.555934394379587, |
| "learning_rate": 3.808126976011505e-06, |
| "loss": 0.1426, |
| "step": 596 |
| }, |
| { |
| "epoch": 3.3431167016072676, |
| "grad_norm": 1.3356473446523094, |
| "learning_rate": 3.8005986215272056e-06, |
| "loss": 0.1706, |
| "step": 598 |
| }, |
| { |
| "epoch": 3.3542976939203353, |
| "grad_norm": 1.9137688829035275, |
| "learning_rate": 3.7930540615378565e-06, |
| "loss": 0.1268, |
| "step": 600 |
| }, |
| { |
| "epoch": 3.3654786862334034, |
| "grad_norm": 1.5344748040953766, |
| "learning_rate": 3.785493390048942e-06, |
| "loss": 0.1458, |
| "step": 602 |
| }, |
| { |
| "epoch": 3.376659678546471, |
| "grad_norm": 1.602087497610558, |
| "learning_rate": 3.777916701266699e-06, |
| "loss": 0.1697, |
| "step": 604 |
| }, |
| { |
| "epoch": 3.3878406708595388, |
| "grad_norm": 1.4842568873334896, |
| "learning_rate": 3.7703240895969373e-06, |
| "loss": 0.1519, |
| "step": 606 |
| }, |
| { |
| "epoch": 3.3990216631726065, |
| "grad_norm": 1.53860971256147, |
| "learning_rate": 3.7627156496438686e-06, |
| "loss": 0.1691, |
| "step": 608 |
| }, |
| { |
| "epoch": 3.4102026554856746, |
| "grad_norm": 1.4193083610134813, |
| "learning_rate": 3.755091476208925e-06, |
| "loss": 0.1211, |
| "step": 610 |
| }, |
| { |
| "epoch": 3.4213836477987423, |
| "grad_norm": 1.8053625548432577, |
| "learning_rate": 3.7474516642895804e-06, |
| "loss": 0.131, |
| "step": 612 |
| }, |
| { |
| "epoch": 3.43256464011181, |
| "grad_norm": 1.9235537907938398, |
| "learning_rate": 3.7397963090781606e-06, |
| "loss": 0.163, |
| "step": 614 |
| }, |
| { |
| "epoch": 3.4437456324248776, |
| "grad_norm": 1.6022979215271898, |
| "learning_rate": 3.732125505960665e-06, |
| "loss": 0.1479, |
| "step": 616 |
| }, |
| { |
| "epoch": 3.4549266247379453, |
| "grad_norm": 1.663918706474492, |
| "learning_rate": 3.7244393505155713e-06, |
| "loss": 0.1376, |
| "step": 618 |
| }, |
| { |
| "epoch": 3.4661076170510134, |
| "grad_norm": 1.7974067820999995, |
| "learning_rate": 3.716737938512651e-06, |
| "loss": 0.1281, |
| "step": 620 |
| }, |
| { |
| "epoch": 3.477288609364081, |
| "grad_norm": 2.10108609081228, |
| "learning_rate": 3.709021365911772e-06, |
| "loss": 0.1388, |
| "step": 622 |
| }, |
| { |
| "epoch": 3.488469601677149, |
| "grad_norm": 1.367826215107555, |
| "learning_rate": 3.701289728861701e-06, |
| "loss": 0.1191, |
| "step": 624 |
| }, |
| { |
| "epoch": 3.4996505939902165, |
| "grad_norm": 1.7959553374302317, |
| "learning_rate": 3.693543123698913e-06, |
| "loss": 0.1758, |
| "step": 626 |
| }, |
| { |
| "epoch": 3.5108315863032846, |
| "grad_norm": 1.7389366148854988, |
| "learning_rate": 3.6857816469463806e-06, |
| "loss": 0.1405, |
| "step": 628 |
| }, |
| { |
| "epoch": 3.5220125786163523, |
| "grad_norm": 2.871162474790627, |
| "learning_rate": 3.6780053953123836e-06, |
| "loss": 0.1549, |
| "step": 630 |
| }, |
| { |
| "epoch": 3.53319357092942, |
| "grad_norm": 1.478751565339363, |
| "learning_rate": 3.6702144656892907e-06, |
| "loss": 0.1759, |
| "step": 632 |
| }, |
| { |
| "epoch": 3.5443745632424877, |
| "grad_norm": 1.4974413518112613, |
| "learning_rate": 3.662408955152364e-06, |
| "loss": 0.1078, |
| "step": 634 |
| }, |
| { |
| "epoch": 3.5555555555555554, |
| "grad_norm": 1.7006067350332152, |
| "learning_rate": 3.6545889609585405e-06, |
| "loss": 0.1427, |
| "step": 636 |
| }, |
| { |
| "epoch": 3.5667365478686235, |
| "grad_norm": 1.8754398825641954, |
| "learning_rate": 3.6467545805452266e-06, |
| "loss": 0.1893, |
| "step": 638 |
| }, |
| { |
| "epoch": 3.577917540181691, |
| "grad_norm": 1.7762501705151392, |
| "learning_rate": 3.6389059115290813e-06, |
| "loss": 0.1109, |
| "step": 640 |
| }, |
| { |
| "epoch": 3.589098532494759, |
| "grad_norm": 2.0251975300449327, |
| "learning_rate": 3.631043051704799e-06, |
| "loss": 0.121, |
| "step": 642 |
| }, |
| { |
| "epoch": 3.6002795248078265, |
| "grad_norm": 1.3531681902278672, |
| "learning_rate": 3.6231660990438922e-06, |
| "loss": 0.1348, |
| "step": 644 |
| }, |
| { |
| "epoch": 3.6114605171208947, |
| "grad_norm": 1.9724391202631109, |
| "learning_rate": 3.615275151693471e-06, |
| "loss": 0.1449, |
| "step": 646 |
| }, |
| { |
| "epoch": 3.6226415094339623, |
| "grad_norm": 1.785158595271644, |
| "learning_rate": 3.6073703079750204e-06, |
| "loss": 0.1485, |
| "step": 648 |
| }, |
| { |
| "epoch": 3.63382250174703, |
| "grad_norm": 1.829166278099355, |
| "learning_rate": 3.5994516663831734e-06, |
| "loss": 0.1192, |
| "step": 650 |
| }, |
| { |
| "epoch": 3.6450034940600977, |
| "grad_norm": 1.9222881871208803, |
| "learning_rate": 3.591519325584487e-06, |
| "loss": 0.1635, |
| "step": 652 |
| }, |
| { |
| "epoch": 3.6561844863731654, |
| "grad_norm": 2.052453811112636, |
| "learning_rate": 3.583573384416209e-06, |
| "loss": 0.1561, |
| "step": 654 |
| }, |
| { |
| "epoch": 3.6673654786862335, |
| "grad_norm": 1.9190051036571132, |
| "learning_rate": 3.575613941885047e-06, |
| "loss": 0.1051, |
| "step": 656 |
| }, |
| { |
| "epoch": 3.678546470999301, |
| "grad_norm": 1.4736638642637576, |
| "learning_rate": 3.5676410971659404e-06, |
| "loss": 0.123, |
| "step": 658 |
| }, |
| { |
| "epoch": 3.689727463312369, |
| "grad_norm": 1.7325761695268906, |
| "learning_rate": 3.5596549496008165e-06, |
| "loss": 0.1446, |
| "step": 660 |
| }, |
| { |
| "epoch": 3.700908455625437, |
| "grad_norm": 2.0344810615726288, |
| "learning_rate": 3.551655598697358e-06, |
| "loss": 0.1629, |
| "step": 662 |
| }, |
| { |
| "epoch": 3.7120894479385047, |
| "grad_norm": 1.936581123166174, |
| "learning_rate": 3.54364314412776e-06, |
| "loss": 0.1569, |
| "step": 664 |
| }, |
| { |
| "epoch": 3.7232704402515724, |
| "grad_norm": 1.3525874354992642, |
| "learning_rate": 3.535617685727494e-06, |
| "loss": 0.1082, |
| "step": 666 |
| }, |
| { |
| "epoch": 3.73445143256464, |
| "grad_norm": 1.6514309403224916, |
| "learning_rate": 3.527579323494055e-06, |
| "loss": 0.1431, |
| "step": 668 |
| }, |
| { |
| "epoch": 3.7456324248777078, |
| "grad_norm": 1.8602451468342234, |
| "learning_rate": 3.5195281575857228e-06, |
| "loss": 0.1639, |
| "step": 670 |
| }, |
| { |
| "epoch": 3.7568134171907754, |
| "grad_norm": 1.4731268992440232, |
| "learning_rate": 3.511464288320311e-06, |
| "loss": 0.1271, |
| "step": 672 |
| }, |
| { |
| "epoch": 3.7679944095038436, |
| "grad_norm": 1.37724516129253, |
| "learning_rate": 3.503387816173916e-06, |
| "loss": 0.1597, |
| "step": 674 |
| }, |
| { |
| "epoch": 3.7791754018169113, |
| "grad_norm": 1.7200144334067748, |
| "learning_rate": 3.495298841779669e-06, |
| "loss": 0.117, |
| "step": 676 |
| }, |
| { |
| "epoch": 3.790356394129979, |
| "grad_norm": 1.92538314164391, |
| "learning_rate": 3.4871974659264786e-06, |
| "loss": 0.1584, |
| "step": 678 |
| }, |
| { |
| "epoch": 3.801537386443047, |
| "grad_norm": 1.4718208788605616, |
| "learning_rate": 3.4790837895577752e-06, |
| "loss": 0.1333, |
| "step": 680 |
| }, |
| { |
| "epoch": 3.8127183787561147, |
| "grad_norm": 1.5582481918696203, |
| "learning_rate": 3.470957913770255e-06, |
| "loss": 0.1464, |
| "step": 682 |
| }, |
| { |
| "epoch": 3.8238993710691824, |
| "grad_norm": 1.4618275028428347, |
| "learning_rate": 3.462819939812618e-06, |
| "loss": 0.0995, |
| "step": 684 |
| }, |
| { |
| "epoch": 3.83508036338225, |
| "grad_norm": 1.3366351935592664, |
| "learning_rate": 3.4546699690843123e-06, |
| "loss": 0.1204, |
| "step": 686 |
| }, |
| { |
| "epoch": 3.846261355695318, |
| "grad_norm": 1.3780079667316787, |
| "learning_rate": 3.446508103134259e-06, |
| "loss": 0.1701, |
| "step": 688 |
| }, |
| { |
| "epoch": 3.8574423480083855, |
| "grad_norm": 1.7451718870626607, |
| "learning_rate": 3.4383344436595992e-06, |
| "loss": 0.1158, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.8686233403214536, |
| "grad_norm": 2.019474198008684, |
| "learning_rate": 3.430149092504422e-06, |
| "loss": 0.1304, |
| "step": 692 |
| }, |
| { |
| "epoch": 3.8798043326345213, |
| "grad_norm": 1.6820935429062616, |
| "learning_rate": 3.4219521516584912e-06, |
| "loss": 0.1334, |
| "step": 694 |
| }, |
| { |
| "epoch": 3.890985324947589, |
| "grad_norm": 2.2578057319721236, |
| "learning_rate": 3.4137437232559834e-06, |
| "loss": 0.1557, |
| "step": 696 |
| }, |
| { |
| "epoch": 3.902166317260657, |
| "grad_norm": 1.3610116271561221, |
| "learning_rate": 3.4055239095742067e-06, |
| "loss": 0.1644, |
| "step": 698 |
| }, |
| { |
| "epoch": 3.913347309573725, |
| "grad_norm": 1.3397050224861815, |
| "learning_rate": 3.3972928130323322e-06, |
| "loss": 0.1471, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.9245283018867925, |
| "grad_norm": 1.5234658664307734, |
| "learning_rate": 3.3890505361901153e-06, |
| "loss": 0.1195, |
| "step": 702 |
| }, |
| { |
| "epoch": 3.93570929419986, |
| "grad_norm": 1.763362220735128, |
| "learning_rate": 3.380797181746619e-06, |
| "loss": 0.1363, |
| "step": 704 |
| }, |
| { |
| "epoch": 3.946890286512928, |
| "grad_norm": 2.038986301246902, |
| "learning_rate": 3.3725328525389324e-06, |
| "loss": 0.1203, |
| "step": 706 |
| }, |
| { |
| "epoch": 3.958071278825996, |
| "grad_norm": 1.9046513315579439, |
| "learning_rate": 3.364257651540891e-06, |
| "loss": 0.1578, |
| "step": 708 |
| }, |
| { |
| "epoch": 3.9692522711390636, |
| "grad_norm": 1.423399143627221, |
| "learning_rate": 3.355971681861794e-06, |
| "loss": 0.1211, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.9804332634521313, |
| "grad_norm": 1.5586817639667492, |
| "learning_rate": 3.3476750467451176e-06, |
| "loss": 0.153, |
| "step": 712 |
| }, |
| { |
| "epoch": 3.991614255765199, |
| "grad_norm": 1.4814888460752178, |
| "learning_rate": 3.33936784956723e-06, |
| "loss": 0.1288, |
| "step": 714 |
| }, |
| { |
| "epoch": 4.002795248078267, |
| "grad_norm": 1.6561127976965244, |
| "learning_rate": 3.331050193836104e-06, |
| "loss": 0.1196, |
| "step": 716 |
| }, |
| { |
| "epoch": 4.013976240391335, |
| "grad_norm": 1.8246755797846792, |
| "learning_rate": 3.322722183190025e-06, |
| "loss": 0.0983, |
| "step": 718 |
| }, |
| { |
| "epoch": 4.0251572327044025, |
| "grad_norm": 1.2508646883720782, |
| "learning_rate": 3.3143839213963026e-06, |
| "loss": 0.1132, |
| "step": 720 |
| }, |
| { |
| "epoch": 4.03633822501747, |
| "grad_norm": 1.3174073933660169, |
| "learning_rate": 3.306035512349974e-06, |
| "loss": 0.0886, |
| "step": 722 |
| }, |
| { |
| "epoch": 4.047519217330538, |
| "grad_norm": 1.4006843207756257, |
| "learning_rate": 3.297677060072513e-06, |
| "loss": 0.0907, |
| "step": 724 |
| }, |
| { |
| "epoch": 4.058700209643606, |
| "grad_norm": 2.147633002379955, |
| "learning_rate": 3.2893086687105324e-06, |
| "loss": 0.0814, |
| "step": 726 |
| }, |
| { |
| "epoch": 4.069881201956673, |
| "grad_norm": 1.8499679148666142, |
| "learning_rate": 3.280930442534486e-06, |
| "loss": 0.0916, |
| "step": 728 |
| }, |
| { |
| "epoch": 4.081062194269742, |
| "grad_norm": 1.5576608674855401, |
| "learning_rate": 3.272542485937369e-06, |
| "loss": 0.0814, |
| "step": 730 |
| }, |
| { |
| "epoch": 4.0922431865828095, |
| "grad_norm": 1.5258204722757824, |
| "learning_rate": 3.264144903433419e-06, |
| "loss": 0.0929, |
| "step": 732 |
| }, |
| { |
| "epoch": 4.103424178895877, |
| "grad_norm": 1.2377371189448831, |
| "learning_rate": 3.2557377996568135e-06, |
| "loss": 0.0933, |
| "step": 734 |
| }, |
| { |
| "epoch": 4.114605171208945, |
| "grad_norm": 1.6706792363129992, |
| "learning_rate": 3.247321279360363e-06, |
| "loss": 0.0957, |
| "step": 736 |
| }, |
| { |
| "epoch": 4.1257861635220126, |
| "grad_norm": 1.5205095000978939, |
| "learning_rate": 3.238895447414211e-06, |
| "loss": 0.1094, |
| "step": 738 |
| }, |
| { |
| "epoch": 4.13696715583508, |
| "grad_norm": 1.8218111131497405, |
| "learning_rate": 3.2304604088045206e-06, |
| "loss": 0.0866, |
| "step": 740 |
| }, |
| { |
| "epoch": 4.148148148148148, |
| "grad_norm": 1.5060146063158792, |
| "learning_rate": 3.222016268632175e-06, |
| "loss": 0.0974, |
| "step": 742 |
| }, |
| { |
| "epoch": 4.159329140461216, |
| "grad_norm": 2.33394735696618, |
| "learning_rate": 3.2135631321114603e-06, |
| "loss": 0.0767, |
| "step": 744 |
| }, |
| { |
| "epoch": 4.170510132774284, |
| "grad_norm": 1.8304481485687374, |
| "learning_rate": 3.2051011045687574e-06, |
| "loss": 0.1027, |
| "step": 746 |
| }, |
| { |
| "epoch": 4.181691125087352, |
| "grad_norm": 1.4496933516097028, |
| "learning_rate": 3.196630291441231e-06, |
| "loss": 0.073, |
| "step": 748 |
| }, |
| { |
| "epoch": 4.1928721174004195, |
| "grad_norm": 1.5989097781751378, |
| "learning_rate": 3.1881507982755126e-06, |
| "loss": 0.074, |
| "step": 750 |
| }, |
| { |
| "epoch": 4.204053109713487, |
| "grad_norm": 1.5479651084913313, |
| "learning_rate": 3.17966273072639e-06, |
| "loss": 0.0941, |
| "step": 752 |
| }, |
| { |
| "epoch": 4.215234102026555, |
| "grad_norm": 1.4844971201883568, |
| "learning_rate": 3.1711661945554857e-06, |
| "loss": 0.1171, |
| "step": 754 |
| }, |
| { |
| "epoch": 4.226415094339623, |
| "grad_norm": 1.538555100844062, |
| "learning_rate": 3.162661295629942e-06, |
| "loss": 0.0839, |
| "step": 756 |
| }, |
| { |
| "epoch": 4.23759608665269, |
| "grad_norm": 1.511356916861757, |
| "learning_rate": 3.154148139921102e-06, |
| "loss": 0.1039, |
| "step": 758 |
| }, |
| { |
| "epoch": 4.248777078965758, |
| "grad_norm": 1.811476489190878, |
| "learning_rate": 3.1456268335031886e-06, |
| "loss": 0.0794, |
| "step": 760 |
| }, |
| { |
| "epoch": 4.259958071278826, |
| "grad_norm": 1.6229333309674812, |
| "learning_rate": 3.137097482551983e-06, |
| "loss": 0.1152, |
| "step": 762 |
| }, |
| { |
| "epoch": 4.271139063591894, |
| "grad_norm": 1.4723017587041405, |
| "learning_rate": 3.128560193343501e-06, |
| "loss": 0.0944, |
| "step": 764 |
| }, |
| { |
| "epoch": 4.282320055904962, |
| "grad_norm": 1.0034690245189755, |
| "learning_rate": 3.1200150722526693e-06, |
| "loss": 0.0663, |
| "step": 766 |
| }, |
| { |
| "epoch": 4.29350104821803, |
| "grad_norm": 1.5551415143149132, |
| "learning_rate": 3.1114622257520004e-06, |
| "loss": 0.1021, |
| "step": 768 |
| }, |
| { |
| "epoch": 4.304682040531097, |
| "grad_norm": 1.836559018121584, |
| "learning_rate": 3.1029017604102655e-06, |
| "loss": 0.099, |
| "step": 770 |
| }, |
| { |
| "epoch": 4.315863032844165, |
| "grad_norm": 1.0818921388079483, |
| "learning_rate": 3.0943337828911673e-06, |
| "loss": 0.0899, |
| "step": 772 |
| }, |
| { |
| "epoch": 4.327044025157233, |
| "grad_norm": 0.9784785751112162, |
| "learning_rate": 3.085758399952011e-06, |
| "loss": 0.1016, |
| "step": 774 |
| }, |
| { |
| "epoch": 4.3382250174703, |
| "grad_norm": 1.348338975607883, |
| "learning_rate": 3.0771757184423716e-06, |
| "loss": 0.1063, |
| "step": 776 |
| }, |
| { |
| "epoch": 4.349406009783368, |
| "grad_norm": 2.1529902019434455, |
| "learning_rate": 3.0685858453027668e-06, |
| "loss": 0.089, |
| "step": 778 |
| }, |
| { |
| "epoch": 4.360587002096436, |
| "grad_norm": 1.3031273077449874, |
| "learning_rate": 3.0599888875633192e-06, |
| "loss": 0.1077, |
| "step": 780 |
| }, |
| { |
| "epoch": 4.371767994409504, |
| "grad_norm": 1.3772043306307704, |
| "learning_rate": 3.0513849523424298e-06, |
| "loss": 0.0879, |
| "step": 782 |
| }, |
| { |
| "epoch": 4.382948986722572, |
| "grad_norm": 1.7829225937512299, |
| "learning_rate": 3.0427741468454375e-06, |
| "loss": 0.1099, |
| "step": 784 |
| }, |
| { |
| "epoch": 4.39412997903564, |
| "grad_norm": 1.1143653742483424, |
| "learning_rate": 3.034156578363284e-06, |
| "loss": 0.0908, |
| "step": 786 |
| }, |
| { |
| "epoch": 4.405310971348707, |
| "grad_norm": 1.9841896768408593, |
| "learning_rate": 3.0255323542711784e-06, |
| "loss": 0.0846, |
| "step": 788 |
| }, |
| { |
| "epoch": 4.416491963661775, |
| "grad_norm": 1.1622503242476587, |
| "learning_rate": 3.0169015820272595e-06, |
| "loss": 0.0809, |
| "step": 790 |
| }, |
| { |
| "epoch": 4.427672955974843, |
| "grad_norm": 1.4138977756081776, |
| "learning_rate": 3.0082643691712572e-06, |
| "loss": 0.0832, |
| "step": 792 |
| }, |
| { |
| "epoch": 4.43885394828791, |
| "grad_norm": 1.3694425414816003, |
| "learning_rate": 2.9996208233231506e-06, |
| "loss": 0.1015, |
| "step": 794 |
| }, |
| { |
| "epoch": 4.450034940600978, |
| "grad_norm": 1.8252502558409327, |
| "learning_rate": 2.9909710521818265e-06, |
| "loss": 0.1049, |
| "step": 796 |
| }, |
| { |
| "epoch": 4.461215932914046, |
| "grad_norm": 1.4396307405101365, |
| "learning_rate": 2.9823151635237424e-06, |
| "loss": 0.0613, |
| "step": 798 |
| }, |
| { |
| "epoch": 4.472396925227114, |
| "grad_norm": 1.3667673153541864, |
| "learning_rate": 2.973653265201578e-06, |
| "loss": 0.1081, |
| "step": 800 |
| }, |
| { |
| "epoch": 4.483577917540182, |
| "grad_norm": 1.761976942384573, |
| "learning_rate": 2.964985465142895e-06, |
| "loss": 0.1002, |
| "step": 802 |
| }, |
| { |
| "epoch": 4.49475890985325, |
| "grad_norm": 1.6343471974417978, |
| "learning_rate": 2.9563118713487895e-06, |
| "loss": 0.0749, |
| "step": 804 |
| }, |
| { |
| "epoch": 4.505939902166317, |
| "grad_norm": 2.0454570442431046, |
| "learning_rate": 2.9476325918925484e-06, |
| "loss": 0.0857, |
| "step": 806 |
| }, |
| { |
| "epoch": 4.517120894479385, |
| "grad_norm": 1.7007295640066746, |
| "learning_rate": 2.938947734918302e-06, |
| "loss": 0.1085, |
| "step": 808 |
| }, |
| { |
| "epoch": 4.528301886792453, |
| "grad_norm": 1.5611422829954795, |
| "learning_rate": 2.9302574086396774e-06, |
| "loss": 0.0775, |
| "step": 810 |
| }, |
| { |
| "epoch": 4.53948287910552, |
| "grad_norm": 1.7913016893140525, |
| "learning_rate": 2.9215617213384494e-06, |
| "loss": 0.0875, |
| "step": 812 |
| }, |
| { |
| "epoch": 4.550663871418588, |
| "grad_norm": 1.5753063947599002, |
| "learning_rate": 2.91286078136319e-06, |
| "loss": 0.0805, |
| "step": 814 |
| }, |
| { |
| "epoch": 4.561844863731656, |
| "grad_norm": 1.8942921897754963, |
| "learning_rate": 2.904154697127921e-06, |
| "loss": 0.0806, |
| "step": 816 |
| }, |
| { |
| "epoch": 4.573025856044724, |
| "grad_norm": 1.791394910046461, |
| "learning_rate": 2.8954435771107604e-06, |
| "loss": 0.0992, |
| "step": 818 |
| }, |
| { |
| "epoch": 4.584206848357792, |
| "grad_norm": 1.245790765054016, |
| "learning_rate": 2.8867275298525743e-06, |
| "loss": 0.0886, |
| "step": 820 |
| }, |
| { |
| "epoch": 4.59538784067086, |
| "grad_norm": 1.5133863011334676, |
| "learning_rate": 2.878006663955621e-06, |
| "loss": 0.0886, |
| "step": 822 |
| }, |
| { |
| "epoch": 4.606568832983927, |
| "grad_norm": 2.0502622868705993, |
| "learning_rate": 2.8692810880821997e-06, |
| "loss": 0.0716, |
| "step": 824 |
| }, |
| { |
| "epoch": 4.617749825296995, |
| "grad_norm": 1.2876873289352964, |
| "learning_rate": 2.860550910953296e-06, |
| "loss": 0.0943, |
| "step": 826 |
| }, |
| { |
| "epoch": 4.628930817610063, |
| "grad_norm": 1.440475980645125, |
| "learning_rate": 2.8518162413472266e-06, |
| "loss": 0.1083, |
| "step": 828 |
| }, |
| { |
| "epoch": 4.64011180992313, |
| "grad_norm": 1.3754262878787067, |
| "learning_rate": 2.843077188098286e-06, |
| "loss": 0.1041, |
| "step": 830 |
| }, |
| { |
| "epoch": 4.651292802236198, |
| "grad_norm": 1.4424213259038674, |
| "learning_rate": 2.834333860095388e-06, |
| "loss": 0.0807, |
| "step": 832 |
| }, |
| { |
| "epoch": 4.662473794549266, |
| "grad_norm": 1.994638545215632, |
| "learning_rate": 2.8255863662807097e-06, |
| "loss": 0.0819, |
| "step": 834 |
| }, |
| { |
| "epoch": 4.673654786862334, |
| "grad_norm": 1.5478645240921063, |
| "learning_rate": 2.8168348156483356e-06, |
| "loss": 0.113, |
| "step": 836 |
| }, |
| { |
| "epoch": 4.684835779175402, |
| "grad_norm": 1.324879005941319, |
| "learning_rate": 2.8124575531000226e-06, |
| "loss": 0.11, |
| "step": 838 |
| }, |
| { |
| "epoch": 4.69601677148847, |
| "grad_norm": 1.5993247352100177, |
| "learning_rate": 2.803700121715214e-06, |
| "loss": 0.0903, |
| "step": 840 |
| }, |
| { |
| "epoch": 4.707197763801537, |
| "grad_norm": 1.256541482417978, |
| "learning_rate": 2.7949389062160946e-06, |
| "loss": 0.0925, |
| "step": 842 |
| }, |
| { |
| "epoch": 4.718378756114605, |
| "grad_norm": 2.706891920194882, |
| "learning_rate": 2.786174015767721e-06, |
| "loss": 0.084, |
| "step": 844 |
| }, |
| { |
| "epoch": 4.729559748427673, |
| "grad_norm": 1.3220515828132557, |
| "learning_rate": 2.7774055595809395e-06, |
| "loss": 0.0801, |
| "step": 846 |
| }, |
| { |
| "epoch": 4.7407407407407405, |
| "grad_norm": 1.5911477732332153, |
| "learning_rate": 2.768633646911027e-06, |
| "loss": 0.0938, |
| "step": 848 |
| }, |
| { |
| "epoch": 4.751921733053808, |
| "grad_norm": 1.1333988378482527, |
| "learning_rate": 2.759858387056325e-06, |
| "loss": 0.0721, |
| "step": 850 |
| }, |
| { |
| "epoch": 4.763102725366876, |
| "grad_norm": 1.4690260920140663, |
| "learning_rate": 2.7510798893568846e-06, |
| "loss": 0.0769, |
| "step": 852 |
| }, |
| { |
| "epoch": 4.774283717679944, |
| "grad_norm": 1.3785131166774844, |
| "learning_rate": 2.742298263193099e-06, |
| "loss": 0.1064, |
| "step": 854 |
| }, |
| { |
| "epoch": 4.785464709993012, |
| "grad_norm": 1.39128795327872, |
| "learning_rate": 2.733513617984342e-06, |
| "loss": 0.075, |
| "step": 856 |
| }, |
| { |
| "epoch": 4.79664570230608, |
| "grad_norm": 1.6826021403482612, |
| "learning_rate": 2.724726063187605e-06, |
| "loss": 0.1175, |
| "step": 858 |
| }, |
| { |
| "epoch": 4.8078266946191475, |
| "grad_norm": 1.353741266830404, |
| "learning_rate": 2.715935708296134e-06, |
| "loss": 0.1146, |
| "step": 860 |
| }, |
| { |
| "epoch": 4.819007686932215, |
| "grad_norm": 1.4488179633464906, |
| "learning_rate": 2.707142662838062e-06, |
| "loss": 0.1033, |
| "step": 862 |
| }, |
| { |
| "epoch": 4.830188679245283, |
| "grad_norm": 1.307354977462126, |
| "learning_rate": 2.6983470363750497e-06, |
| "loss": 0.093, |
| "step": 864 |
| }, |
| { |
| "epoch": 4.8413696715583505, |
| "grad_norm": 1.4753004858703918, |
| "learning_rate": 2.689548938500914e-06, |
| "loss": 0.0905, |
| "step": 866 |
| }, |
| { |
| "epoch": 4.852550663871418, |
| "grad_norm": 1.551558439927485, |
| "learning_rate": 2.6807484788402676e-06, |
| "loss": 0.075, |
| "step": 868 |
| }, |
| { |
| "epoch": 4.863731656184486, |
| "grad_norm": 1.499892261020302, |
| "learning_rate": 2.67194576704715e-06, |
| "loss": 0.0876, |
| "step": 870 |
| }, |
| { |
| "epoch": 4.8749126484975545, |
| "grad_norm": 1.82643381640813, |
| "learning_rate": 2.6631409128036637e-06, |
| "loss": 0.0892, |
| "step": 872 |
| }, |
| { |
| "epoch": 4.886093640810622, |
| "grad_norm": 1.3480606493487655, |
| "learning_rate": 2.6543340258186063e-06, |
| "loss": 0.0816, |
| "step": 874 |
| }, |
| { |
| "epoch": 4.89727463312369, |
| "grad_norm": 2.2307067144092407, |
| "learning_rate": 2.6455252158261015e-06, |
| "loss": 0.0994, |
| "step": 876 |
| }, |
| { |
| "epoch": 4.9084556254367575, |
| "grad_norm": 1.8646868858712458, |
| "learning_rate": 2.636714592584235e-06, |
| "loss": 0.0902, |
| "step": 878 |
| }, |
| { |
| "epoch": 4.919636617749825, |
| "grad_norm": 1.535171207325978, |
| "learning_rate": 2.6279022658736856e-06, |
| "loss": 0.0911, |
| "step": 880 |
| }, |
| { |
| "epoch": 4.930817610062893, |
| "grad_norm": 1.1594360070916991, |
| "learning_rate": 2.619088345496358e-06, |
| "loss": 0.066, |
| "step": 882 |
| }, |
| { |
| "epoch": 4.941998602375961, |
| "grad_norm": 1.6526631394475477, |
| "learning_rate": 2.610272941274012e-06, |
| "loss": 0.1014, |
| "step": 884 |
| }, |
| { |
| "epoch": 4.953179594689029, |
| "grad_norm": 1.8240816325874138, |
| "learning_rate": 2.6014561630468993e-06, |
| "loss": 0.0928, |
| "step": 886 |
| }, |
| { |
| "epoch": 4.964360587002097, |
| "grad_norm": 1.3816438884334348, |
| "learning_rate": 2.5926381206723885e-06, |
| "loss": 0.088, |
| "step": 888 |
| }, |
| { |
| "epoch": 4.9755415793151645, |
| "grad_norm": 1.3157397283692482, |
| "learning_rate": 2.583818924023601e-06, |
| "loss": 0.0938, |
| "step": 890 |
| }, |
| { |
| "epoch": 4.986722571628232, |
| "grad_norm": 1.464557516575305, |
| "learning_rate": 2.5749986829880423e-06, |
| "loss": 0.0781, |
| "step": 892 |
| }, |
| { |
| "epoch": 4.9979035639413, |
| "grad_norm": 1.8481309973872981, |
| "learning_rate": 2.5661775074662276e-06, |
| "loss": 0.0708, |
| "step": 894 |
| }, |
| { |
| "epoch": 5.0090845562543675, |
| "grad_norm": 1.3777408578534927, |
| "learning_rate": 2.5573555073703172e-06, |
| "loss": 0.0574, |
| "step": 896 |
| }, |
| { |
| "epoch": 5.020265548567435, |
| "grad_norm": 1.5585565063610693, |
| "learning_rate": 2.5485327926227464e-06, |
| "loss": 0.0533, |
| "step": 898 |
| }, |
| { |
| "epoch": 5.031446540880503, |
| "grad_norm": 3.8488829032344403, |
| "learning_rate": 2.539709473154855e-06, |
| "loss": 0.0524, |
| "step": 900 |
| }, |
| { |
| "epoch": 5.042627533193571, |
| "grad_norm": 1.360678519326562, |
| "learning_rate": 2.5308856589055164e-06, |
| "loss": 0.0608, |
| "step": 902 |
| }, |
| { |
| "epoch": 5.053808525506638, |
| "grad_norm": 1.4720850175627471, |
| "learning_rate": 2.5220614598197708e-06, |
| "loss": 0.0527, |
| "step": 904 |
| }, |
| { |
| "epoch": 5.064989517819707, |
| "grad_norm": 1.2412662972591795, |
| "learning_rate": 2.513236985847451e-06, |
| "loss": 0.0488, |
| "step": 906 |
| }, |
| { |
| "epoch": 5.0761705101327745, |
| "grad_norm": 1.3236580966844242, |
| "learning_rate": 2.5044123469418174e-06, |
| "loss": 0.0638, |
| "step": 908 |
| }, |
| { |
| "epoch": 5.087351502445842, |
| "grad_norm": 1.8348241342651854, |
| "learning_rate": 2.495587653058184e-06, |
| "loss": 0.0629, |
| "step": 910 |
| }, |
| { |
| "epoch": 5.09853249475891, |
| "grad_norm": 0.9662213920921242, |
| "learning_rate": 2.4867630141525493e-06, |
| "loss": 0.0722, |
| "step": 912 |
| }, |
| { |
| "epoch": 5.109713487071978, |
| "grad_norm": 1.6784486385619315, |
| "learning_rate": 2.477938540180231e-06, |
| "loss": 0.0482, |
| "step": 914 |
| }, |
| { |
| "epoch": 5.120894479385045, |
| "grad_norm": 1.386742744607905, |
| "learning_rate": 2.4691143410944844e-06, |
| "loss": 0.0596, |
| "step": 916 |
| }, |
| { |
| "epoch": 5.132075471698113, |
| "grad_norm": 1.5375835898995094, |
| "learning_rate": 2.4602905268451455e-06, |
| "loss": 0.0592, |
| "step": 918 |
| }, |
| { |
| "epoch": 5.143256464011181, |
| "grad_norm": 1.334707574114043, |
| "learning_rate": 2.451467207377254e-06, |
| "loss": 0.0493, |
| "step": 920 |
| }, |
| { |
| "epoch": 5.154437456324249, |
| "grad_norm": 1.018606004126685, |
| "learning_rate": 2.442644492629683e-06, |
| "loss": 0.0544, |
| "step": 922 |
| }, |
| { |
| "epoch": 5.165618448637317, |
| "grad_norm": 1.0236510244569192, |
| "learning_rate": 2.433822492533774e-06, |
| "loss": 0.0501, |
| "step": 924 |
| }, |
| { |
| "epoch": 5.176799440950385, |
| "grad_norm": 0.8191759766926784, |
| "learning_rate": 2.4250013170119585e-06, |
| "loss": 0.0594, |
| "step": 926 |
| }, |
| { |
| "epoch": 5.187980433263452, |
| "grad_norm": 1.0938612787512558, |
| "learning_rate": 2.4161810759763993e-06, |
| "loss": 0.0544, |
| "step": 928 |
| }, |
| { |
| "epoch": 5.19916142557652, |
| "grad_norm": 1.3602285379082586, |
| "learning_rate": 2.407361879327612e-06, |
| "loss": 0.0442, |
| "step": 930 |
| }, |
| { |
| "epoch": 5.210342417889588, |
| "grad_norm": 1.1380441045618945, |
| "learning_rate": 2.398543836953101e-06, |
| "loss": 0.0563, |
| "step": 932 |
| }, |
| { |
| "epoch": 5.221523410202655, |
| "grad_norm": 1.1080478505241853, |
| "learning_rate": 2.389727058725989e-06, |
| "loss": 0.0515, |
| "step": 934 |
| }, |
| { |
| "epoch": 5.232704402515723, |
| "grad_norm": 1.2558697950305333, |
| "learning_rate": 2.380911654503643e-06, |
| "loss": 0.0507, |
| "step": 936 |
| }, |
| { |
| "epoch": 5.243885394828791, |
| "grad_norm": 1.2293644348010904, |
| "learning_rate": 2.3720977341263152e-06, |
| "loss": 0.0607, |
| "step": 938 |
| }, |
| { |
| "epoch": 5.255066387141859, |
| "grad_norm": 1.292488994918762, |
| "learning_rate": 2.3632854074157653e-06, |
| "loss": 0.0474, |
| "step": 940 |
| }, |
| { |
| "epoch": 5.266247379454927, |
| "grad_norm": 1.2671492916227067, |
| "learning_rate": 2.3544747841738998e-06, |
| "loss": 0.0769, |
| "step": 942 |
| }, |
| { |
| "epoch": 5.277428371767995, |
| "grad_norm": 1.6102887076835615, |
| "learning_rate": 2.3456659741813945e-06, |
| "loss": 0.0496, |
| "step": 944 |
| }, |
| { |
| "epoch": 5.288609364081062, |
| "grad_norm": 1.577997048333656, |
| "learning_rate": 2.3368590871963367e-06, |
| "loss": 0.0796, |
| "step": 946 |
| }, |
| { |
| "epoch": 5.29979035639413, |
| "grad_norm": 2.278441135480121, |
| "learning_rate": 2.328054232952851e-06, |
| "loss": 0.0679, |
| "step": 948 |
| }, |
| { |
| "epoch": 5.310971348707198, |
| "grad_norm": 1.1443796744340577, |
| "learning_rate": 2.3192515211597332e-06, |
| "loss": 0.0589, |
| "step": 950 |
| }, |
| { |
| "epoch": 5.322152341020265, |
| "grad_norm": 1.3246252050774938, |
| "learning_rate": 2.3104510614990875e-06, |
| "loss": 0.0711, |
| "step": 952 |
| }, |
| { |
| "epoch": 5.333333333333333, |
| "grad_norm": 2.3404125762291574, |
| "learning_rate": 2.301652963624951e-06, |
| "loss": 0.0571, |
| "step": 954 |
| }, |
| { |
| "epoch": 5.344514325646401, |
| "grad_norm": 1.6173224098499974, |
| "learning_rate": 2.292857337161938e-06, |
| "loss": 0.0715, |
| "step": 956 |
| }, |
| { |
| "epoch": 5.355695317959469, |
| "grad_norm": 1.416375080557459, |
| "learning_rate": 2.2840642917038666e-06, |
| "loss": 0.0555, |
| "step": 958 |
| }, |
| { |
| "epoch": 5.366876310272537, |
| "grad_norm": 1.2819320119071211, |
| "learning_rate": 2.2752739368123948e-06, |
| "loss": 0.0486, |
| "step": 960 |
| }, |
| { |
| "epoch": 5.378057302585605, |
| "grad_norm": 1.1198977788924485, |
| "learning_rate": 2.2664863820156593e-06, |
| "loss": 0.0408, |
| "step": 962 |
| }, |
| { |
| "epoch": 5.389238294898672, |
| "grad_norm": 1.1451798114445098, |
| "learning_rate": 2.2577017368069017e-06, |
| "loss": 0.0626, |
| "step": 964 |
| }, |
| { |
| "epoch": 5.40041928721174, |
| "grad_norm": 1.3380127274735694, |
| "learning_rate": 2.248920110643116e-06, |
| "loss": 0.0568, |
| "step": 966 |
| }, |
| { |
| "epoch": 5.411600279524808, |
| "grad_norm": 1.4489239240672898, |
| "learning_rate": 2.2401416129436753e-06, |
| "loss": 0.059, |
| "step": 968 |
| }, |
| { |
| "epoch": 5.422781271837875, |
| "grad_norm": 1.3130908635170957, |
| "learning_rate": 2.2313663530889734e-06, |
| "loss": 0.0444, |
| "step": 970 |
| }, |
| { |
| "epoch": 5.433962264150943, |
| "grad_norm": 1.2045728193533076, |
| "learning_rate": 2.222594440419061e-06, |
| "loss": 0.0952, |
| "step": 972 |
| }, |
| { |
| "epoch": 5.445143256464011, |
| "grad_norm": 1.1505612686257871, |
| "learning_rate": 2.2138259842322794e-06, |
| "loss": 0.0536, |
| "step": 974 |
| }, |
| { |
| "epoch": 5.456324248777079, |
| "grad_norm": 1.521719008832957, |
| "learning_rate": 2.2050610937839058e-06, |
| "loss": 0.073, |
| "step": 976 |
| }, |
| { |
| "epoch": 5.467505241090147, |
| "grad_norm": 1.3381824532405695, |
| "learning_rate": 2.1962998782847863e-06, |
| "loss": 0.0583, |
| "step": 978 |
| }, |
| { |
| "epoch": 5.478686233403215, |
| "grad_norm": 1.1782879600371732, |
| "learning_rate": 2.1875424468999787e-06, |
| "loss": 0.052, |
| "step": 980 |
| }, |
| { |
| "epoch": 5.489867225716282, |
| "grad_norm": 1.1689516819440322, |
| "learning_rate": 2.178788908747387e-06, |
| "loss": 0.0515, |
| "step": 982 |
| }, |
| { |
| "epoch": 5.50104821802935, |
| "grad_norm": 1.1479989981730907, |
| "learning_rate": 2.170039372896409e-06, |
| "loss": 0.055, |
| "step": 984 |
| }, |
| { |
| "epoch": 5.512229210342418, |
| "grad_norm": 1.3922562574409854, |
| "learning_rate": 2.161293948366573e-06, |
| "loss": 0.0554, |
| "step": 986 |
| }, |
| { |
| "epoch": 5.523410202655485, |
| "grad_norm": 1.409490849880991, |
| "learning_rate": 2.152552744126178e-06, |
| "loss": 0.0392, |
| "step": 988 |
| }, |
| { |
| "epoch": 5.534591194968553, |
| "grad_norm": 1.2479629003574995, |
| "learning_rate": 2.1438158690909413e-06, |
| "loss": 0.0599, |
| "step": 990 |
| }, |
| { |
| "epoch": 5.545772187281621, |
| "grad_norm": 1.2371376050465024, |
| "learning_rate": 2.1350834321226344e-06, |
| "loss": 0.0664, |
| "step": 992 |
| }, |
| { |
| "epoch": 5.556953179594689, |
| "grad_norm": 1.593505278104288, |
| "learning_rate": 2.126355542027734e-06, |
| "loss": 0.0479, |
| "step": 994 |
| }, |
| { |
| "epoch": 5.568134171907757, |
| "grad_norm": 1.2742537988695015, |
| "learning_rate": 2.117632307556059e-06, |
| "loss": 0.0803, |
| "step": 996 |
| }, |
| { |
| "epoch": 5.579315164220825, |
| "grad_norm": 1.3748039610126324, |
| "learning_rate": 2.1089138373994226e-06, |
| "loss": 0.0416, |
| "step": 998 |
| }, |
| { |
| "epoch": 5.590496156533892, |
| "grad_norm": 2.4084571636039755, |
| "learning_rate": 2.100200240190273e-06, |
| "loss": 0.0514, |
| "step": 1000 |
| }, |
| { |
| "epoch": 5.60167714884696, |
| "grad_norm": 1.1933752040503858, |
| "learning_rate": 2.09149162450034e-06, |
| "loss": 0.0625, |
| "step": 1002 |
| }, |
| { |
| "epoch": 5.612858141160028, |
| "grad_norm": 1.037709039674537, |
| "learning_rate": 2.0827880988392856e-06, |
| "loss": 0.0514, |
| "step": 1004 |
| }, |
| { |
| "epoch": 5.6240391334730955, |
| "grad_norm": 1.315142680072312, |
| "learning_rate": 2.0740897716533475e-06, |
| "loss": 0.0593, |
| "step": 1006 |
| }, |
| { |
| "epoch": 5.635220125786163, |
| "grad_norm": 1.0531660230737552, |
| "learning_rate": 2.0653967513239934e-06, |
| "loss": 0.0543, |
| "step": 1008 |
| }, |
| { |
| "epoch": 5.646401118099231, |
| "grad_norm": 1.2633776013551097, |
| "learning_rate": 2.0567091461665636e-06, |
| "loss": 0.0431, |
| "step": 1010 |
| }, |
| { |
| "epoch": 5.657582110412299, |
| "grad_norm": 1.449959564050197, |
| "learning_rate": 2.0480270644289282e-06, |
| "loss": 0.0482, |
| "step": 1012 |
| }, |
| { |
| "epoch": 5.668763102725367, |
| "grad_norm": 1.1071912059302882, |
| "learning_rate": 2.0393506142901347e-06, |
| "loss": 0.0564, |
| "step": 1014 |
| }, |
| { |
| "epoch": 5.679944095038435, |
| "grad_norm": 0.9876137346535111, |
| "learning_rate": 2.0306799038590595e-06, |
| "loss": 0.0391, |
| "step": 1016 |
| }, |
| { |
| "epoch": 5.6911250873515025, |
| "grad_norm": 1.1071464038310999, |
| "learning_rate": 2.0220150411730638e-06, |
| "loss": 0.0636, |
| "step": 1018 |
| }, |
| { |
| "epoch": 5.70230607966457, |
| "grad_norm": 1.0473491285671832, |
| "learning_rate": 2.013356134196643e-06, |
| "loss": 0.0581, |
| "step": 1020 |
| }, |
| { |
| "epoch": 5.713487071977638, |
| "grad_norm": 1.1296902267336801, |
| "learning_rate": 2.004703290820086e-06, |
| "loss": 0.0604, |
| "step": 1022 |
| }, |
| { |
| "epoch": 5.7246680642907055, |
| "grad_norm": 1.309317661735025, |
| "learning_rate": 1.9960566188581306e-06, |
| "loss": 0.0438, |
| "step": 1024 |
| }, |
| { |
| "epoch": 5.735849056603773, |
| "grad_norm": 0.8918766336417149, |
| "learning_rate": 1.9874162260486146e-06, |
| "loss": 0.0475, |
| "step": 1026 |
| }, |
| { |
| "epoch": 5.747030048916841, |
| "grad_norm": 1.2095534019736167, |
| "learning_rate": 1.978782220051142e-06, |
| "loss": 0.0454, |
| "step": 1028 |
| }, |
| { |
| "epoch": 5.7582110412299095, |
| "grad_norm": 1.1967009451687045, |
| "learning_rate": 1.9701547084457314e-06, |
| "loss": 0.0697, |
| "step": 1030 |
| }, |
| { |
| "epoch": 5.769392033542977, |
| "grad_norm": 1.8160556667087309, |
| "learning_rate": 1.961533798731486e-06, |
| "loss": 0.0422, |
| "step": 1032 |
| }, |
| { |
| "epoch": 5.780573025856045, |
| "grad_norm": 1.590627053883797, |
| "learning_rate": 1.952919598325247e-06, |
| "loss": 0.0602, |
| "step": 1034 |
| }, |
| { |
| "epoch": 5.7917540181691125, |
| "grad_norm": 1.4584761134724722, |
| "learning_rate": 1.944312214560256e-06, |
| "loss": 0.0575, |
| "step": 1036 |
| }, |
| { |
| "epoch": 5.80293501048218, |
| "grad_norm": 1.6093909025543798, |
| "learning_rate": 1.935711754684824e-06, |
| "loss": 0.0814, |
| "step": 1038 |
| }, |
| { |
| "epoch": 5.814116002795248, |
| "grad_norm": 1.7715253484509736, |
| "learning_rate": 1.9271183258609836e-06, |
| "loss": 0.0608, |
| "step": 1040 |
| }, |
| { |
| "epoch": 5.825296995108316, |
| "grad_norm": 0.850327251905485, |
| "learning_rate": 1.9185320351631654e-06, |
| "loss": 0.0388, |
| "step": 1042 |
| }, |
| { |
| "epoch": 5.836477987421384, |
| "grad_norm": 1.4837292387797913, |
| "learning_rate": 1.9099529895768552e-06, |
| "loss": 0.0567, |
| "step": 1044 |
| }, |
| { |
| "epoch": 5.847658979734452, |
| "grad_norm": 1.0384213631474088, |
| "learning_rate": 1.901381295997267e-06, |
| "loss": 0.0661, |
| "step": 1046 |
| }, |
| { |
| "epoch": 5.8588399720475195, |
| "grad_norm": 1.2071171218984706, |
| "learning_rate": 1.8928170612280067e-06, |
| "loss": 0.0665, |
| "step": 1048 |
| }, |
| { |
| "epoch": 5.870020964360587, |
| "grad_norm": 1.2020194163974407, |
| "learning_rate": 1.8842603919797436e-06, |
| "loss": 0.0466, |
| "step": 1050 |
| }, |
| { |
| "epoch": 5.881201956673655, |
| "grad_norm": 1.141150946131999, |
| "learning_rate": 1.8757113948688827e-06, |
| "loss": 0.0562, |
| "step": 1052 |
| }, |
| { |
| "epoch": 5.8923829489867225, |
| "grad_norm": 1.583487458549684, |
| "learning_rate": 1.8671701764162287e-06, |
| "loss": 0.0589, |
| "step": 1054 |
| }, |
| { |
| "epoch": 5.90356394129979, |
| "grad_norm": 1.3417276690702418, |
| "learning_rate": 1.8586368430456708e-06, |
| "loss": 0.0604, |
| "step": 1056 |
| }, |
| { |
| "epoch": 5.914744933612858, |
| "grad_norm": 1.3294273305641617, |
| "learning_rate": 1.8501115010828423e-06, |
| "loss": 0.0628, |
| "step": 1058 |
| }, |
| { |
| "epoch": 5.925925925925926, |
| "grad_norm": 1.2448945324282268, |
| "learning_rate": 1.8415942567538106e-06, |
| "loss": 0.0554, |
| "step": 1060 |
| }, |
| { |
| "epoch": 5.937106918238994, |
| "grad_norm": 0.960687093766239, |
| "learning_rate": 1.8330852161837399e-06, |
| "loss": 0.0532, |
| "step": 1062 |
| }, |
| { |
| "epoch": 5.948287910552062, |
| "grad_norm": 1.4656893110825278, |
| "learning_rate": 1.8245844853955786e-06, |
| "loss": 0.0719, |
| "step": 1064 |
| }, |
| { |
| "epoch": 5.9594689028651295, |
| "grad_norm": 1.6634277575338297, |
| "learning_rate": 1.8160921703087368e-06, |
| "loss": 0.0565, |
| "step": 1066 |
| }, |
| { |
| "epoch": 5.970649895178197, |
| "grad_norm": 1.7257111050609335, |
| "learning_rate": 1.8076083767377595e-06, |
| "loss": 0.068, |
| "step": 1068 |
| }, |
| { |
| "epoch": 5.981830887491265, |
| "grad_norm": 1.42483183153276, |
| "learning_rate": 1.7991332103910184e-06, |
| "loss": 0.0613, |
| "step": 1070 |
| }, |
| { |
| "epoch": 5.993011879804333, |
| "grad_norm": 1.4316025881020678, |
| "learning_rate": 1.7906667768693853e-06, |
| "loss": 0.0481, |
| "step": 1072 |
| }, |
| { |
| "epoch": 6.0041928721174, |
| "grad_norm": 1.037376667784287, |
| "learning_rate": 1.782209181664924e-06, |
| "loss": 0.0483, |
| "step": 1074 |
| }, |
| { |
| "epoch": 6.015373864430468, |
| "grad_norm": 1.0336168566598631, |
| "learning_rate": 1.773760530159571e-06, |
| "loss": 0.0347, |
| "step": 1076 |
| }, |
| { |
| "epoch": 6.026554856743536, |
| "grad_norm": 0.7872905184564322, |
| "learning_rate": 1.7653209276238242e-06, |
| "loss": 0.0355, |
| "step": 1078 |
| }, |
| { |
| "epoch": 6.037735849056604, |
| "grad_norm": 1.772389302776251, |
| "learning_rate": 1.7568904792154328e-06, |
| "loss": 0.0542, |
| "step": 1080 |
| }, |
| { |
| "epoch": 6.048916841369672, |
| "grad_norm": 1.3577848873845724, |
| "learning_rate": 1.7484692899780812e-06, |
| "loss": 0.0583, |
| "step": 1082 |
| }, |
| { |
| "epoch": 6.06009783368274, |
| "grad_norm": 0.7840766650439943, |
| "learning_rate": 1.740057464840088e-06, |
| "loss": 0.0289, |
| "step": 1084 |
| }, |
| { |
| "epoch": 6.071278825995807, |
| "grad_norm": 0.9255675051401594, |
| "learning_rate": 1.7316551086130925e-06, |
| "loss": 0.0417, |
| "step": 1086 |
| }, |
| { |
| "epoch": 6.082459818308875, |
| "grad_norm": 0.9107219582827843, |
| "learning_rate": 1.7232623259907538e-06, |
| "loss": 0.0429, |
| "step": 1088 |
| }, |
| { |
| "epoch": 6.093640810621943, |
| "grad_norm": 1.0296310110561282, |
| "learning_rate": 1.714879221547439e-06, |
| "loss": 0.0362, |
| "step": 1090 |
| }, |
| { |
| "epoch": 6.10482180293501, |
| "grad_norm": 0.9575340239366315, |
| "learning_rate": 1.7065058997369288e-06, |
| "loss": 0.0471, |
| "step": 1092 |
| }, |
| { |
| "epoch": 6.116002795248078, |
| "grad_norm": 0.7430183397758778, |
| "learning_rate": 1.6981424648911112e-06, |
| "loss": 0.0351, |
| "step": 1094 |
| }, |
| { |
| "epoch": 6.127183787561146, |
| "grad_norm": 0.9807593854080312, |
| "learning_rate": 1.6897890212186804e-06, |
| "loss": 0.0334, |
| "step": 1096 |
| }, |
| { |
| "epoch": 6.138364779874214, |
| "grad_norm": 1.2961448011313597, |
| "learning_rate": 1.6814456728038431e-06, |
| "loss": 0.025, |
| "step": 1098 |
| }, |
| { |
| "epoch": 6.149545772187282, |
| "grad_norm": 0.961636779671174, |
| "learning_rate": 1.673112523605015e-06, |
| "loss": 0.0285, |
| "step": 1100 |
| }, |
| { |
| "epoch": 6.16072676450035, |
| "grad_norm": 0.9647606646620928, |
| "learning_rate": 1.6647896774535324e-06, |
| "loss": 0.0303, |
| "step": 1102 |
| }, |
| { |
| "epoch": 6.171907756813417, |
| "grad_norm": 1.1381988477100318, |
| "learning_rate": 1.6564772380523546e-06, |
| "loss": 0.0358, |
| "step": 1104 |
| }, |
| { |
| "epoch": 6.183088749126485, |
| "grad_norm": 0.7901346245952422, |
| "learning_rate": 1.648175308974771e-06, |
| "loss": 0.0279, |
| "step": 1106 |
| }, |
| { |
| "epoch": 6.194269741439553, |
| "grad_norm": 1.2717247572933381, |
| "learning_rate": 1.6398839936631142e-06, |
| "loss": 0.0328, |
| "step": 1108 |
| }, |
| { |
| "epoch": 6.20545073375262, |
| "grad_norm": 1.2916496315117834, |
| "learning_rate": 1.631603395427466e-06, |
| "loss": 0.055, |
| "step": 1110 |
| }, |
| { |
| "epoch": 6.216631726065688, |
| "grad_norm": 0.9740099844597652, |
| "learning_rate": 1.6233336174443762e-06, |
| "loss": 0.048, |
| "step": 1112 |
| }, |
| { |
| "epoch": 6.227812718378756, |
| "grad_norm": 1.0103830292004847, |
| "learning_rate": 1.6150747627555713e-06, |
| "loss": 0.0434, |
| "step": 1114 |
| }, |
| { |
| "epoch": 6.238993710691824, |
| "grad_norm": 1.1350854047223082, |
| "learning_rate": 1.6068269342666749e-06, |
| "loss": 0.0389, |
| "step": 1116 |
| }, |
| { |
| "epoch": 6.250174703004892, |
| "grad_norm": 0.7884154494279628, |
| "learning_rate": 1.5985902347459239e-06, |
| "loss": 0.0432, |
| "step": 1118 |
| }, |
| { |
| "epoch": 6.26135569531796, |
| "grad_norm": 0.8788178903528164, |
| "learning_rate": 1.5903647668228855e-06, |
| "loss": 0.0432, |
| "step": 1120 |
| }, |
| { |
| "epoch": 6.272536687631027, |
| "grad_norm": 0.6393918351108393, |
| "learning_rate": 1.5821506329871834e-06, |
| "loss": 0.0253, |
| "step": 1122 |
| }, |
| { |
| "epoch": 6.283717679944095, |
| "grad_norm": 1.0870268262489273, |
| "learning_rate": 1.5739479355872162e-06, |
| "loss": 0.0364, |
| "step": 1124 |
| }, |
| { |
| "epoch": 6.294898672257163, |
| "grad_norm": 1.1679875063936556, |
| "learning_rate": 1.5657567768288868e-06, |
| "loss": 0.0333, |
| "step": 1126 |
| }, |
| { |
| "epoch": 6.30607966457023, |
| "grad_norm": 0.8388447320245327, |
| "learning_rate": 1.5575772587743222e-06, |
| "loss": 0.0316, |
| "step": 1128 |
| }, |
| { |
| "epoch": 6.317260656883298, |
| "grad_norm": 0.7710273725047172, |
| "learning_rate": 1.5494094833406092e-06, |
| "loss": 0.0308, |
| "step": 1130 |
| }, |
| { |
| "epoch": 6.328441649196366, |
| "grad_norm": 1.3107972415612894, |
| "learning_rate": 1.5412535522985205e-06, |
| "loss": 0.0186, |
| "step": 1132 |
| }, |
| { |
| "epoch": 6.339622641509434, |
| "grad_norm": 0.8488196487806184, |
| "learning_rate": 1.5331095672712463e-06, |
| "loss": 0.023, |
| "step": 1134 |
| }, |
| { |
| "epoch": 6.350803633822502, |
| "grad_norm": 1.014050814471419, |
| "learning_rate": 1.5249776297331302e-06, |
| "loss": 0.0425, |
| "step": 1136 |
| }, |
| { |
| "epoch": 6.36198462613557, |
| "grad_norm": 0.8160528908459946, |
| "learning_rate": 1.516857841008401e-06, |
| "loss": 0.0407, |
| "step": 1138 |
| }, |
| { |
| "epoch": 6.373165618448637, |
| "grad_norm": 0.6924190623075557, |
| "learning_rate": 1.5087503022699168e-06, |
| "loss": 0.0527, |
| "step": 1140 |
| }, |
| { |
| "epoch": 6.384346610761705, |
| "grad_norm": 1.0149043689805195, |
| "learning_rate": 1.5006551145378967e-06, |
| "loss": 0.0367, |
| "step": 1142 |
| }, |
| { |
| "epoch": 6.395527603074773, |
| "grad_norm": 1.5920991707794845, |
| "learning_rate": 1.4925723786786691e-06, |
| "loss": 0.0319, |
| "step": 1144 |
| }, |
| { |
| "epoch": 6.40670859538784, |
| "grad_norm": 0.8834798218634231, |
| "learning_rate": 1.4845021954034106e-06, |
| "loss": 0.0372, |
| "step": 1146 |
| }, |
| { |
| "epoch": 6.417889587700908, |
| "grad_norm": 1.072104658850445, |
| "learning_rate": 1.476444665266889e-06, |
| "loss": 0.0413, |
| "step": 1148 |
| }, |
| { |
| "epoch": 6.429070580013976, |
| "grad_norm": 1.1893734124292998, |
| "learning_rate": 1.4683998886662187e-06, |
| "loss": 0.0307, |
| "step": 1150 |
| }, |
| { |
| "epoch": 6.440251572327044, |
| "grad_norm": 1.1513167005422524, |
| "learning_rate": 1.4603679658396006e-06, |
| "loss": 0.0402, |
| "step": 1152 |
| }, |
| { |
| "epoch": 6.451432564640112, |
| "grad_norm": 1.0586602700365229, |
| "learning_rate": 1.4523489968650795e-06, |
| "loss": 0.0303, |
| "step": 1154 |
| }, |
| { |
| "epoch": 6.46261355695318, |
| "grad_norm": 0.7650987855999634, |
| "learning_rate": 1.4443430816592936e-06, |
| "loss": 0.0312, |
| "step": 1156 |
| }, |
| { |
| "epoch": 6.473794549266247, |
| "grad_norm": 0.7470083708652993, |
| "learning_rate": 1.4363503199762296e-06, |
| "loss": 0.0298, |
| "step": 1158 |
| }, |
| { |
| "epoch": 6.484975541579315, |
| "grad_norm": 1.2247183517462086, |
| "learning_rate": 1.4283708114059853e-06, |
| "loss": 0.0476, |
| "step": 1160 |
| }, |
| { |
| "epoch": 6.496156533892383, |
| "grad_norm": 1.0042001049340177, |
| "learning_rate": 1.4204046553735174e-06, |
| "loss": 0.0421, |
| "step": 1162 |
| }, |
| { |
| "epoch": 6.5073375262054505, |
| "grad_norm": 1.0066856707214424, |
| "learning_rate": 1.4124519511374158e-06, |
| "loss": 0.0277, |
| "step": 1164 |
| }, |
| { |
| "epoch": 6.518518518518518, |
| "grad_norm": 1.3761888161849996, |
| "learning_rate": 1.404512797788657e-06, |
| "loss": 0.0251, |
| "step": 1166 |
| }, |
| { |
| "epoch": 6.529699510831586, |
| "grad_norm": 0.7445041473181229, |
| "learning_rate": 1.396587294249374e-06, |
| "loss": 0.0383, |
| "step": 1168 |
| }, |
| { |
| "epoch": 6.540880503144654, |
| "grad_norm": 1.0231799225570892, |
| "learning_rate": 1.3886755392716225e-06, |
| "loss": 0.0289, |
| "step": 1170 |
| }, |
| { |
| "epoch": 6.552061495457722, |
| "grad_norm": 1.0842064444530823, |
| "learning_rate": 1.3807776314361498e-06, |
| "loss": 0.0341, |
| "step": 1172 |
| }, |
| { |
| "epoch": 6.56324248777079, |
| "grad_norm": 0.9409388421938562, |
| "learning_rate": 1.3728936691511704e-06, |
| "loss": 0.0413, |
| "step": 1174 |
| }, |
| { |
| "epoch": 6.5744234800838575, |
| "grad_norm": 0.8052329748698783, |
| "learning_rate": 1.3650237506511333e-06, |
| "loss": 0.0399, |
| "step": 1176 |
| }, |
| { |
| "epoch": 6.585604472396925, |
| "grad_norm": 0.6879172446908371, |
| "learning_rate": 1.3571679739955029e-06, |
| "loss": 0.0288, |
| "step": 1178 |
| }, |
| { |
| "epoch": 6.596785464709993, |
| "grad_norm": 0.8737080494275846, |
| "learning_rate": 1.3493264370675352e-06, |
| "loss": 0.0181, |
| "step": 1180 |
| }, |
| { |
| "epoch": 6.6079664570230605, |
| "grad_norm": 0.8744184416405667, |
| "learning_rate": 1.3414992375730587e-06, |
| "loss": 0.0432, |
| "step": 1182 |
| }, |
| { |
| "epoch": 6.619147449336128, |
| "grad_norm": 0.9265074156931595, |
| "learning_rate": 1.3336864730392587e-06, |
| "loss": 0.0464, |
| "step": 1184 |
| }, |
| { |
| "epoch": 6.630328441649196, |
| "grad_norm": 1.14003149718633, |
| "learning_rate": 1.3258882408134582e-06, |
| "loss": 0.0271, |
| "step": 1186 |
| }, |
| { |
| "epoch": 6.6415094339622645, |
| "grad_norm": 0.8949105583359471, |
| "learning_rate": 1.3181046380619078e-06, |
| "loss": 0.0276, |
| "step": 1188 |
| }, |
| { |
| "epoch": 6.652690426275332, |
| "grad_norm": 1.0602768370905677, |
| "learning_rate": 1.3103357617685746e-06, |
| "loss": 0.0352, |
| "step": 1190 |
| }, |
| { |
| "epoch": 6.6638714185884, |
| "grad_norm": 1.187406942024327, |
| "learning_rate": 1.3025817087339335e-06, |
| "loss": 0.0597, |
| "step": 1192 |
| }, |
| { |
| "epoch": 6.6750524109014675, |
| "grad_norm": 0.8451020033143687, |
| "learning_rate": 1.2948425755737592e-06, |
| "loss": 0.0359, |
| "step": 1194 |
| }, |
| { |
| "epoch": 6.686233403214535, |
| "grad_norm": 1.2760921925255864, |
| "learning_rate": 1.2871184587179286e-06, |
| "loss": 0.0285, |
| "step": 1196 |
| }, |
| { |
| "epoch": 6.697414395527603, |
| "grad_norm": 0.7781748766075295, |
| "learning_rate": 1.2794094544092111e-06, |
| "loss": 0.0346, |
| "step": 1198 |
| }, |
| { |
| "epoch": 6.7085953878406706, |
| "grad_norm": 1.1832623077309767, |
| "learning_rate": 1.2717156587020746e-06, |
| "loss": 0.041, |
| "step": 1200 |
| }, |
| { |
| "epoch": 6.719776380153739, |
| "grad_norm": 1.3133094357866473, |
| "learning_rate": 1.2640371674614866e-06, |
| "loss": 0.0629, |
| "step": 1202 |
| }, |
| { |
| "epoch": 6.730957372466807, |
| "grad_norm": 0.7218331862903847, |
| "learning_rate": 1.2563740763617198e-06, |
| "loss": 0.0366, |
| "step": 1204 |
| }, |
| { |
| "epoch": 6.7421383647798745, |
| "grad_norm": 0.9560652150388108, |
| "learning_rate": 1.2487264808851654e-06, |
| "loss": 0.044, |
| "step": 1206 |
| }, |
| { |
| "epoch": 6.753319357092942, |
| "grad_norm": 1.1190106870390395, |
| "learning_rate": 1.2410944763211302e-06, |
| "loss": 0.0517, |
| "step": 1208 |
| }, |
| { |
| "epoch": 6.76450034940601, |
| "grad_norm": 0.7835985914687663, |
| "learning_rate": 1.2334781577646653e-06, |
| "loss": 0.0272, |
| "step": 1210 |
| }, |
| { |
| "epoch": 6.7756813417190775, |
| "grad_norm": 2.056446636497986, |
| "learning_rate": 1.2258776201153702e-06, |
| "loss": 0.0239, |
| "step": 1212 |
| }, |
| { |
| "epoch": 6.786862334032145, |
| "grad_norm": 0.8485551422736736, |
| "learning_rate": 1.218292958076213e-06, |
| "loss": 0.0206, |
| "step": 1214 |
| }, |
| { |
| "epoch": 6.798043326345213, |
| "grad_norm": 1.2531964534501892, |
| "learning_rate": 1.2107242661523544e-06, |
| "loss": 0.0254, |
| "step": 1216 |
| }, |
| { |
| "epoch": 6.809224318658281, |
| "grad_norm": 1.269537638790587, |
| "learning_rate": 1.203171638649962e-06, |
| "loss": 0.0299, |
| "step": 1218 |
| }, |
| { |
| "epoch": 6.820405310971349, |
| "grad_norm": 1.1178764385402225, |
| "learning_rate": 1.195635169675045e-06, |
| "loss": 0.0396, |
| "step": 1220 |
| }, |
| { |
| "epoch": 6.831586303284417, |
| "grad_norm": 0.6920818283019613, |
| "learning_rate": 1.1881149531322744e-06, |
| "loss": 0.0268, |
| "step": 1222 |
| }, |
| { |
| "epoch": 6.8427672955974845, |
| "grad_norm": 0.80369354175751, |
| "learning_rate": 1.180611082723814e-06, |
| "loss": 0.031, |
| "step": 1224 |
| }, |
| { |
| "epoch": 6.853948287910552, |
| "grad_norm": 0.7447389756775401, |
| "learning_rate": 1.1731236519481593e-06, |
| "loss": 0.0345, |
| "step": 1226 |
| }, |
| { |
| "epoch": 6.86512928022362, |
| "grad_norm": 1.1115305000722167, |
| "learning_rate": 1.1656527540989595e-06, |
| "loss": 0.0283, |
| "step": 1228 |
| }, |
| { |
| "epoch": 6.876310272536688, |
| "grad_norm": 1.2279572164110593, |
| "learning_rate": 1.1581984822638706e-06, |
| "loss": 0.0452, |
| "step": 1230 |
| }, |
| { |
| "epoch": 6.887491264849755, |
| "grad_norm": 0.8467749629186313, |
| "learning_rate": 1.1507609293233837e-06, |
| "loss": 0.0283, |
| "step": 1232 |
| }, |
| { |
| "epoch": 6.898672257162823, |
| "grad_norm": 1.355703618365484, |
| "learning_rate": 1.1433401879496723e-06, |
| "loss": 0.0366, |
| "step": 1234 |
| }, |
| { |
| "epoch": 6.909853249475891, |
| "grad_norm": 1.004917827499692, |
| "learning_rate": 1.135936350605438e-06, |
| "loss": 0.0496, |
| "step": 1236 |
| }, |
| { |
| "epoch": 6.921034241788959, |
| "grad_norm": 1.2615070307313305, |
| "learning_rate": 1.1285495095427563e-06, |
| "loss": 0.0461, |
| "step": 1238 |
| }, |
| { |
| "epoch": 6.932215234102027, |
| "grad_norm": 0.9861185460727813, |
| "learning_rate": 1.1211797568019312e-06, |
| "loss": 0.0366, |
| "step": 1240 |
| }, |
| { |
| "epoch": 6.943396226415095, |
| "grad_norm": 1.6576290169923233, |
| "learning_rate": 1.113827184210343e-06, |
| "loss": 0.0337, |
| "step": 1242 |
| }, |
| { |
| "epoch": 6.954577218728162, |
| "grad_norm": 1.1363579065284033, |
| "learning_rate": 1.1064918833813073e-06, |
| "loss": 0.0406, |
| "step": 1244 |
| }, |
| { |
| "epoch": 6.96575821104123, |
| "grad_norm": 1.3125191134965577, |
| "learning_rate": 1.0991739457129333e-06, |
| "loss": 0.0397, |
| "step": 1246 |
| }, |
| { |
| "epoch": 6.976939203354298, |
| "grad_norm": 0.8904462468667067, |
| "learning_rate": 1.0918734623869835e-06, |
| "loss": 0.0407, |
| "step": 1248 |
| }, |
| { |
| "epoch": 6.988120195667365, |
| "grad_norm": 2.263233580582389, |
| "learning_rate": 1.0845905243677416e-06, |
| "loss": 0.0307, |
| "step": 1250 |
| }, |
| { |
| "epoch": 6.999301187980433, |
| "grad_norm": 0.791294534235276, |
| "learning_rate": 1.0773252224008726e-06, |
| "loss": 0.0387, |
| "step": 1252 |
| }, |
| { |
| "epoch": 7.010482180293501, |
| "grad_norm": 0.76599595030522, |
| "learning_rate": 1.0700776470122981e-06, |
| "loss": 0.0269, |
| "step": 1254 |
| }, |
| { |
| "epoch": 7.021663172606569, |
| "grad_norm": 0.7331796337642835, |
| "learning_rate": 1.0628478885070647e-06, |
| "loss": 0.0221, |
| "step": 1256 |
| }, |
| { |
| "epoch": 7.032844164919637, |
| "grad_norm": 0.6845784469587074, |
| "learning_rate": 1.05563603696822e-06, |
| "loss": 0.0291, |
| "step": 1258 |
| }, |
| { |
| "epoch": 7.044025157232705, |
| "grad_norm": 0.8176233505690059, |
| "learning_rate": 1.0484421822556904e-06, |
| "loss": 0.0364, |
| "step": 1260 |
| }, |
| { |
| "epoch": 7.055206149545772, |
| "grad_norm": 0.8629657573128657, |
| "learning_rate": 1.041266414005162e-06, |
| "loss": 0.0265, |
| "step": 1262 |
| }, |
| { |
| "epoch": 7.06638714185884, |
| "grad_norm": 1.1172499462707595, |
| "learning_rate": 1.0341088216269625e-06, |
| "loss": 0.0157, |
| "step": 1264 |
| }, |
| { |
| "epoch": 7.077568134171908, |
| "grad_norm": 0.5230775744769823, |
| "learning_rate": 1.0269694943049462e-06, |
| "loss": 0.0157, |
| "step": 1266 |
| }, |
| { |
| "epoch": 7.088749126484975, |
| "grad_norm": 0.8978199171663125, |
| "learning_rate": 1.0198485209953865e-06, |
| "loss": 0.0275, |
| "step": 1268 |
| }, |
| { |
| "epoch": 7.099930118798043, |
| "grad_norm": 0.815308309594077, |
| "learning_rate": 1.0127459904258621e-06, |
| "loss": 0.0237, |
| "step": 1270 |
| }, |
| { |
| "epoch": 7.111111111111111, |
| "grad_norm": 0.8967571058386815, |
| "learning_rate": 1.0056619910941592e-06, |
| "loss": 0.019, |
| "step": 1272 |
| }, |
| { |
| "epoch": 7.122292103424179, |
| "grad_norm": 0.7843358442700527, |
| "learning_rate": 9.98596611267158e-07, |
| "loss": 0.021, |
| "step": 1274 |
| }, |
| { |
| "epoch": 7.133473095737247, |
| "grad_norm": 0.6797830063456453, |
| "learning_rate": 9.915499389797444e-07, |
| "loss": 0.0316, |
| "step": 1276 |
| }, |
| { |
| "epoch": 7.144654088050315, |
| "grad_norm": 0.6688875199025872, |
| "learning_rate": 9.845220620337054e-07, |
| "loss": 0.0303, |
| "step": 1278 |
| }, |
| { |
| "epoch": 7.155835080363382, |
| "grad_norm": 0.6664970872749731, |
| "learning_rate": 9.77513067996636e-07, |
| "loss": 0.0219, |
| "step": 1280 |
| }, |
| { |
| "epoch": 7.16701607267645, |
| "grad_norm": 0.7973098520727987, |
| "learning_rate": 9.705230442008542e-07, |
| "loss": 0.0376, |
| "step": 1282 |
| }, |
| { |
| "epoch": 7.178197064989518, |
| "grad_norm": 0.8759703504057706, |
| "learning_rate": 9.63552077742301e-07, |
| "loss": 0.0385, |
| "step": 1284 |
| }, |
| { |
| "epoch": 7.189378057302585, |
| "grad_norm": 1.0267904937054426, |
| "learning_rate": 9.56600255479469e-07, |
| "loss": 0.0222, |
| "step": 1286 |
| }, |
| { |
| "epoch": 7.200559049615653, |
| "grad_norm": 0.6389768145894307, |
| "learning_rate": 9.4966766403231e-07, |
| "loss": 0.018, |
| "step": 1288 |
| }, |
| { |
| "epoch": 7.211740041928721, |
| "grad_norm": 0.5762313893158477, |
| "learning_rate": 9.427543897811584e-07, |
| "loss": 0.0165, |
| "step": 1290 |
| }, |
| { |
| "epoch": 7.222921034241789, |
| "grad_norm": 0.5902518126138557, |
| "learning_rate": 9.358605188656603e-07, |
| "loss": 0.02, |
| "step": 1292 |
| }, |
| { |
| "epoch": 7.234102026554857, |
| "grad_norm": 0.824105561963567, |
| "learning_rate": 9.289861371836886e-07, |
| "loss": 0.0337, |
| "step": 1294 |
| }, |
| { |
| "epoch": 7.245283018867925, |
| "grad_norm": 0.504698332550927, |
| "learning_rate": 9.22131330390286e-07, |
| "loss": 0.0283, |
| "step": 1296 |
| }, |
| { |
| "epoch": 7.256464011180992, |
| "grad_norm": 0.5789695393721453, |
| "learning_rate": 9.152961838965879e-07, |
| "loss": 0.0169, |
| "step": 1298 |
| }, |
| { |
| "epoch": 7.26764500349406, |
| "grad_norm": 1.4892687104014115, |
| "learning_rate": 9.084807828687628e-07, |
| "loss": 0.0314, |
| "step": 1300 |
| }, |
| { |
| "epoch": 7.278825995807128, |
| "grad_norm": 1.0727067281323632, |
| "learning_rate": 9.016852122269493e-07, |
| "loss": 0.0274, |
| "step": 1302 |
| }, |
| { |
| "epoch": 7.290006988120195, |
| "grad_norm": 0.7309629553367788, |
| "learning_rate": 8.949095566441985e-07, |
| "loss": 0.0219, |
| "step": 1304 |
| }, |
| { |
| "epoch": 7.301187980433263, |
| "grad_norm": 0.6871990809680889, |
| "learning_rate": 8.881539005454215e-07, |
| "loss": 0.0339, |
| "step": 1306 |
| }, |
| { |
| "epoch": 7.312368972746331, |
| "grad_norm": 0.8530617423198913, |
| "learning_rate": 8.814183281063326e-07, |
| "loss": 0.0248, |
| "step": 1308 |
| }, |
| { |
| "epoch": 7.323549965059399, |
| "grad_norm": 0.76651991997128, |
| "learning_rate": 8.747029232524037e-07, |
| "loss": 0.023, |
| "step": 1310 |
| }, |
| { |
| "epoch": 7.334730957372467, |
| "grad_norm": 0.6966547986519114, |
| "learning_rate": 8.680077696578182e-07, |
| "loss": 0.0332, |
| "step": 1312 |
| }, |
| { |
| "epoch": 7.345911949685535, |
| "grad_norm": 1.0873098335521205, |
| "learning_rate": 8.613329507444274e-07, |
| "loss": 0.0234, |
| "step": 1314 |
| }, |
| { |
| "epoch": 7.357092941998602, |
| "grad_norm": 0.6461932986017782, |
| "learning_rate": 8.546785496807116e-07, |
| "loss": 0.0242, |
| "step": 1316 |
| }, |
| { |
| "epoch": 7.36827393431167, |
| "grad_norm": 0.7614414460885182, |
| "learning_rate": 8.480446493807464e-07, |
| "loss": 0.031, |
| "step": 1318 |
| }, |
| { |
| "epoch": 7.379454926624738, |
| "grad_norm": 0.641294466328584, |
| "learning_rate": 8.414313325031642e-07, |
| "loss": 0.028, |
| "step": 1320 |
| }, |
| { |
| "epoch": 7.3906359189378055, |
| "grad_norm": 0.47088954187562415, |
| "learning_rate": 8.348386814501286e-07, |
| "loss": 0.0186, |
| "step": 1322 |
| }, |
| { |
| "epoch": 7.401816911250873, |
| "grad_norm": 0.7909087034714356, |
| "learning_rate": 8.282667783663056e-07, |
| "loss": 0.0212, |
| "step": 1324 |
| }, |
| { |
| "epoch": 7.412997903563941, |
| "grad_norm": 0.8059238279425677, |
| "learning_rate": 8.217157051378411e-07, |
| "loss": 0.0239, |
| "step": 1326 |
| }, |
| { |
| "epoch": 7.424178895877009, |
| "grad_norm": 0.788531385863816, |
| "learning_rate": 8.151855433913414e-07, |
| "loss": 0.0199, |
| "step": 1328 |
| }, |
| { |
| "epoch": 7.435359888190077, |
| "grad_norm": 1.1393964476120448, |
| "learning_rate": 8.086763744928536e-07, |
| "loss": 0.0292, |
| "step": 1330 |
| }, |
| { |
| "epoch": 7.446540880503145, |
| "grad_norm": 0.5408108502649198, |
| "learning_rate": 8.02188279546853e-07, |
| "loss": 0.0146, |
| "step": 1332 |
| }, |
| { |
| "epoch": 7.4577218728162125, |
| "grad_norm": 0.8749206113652656, |
| "learning_rate": 7.957213393952335e-07, |
| "loss": 0.0247, |
| "step": 1334 |
| }, |
| { |
| "epoch": 7.46890286512928, |
| "grad_norm": 0.7053824386402378, |
| "learning_rate": 7.892756346162986e-07, |
| "loss": 0.02, |
| "step": 1336 |
| }, |
| { |
| "epoch": 7.480083857442348, |
| "grad_norm": 0.6965900833846856, |
| "learning_rate": 7.82851245523761e-07, |
| "loss": 0.0315, |
| "step": 1338 |
| }, |
| { |
| "epoch": 7.4912648497554155, |
| "grad_norm": 0.9392067120327887, |
| "learning_rate": 7.764482521657343e-07, |
| "loss": 0.0308, |
| "step": 1340 |
| }, |
| { |
| "epoch": 7.502445842068483, |
| "grad_norm": 0.7074561491918046, |
| "learning_rate": 7.700667343237453e-07, |
| "loss": 0.0171, |
| "step": 1342 |
| }, |
| { |
| "epoch": 7.513626834381551, |
| "grad_norm": 0.7697005768650605, |
| "learning_rate": 7.637067715117327e-07, |
| "loss": 0.0302, |
| "step": 1344 |
| }, |
| { |
| "epoch": 7.5248078266946195, |
| "grad_norm": 1.176668146060272, |
| "learning_rate": 7.573684429750583e-07, |
| "loss": 0.0265, |
| "step": 1346 |
| }, |
| { |
| "epoch": 7.535988819007687, |
| "grad_norm": 0.7258573280389607, |
| "learning_rate": 7.510518276895234e-07, |
| "loss": 0.0257, |
| "step": 1348 |
| }, |
| { |
| "epoch": 7.547169811320755, |
| "grad_norm": 1.1195611459347754, |
| "learning_rate": 7.447570043603755e-07, |
| "loss": 0.0261, |
| "step": 1350 |
| }, |
| { |
| "epoch": 7.5583508036338225, |
| "grad_norm": 0.9527258409378455, |
| "learning_rate": 7.384840514213404e-07, |
| "loss": 0.0524, |
| "step": 1352 |
| }, |
| { |
| "epoch": 7.56953179594689, |
| "grad_norm": 0.7074898357644916, |
| "learning_rate": 7.322330470336314e-07, |
| "loss": 0.0205, |
| "step": 1354 |
| }, |
| { |
| "epoch": 7.580712788259958, |
| "grad_norm": 0.9361424266631929, |
| "learning_rate": 7.26004069084987e-07, |
| "loss": 0.0217, |
| "step": 1356 |
| }, |
| { |
| "epoch": 7.5918937805730256, |
| "grad_norm": 1.7048958108176762, |
| "learning_rate": 7.197971951886956e-07, |
| "loss": 0.0225, |
| "step": 1358 |
| }, |
| { |
| "epoch": 7.603074772886094, |
| "grad_norm": 0.8812767707258257, |
| "learning_rate": 7.13612502682623e-07, |
| "loss": 0.0196, |
| "step": 1360 |
| }, |
| { |
| "epoch": 7.614255765199162, |
| "grad_norm": 0.5682027618905875, |
| "learning_rate": 7.074500686282609e-07, |
| "loss": 0.019, |
| "step": 1362 |
| }, |
| { |
| "epoch": 7.6254367575122295, |
| "grad_norm": 0.4475598932931596, |
| "learning_rate": 7.013099698097539e-07, |
| "loss": 0.0171, |
| "step": 1364 |
| }, |
| { |
| "epoch": 7.636617749825297, |
| "grad_norm": 0.5527498039813922, |
| "learning_rate": 6.951922827329535e-07, |
| "loss": 0.0217, |
| "step": 1366 |
| }, |
| { |
| "epoch": 7.647798742138365, |
| "grad_norm": 0.7984442985333638, |
| "learning_rate": 6.890970836244574e-07, |
| "loss": 0.0361, |
| "step": 1368 |
| }, |
| { |
| "epoch": 7.6589797344514325, |
| "grad_norm": 0.624268450810696, |
| "learning_rate": 6.830244484306623e-07, |
| "loss": 0.0158, |
| "step": 1370 |
| }, |
| { |
| "epoch": 7.6701607267645, |
| "grad_norm": 0.7493822409267487, |
| "learning_rate": 6.769744528168207e-07, |
| "loss": 0.0286, |
| "step": 1372 |
| }, |
| { |
| "epoch": 7.681341719077568, |
| "grad_norm": 0.6787647092695418, |
| "learning_rate": 6.709471721660904e-07, |
| "loss": 0.0215, |
| "step": 1374 |
| }, |
| { |
| "epoch": 7.692522711390636, |
| "grad_norm": 0.7321502006735149, |
| "learning_rate": 6.649426815786045e-07, |
| "loss": 0.0311, |
| "step": 1376 |
| }, |
| { |
| "epoch": 7.703703703703704, |
| "grad_norm": 0.701610396870259, |
| "learning_rate": 6.589610558705284e-07, |
| "loss": 0.0235, |
| "step": 1378 |
| }, |
| { |
| "epoch": 7.714884696016772, |
| "grad_norm": 0.6530846520546149, |
| "learning_rate": 6.53002369573131e-07, |
| "loss": 0.0245, |
| "step": 1380 |
| }, |
| { |
| "epoch": 7.7260656883298395, |
| "grad_norm": 0.7531427984254183, |
| "learning_rate": 6.470666969318554e-07, |
| "loss": 0.0315, |
| "step": 1382 |
| }, |
| { |
| "epoch": 7.737246680642907, |
| "grad_norm": 0.7301669272251805, |
| "learning_rate": 6.41154111905393e-07, |
| "loss": 0.0225, |
| "step": 1384 |
| }, |
| { |
| "epoch": 7.748427672955975, |
| "grad_norm": 0.8707140120777088, |
| "learning_rate": 6.352646881647647e-07, |
| "loss": 0.0259, |
| "step": 1386 |
| }, |
| { |
| "epoch": 7.759608665269043, |
| "grad_norm": 0.837200588883093, |
| "learning_rate": 6.29398499092399e-07, |
| "loss": 0.0474, |
| "step": 1388 |
| }, |
| { |
| "epoch": 7.77078965758211, |
| "grad_norm": 0.973530488120086, |
| "learning_rate": 6.235556177812205e-07, |
| "loss": 0.0329, |
| "step": 1390 |
| }, |
| { |
| "epoch": 7.781970649895178, |
| "grad_norm": 0.5813627298678434, |
| "learning_rate": 6.177361170337376e-07, |
| "loss": 0.0194, |
| "step": 1392 |
| }, |
| { |
| "epoch": 7.793151642208246, |
| "grad_norm": 0.8597088367336019, |
| "learning_rate": 6.119400693611358e-07, |
| "loss": 0.0123, |
| "step": 1394 |
| }, |
| { |
| "epoch": 7.804332634521314, |
| "grad_norm": 0.8368570476462492, |
| "learning_rate": 6.061675469823763e-07, |
| "loss": 0.0227, |
| "step": 1396 |
| }, |
| { |
| "epoch": 7.815513626834382, |
| "grad_norm": 0.5203392914919558, |
| "learning_rate": 6.004186218232933e-07, |
| "loss": 0.0217, |
| "step": 1398 |
| }, |
| { |
| "epoch": 7.82669461914745, |
| "grad_norm": 0.8572153440435842, |
| "learning_rate": 5.946933655156976e-07, |
| "loss": 0.0294, |
| "step": 1400 |
| }, |
| { |
| "epoch": 7.837875611460517, |
| "grad_norm": 0.6862577628733875, |
| "learning_rate": 5.889918493964869e-07, |
| "loss": 0.0228, |
| "step": 1402 |
| }, |
| { |
| "epoch": 7.849056603773585, |
| "grad_norm": 0.7097594226614418, |
| "learning_rate": 5.833141445067541e-07, |
| "loss": 0.0113, |
| "step": 1404 |
| }, |
| { |
| "epoch": 7.860237596086653, |
| "grad_norm": 0.6322499286175502, |
| "learning_rate": 5.776603215909041e-07, |
| "loss": 0.0229, |
| "step": 1406 |
| }, |
| { |
| "epoch": 7.87141858839972, |
| "grad_norm": 0.6798739232739857, |
| "learning_rate": 5.720304510957722e-07, |
| "loss": 0.0257, |
| "step": 1408 |
| }, |
| { |
| "epoch": 7.882599580712788, |
| "grad_norm": 0.6568708401714163, |
| "learning_rate": 5.66424603169744e-07, |
| "loss": 0.0285, |
| "step": 1410 |
| }, |
| { |
| "epoch": 7.893780573025856, |
| "grad_norm": 1.1483908878505031, |
| "learning_rate": 5.608428476618843e-07, |
| "loss": 0.0235, |
| "step": 1412 |
| }, |
| { |
| "epoch": 7.904961565338924, |
| "grad_norm": 0.9297111790590921, |
| "learning_rate": 5.552852541210651e-07, |
| "loss": 0.022, |
| "step": 1414 |
| }, |
| { |
| "epoch": 7.916142557651992, |
| "grad_norm": 0.7288896652277049, |
| "learning_rate": 5.497518917950986e-07, |
| "loss": 0.033, |
| "step": 1416 |
| }, |
| { |
| "epoch": 7.92732354996506, |
| "grad_norm": 1.3241630685241197, |
| "learning_rate": 5.44242829629878e-07, |
| "loss": 0.0236, |
| "step": 1418 |
| }, |
| { |
| "epoch": 7.938504542278127, |
| "grad_norm": 0.6616696784338312, |
| "learning_rate": 5.387581362685112e-07, |
| "loss": 0.03, |
| "step": 1420 |
| }, |
| { |
| "epoch": 7.949685534591195, |
| "grad_norm": 0.9223806906428696, |
| "learning_rate": 5.332978800504742e-07, |
| "loss": 0.0234, |
| "step": 1422 |
| }, |
| { |
| "epoch": 7.960866526904263, |
| "grad_norm": 1.1302104401143789, |
| "learning_rate": 5.278621290107533e-07, |
| "loss": 0.0334, |
| "step": 1424 |
| }, |
| { |
| "epoch": 7.97204751921733, |
| "grad_norm": 0.6145924647383543, |
| "learning_rate": 5.224509508789987e-07, |
| "loss": 0.0205, |
| "step": 1426 |
| }, |
| { |
| "epoch": 7.983228511530398, |
| "grad_norm": 0.6724718918142113, |
| "learning_rate": 5.170644130786842e-07, |
| "loss": 0.0315, |
| "step": 1428 |
| }, |
| { |
| "epoch": 7.994409503843466, |
| "grad_norm": 0.5897709957691004, |
| "learning_rate": 5.117025827262598e-07, |
| "loss": 0.0189, |
| "step": 1430 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1780, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 598197676277760.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|