{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 16482, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00036406662419222717, "grad_norm": 2.203125, "learning_rate": 1.2500000000000002e-07, "loss": 1.4439427852630615, "step": 2 }, { "epoch": 0.0007281332483844543, "grad_norm": 25.875, "learning_rate": 3.75e-07, "loss": 1.8903988599777222, "step": 4 }, { "epoch": 0.0010921998725766816, "grad_norm": 13.1875, "learning_rate": 6.25e-07, "loss": 1.9823973178863525, "step": 6 }, { "epoch": 0.0014562664967689087, "grad_norm": 7.84375, "learning_rate": 8.75e-07, "loss": 1.108595848083496, "step": 8 }, { "epoch": 0.0018203331209611358, "grad_norm": 54.5, "learning_rate": 1.125e-06, "loss": 1.6908321380615234, "step": 10 }, { "epoch": 0.002184399745153363, "grad_norm": 82.5, "learning_rate": 1.3750000000000002e-06, "loss": 2.864623546600342, "step": 12 }, { "epoch": 0.0025484663693455902, "grad_norm": 8.8125, "learning_rate": 1.6250000000000001e-06, "loss": 1.8119561672210693, "step": 14 }, { "epoch": 0.0029125329935378174, "grad_norm": 4.0, "learning_rate": 1.8750000000000003e-06, "loss": 1.419490933418274, "step": 16 }, { "epoch": 0.0032765996177300445, "grad_norm": 20.25, "learning_rate": 2.125e-06, "loss": 1.774035930633545, "step": 18 }, { "epoch": 0.0036406662419222716, "grad_norm": 49.25, "learning_rate": 2.375e-06, "loss": 2.2708888053894043, "step": 20 }, { "epoch": 0.004004732866114499, "grad_norm": 115.5, "learning_rate": 2.6250000000000003e-06, "loss": 2.69008207321167, "step": 22 }, { "epoch": 0.004368799490306726, "grad_norm": 23.125, "learning_rate": 2.875e-06, "loss": 1.6948392391204834, "step": 24 }, { "epoch": 0.004732866114498953, "grad_norm": 11.375, "learning_rate": 3.125e-06, "loss": 1.5246765613555908, "step": 26 }, { "epoch": 0.0050969327386911805, "grad_norm": 17.875, "learning_rate": 3.3750000000000003e-06, "loss": 1.863516092300415, "step": 28 }, { "epoch": 0.005460999362883408, "grad_norm": 15.25, "learning_rate": 3.625e-06, "loss": 1.9239016771316528, "step": 30 }, { "epoch": 0.005825065987075635, "grad_norm": 16.875, "learning_rate": 3.875e-06, "loss": 1.7198164463043213, "step": 32 }, { "epoch": 0.006189132611267862, "grad_norm": 35.0, "learning_rate": 4.125e-06, "loss": 2.3988637924194336, "step": 34 }, { "epoch": 0.006553199235460089, "grad_norm": 14.3125, "learning_rate": 4.3750000000000005e-06, "loss": 1.200141429901123, "step": 36 }, { "epoch": 0.006917265859652316, "grad_norm": 18.75, "learning_rate": 4.625000000000001e-06, "loss": 1.9250798225402832, "step": 38 }, { "epoch": 0.007281332483844543, "grad_norm": 12.875, "learning_rate": 4.875e-06, "loss": 1.8268136978149414, "step": 40 }, { "epoch": 0.007645399108036771, "grad_norm": 10.5, "learning_rate": 5.125e-06, "loss": 1.8823270797729492, "step": 42 }, { "epoch": 0.008009465732228998, "grad_norm": 9.75, "learning_rate": 5.375e-06, "loss": 1.8128286600112915, "step": 44 }, { "epoch": 0.008373532356421225, "grad_norm": 20.0, "learning_rate": 5.625e-06, "loss": 1.9367401599884033, "step": 46 }, { "epoch": 0.008737598980613452, "grad_norm": 11.8125, "learning_rate": 5.8750000000000005e-06, "loss": 1.669650912284851, "step": 48 }, { "epoch": 0.00910166560480568, "grad_norm": 37.5, "learning_rate": 6.125000000000001e-06, "loss": 1.978642225265503, "step": 50 }, { "epoch": 0.009465732228997907, "grad_norm": 12.4375, "learning_rate": 6.375e-06, "loss": 1.313441276550293, "step": 52 }, { "epoch": 0.009829798853190134, "grad_norm": 2.71875, "learning_rate": 6.625e-06, "loss": 1.225853443145752, "step": 54 }, { "epoch": 0.010193865477382361, "grad_norm": 3.625, "learning_rate": 6.875e-06, "loss": 1.0497552156448364, "step": 56 }, { "epoch": 0.010557932101574588, "grad_norm": 26.125, "learning_rate": 7.125e-06, "loss": 1.9384973049163818, "step": 58 }, { "epoch": 0.010921998725766815, "grad_norm": 5.125, "learning_rate": 7.375000000000001e-06, "loss": 1.4457396268844604, "step": 60 }, { "epoch": 0.011286065349959042, "grad_norm": 12.75, "learning_rate": 7.625e-06, "loss": 1.929830551147461, "step": 62 }, { "epoch": 0.01165013197415127, "grad_norm": 4.96875, "learning_rate": 7.875e-06, "loss": 1.4362810850143433, "step": 64 }, { "epoch": 0.012014198598343497, "grad_norm": 13.6875, "learning_rate": 8.125000000000001e-06, "loss": 1.7963954210281372, "step": 66 }, { "epoch": 0.012378265222535724, "grad_norm": 14.6875, "learning_rate": 8.375e-06, "loss": 1.2056946754455566, "step": 68 }, { "epoch": 0.01274233184672795, "grad_norm": 7.625, "learning_rate": 8.625000000000001e-06, "loss": 0.9177781343460083, "step": 70 }, { "epoch": 0.013106398470920178, "grad_norm": 15.3125, "learning_rate": 8.875e-06, "loss": 1.7942477464675903, "step": 72 }, { "epoch": 0.013470465095112405, "grad_norm": 30.125, "learning_rate": 9.125e-06, "loss": 1.7084472179412842, "step": 74 }, { "epoch": 0.013834531719304632, "grad_norm": 9.5625, "learning_rate": 9.375000000000001e-06, "loss": 1.5940330028533936, "step": 76 }, { "epoch": 0.01419859834349686, "grad_norm": 23.125, "learning_rate": 9.625e-06, "loss": 1.7003000974655151, "step": 78 }, { "epoch": 0.014562664967689086, "grad_norm": 15.3125, "learning_rate": 9.875000000000001e-06, "loss": 1.2153486013412476, "step": 80 }, { "epoch": 0.014926731591881313, "grad_norm": 11.625, "learning_rate": 9.999999926627027e-06, "loss": 1.7893983125686646, "step": 82 }, { "epoch": 0.015290798216073542, "grad_norm": 3.53125, "learning_rate": 9.999999339643249e-06, "loss": 1.1524908542633057, "step": 84 }, { "epoch": 0.015654864840265768, "grad_norm": 8.125, "learning_rate": 9.999998165675777e-06, "loss": 1.632883071899414, "step": 86 }, { "epoch": 0.016018931464457997, "grad_norm": 10.4375, "learning_rate": 9.999996404724785e-06, "loss": 1.738683819770813, "step": 88 }, { "epoch": 0.016382998088650222, "grad_norm": 14.9375, "learning_rate": 9.999994056790531e-06, "loss": 1.5707422494888306, "step": 90 }, { "epoch": 0.01674706471284245, "grad_norm": 7.90625, "learning_rate": 9.999991121873363e-06, "loss": 1.6899995803833008, "step": 92 }, { "epoch": 0.017111131337034676, "grad_norm": 11.5, "learning_rate": 9.999987599973705e-06, "loss": 1.8284931182861328, "step": 94 }, { "epoch": 0.017475197961226905, "grad_norm": 8.4375, "learning_rate": 9.999983491092077e-06, "loss": 1.0054672956466675, "step": 96 }, { "epoch": 0.01783926458541913, "grad_norm": 22.625, "learning_rate": 9.999978795229084e-06, "loss": 2.263137102127075, "step": 98 }, { "epoch": 0.01820333120961136, "grad_norm": 6.84375, "learning_rate": 9.999973512385412e-06, "loss": 1.502677321434021, "step": 100 }, { "epoch": 0.018567397833803585, "grad_norm": 10.6875, "learning_rate": 9.999967642561839e-06, "loss": 1.5737406015396118, "step": 102 }, { "epoch": 0.018931464457995813, "grad_norm": 11.5625, "learning_rate": 9.999961185759224e-06, "loss": 1.5635281801223755, "step": 104 }, { "epoch": 0.01929553108218804, "grad_norm": 83.5, "learning_rate": 9.999954141978516e-06, "loss": 1.48868989944458, "step": 106 }, { "epoch": 0.019659597706380268, "grad_norm": 34.0, "learning_rate": 9.999946511220748e-06, "loss": 1.5162310600280762, "step": 108 }, { "epoch": 0.020023664330572493, "grad_norm": 9.5, "learning_rate": 9.99993829348704e-06, "loss": 1.5604157447814941, "step": 110 }, { "epoch": 0.020387730954764722, "grad_norm": 10.75, "learning_rate": 9.999929488778595e-06, "loss": 1.359964370727539, "step": 112 }, { "epoch": 0.02075179757895695, "grad_norm": 10.5625, "learning_rate": 9.999920097096712e-06, "loss": 1.7113839387893677, "step": 114 }, { "epoch": 0.021115864203149176, "grad_norm": 23.0, "learning_rate": 9.999910118442761e-06, "loss": 1.6603569984436035, "step": 116 }, { "epoch": 0.021479930827341405, "grad_norm": 35.25, "learning_rate": 9.999899552818212e-06, "loss": 1.904317855834961, "step": 118 }, { "epoch": 0.02184399745153363, "grad_norm": 9.25, "learning_rate": 9.999888400224615e-06, "loss": 1.069483757019043, "step": 120 }, { "epoch": 0.02220806407572586, "grad_norm": 5.1875, "learning_rate": 9.999876660663605e-06, "loss": 1.128413438796997, "step": 122 }, { "epoch": 0.022572130699918085, "grad_norm": 5.03125, "learning_rate": 9.999864334136904e-06, "loss": 1.5015290975570679, "step": 124 }, { "epoch": 0.022936197324110313, "grad_norm": 13.6875, "learning_rate": 9.999851420646323e-06, "loss": 1.1502019166946411, "step": 126 }, { "epoch": 0.02330026394830254, "grad_norm": 11.6875, "learning_rate": 9.999837920193756e-06, "loss": 1.7315152883529663, "step": 128 }, { "epoch": 0.023664330572494768, "grad_norm": 16.75, "learning_rate": 9.999823832781184e-06, "loss": 1.6791443824768066, "step": 130 }, { "epoch": 0.024028397196686993, "grad_norm": 8.625, "learning_rate": 9.999809158410674e-06, "loss": 1.6817721128463745, "step": 132 }, { "epoch": 0.024392463820879222, "grad_norm": 17.625, "learning_rate": 9.99979389708438e-06, "loss": 1.598803997039795, "step": 134 }, { "epoch": 0.024756530445071447, "grad_norm": 16.0, "learning_rate": 9.999778048804541e-06, "loss": 1.5391294956207275, "step": 136 }, { "epoch": 0.025120597069263676, "grad_norm": 12.5, "learning_rate": 9.999761613573484e-06, "loss": 1.6425312757492065, "step": 138 }, { "epoch": 0.0254846636934559, "grad_norm": 10.3125, "learning_rate": 9.999744591393619e-06, "loss": 1.5416018962860107, "step": 140 }, { "epoch": 0.02584873031764813, "grad_norm": 7.6875, "learning_rate": 9.999726982267444e-06, "loss": 0.9722456932067871, "step": 142 }, { "epoch": 0.026212796941840356, "grad_norm": 9.8125, "learning_rate": 9.999708786197546e-06, "loss": 1.7667213678359985, "step": 144 }, { "epoch": 0.026576863566032585, "grad_norm": 22.25, "learning_rate": 9.99969000318659e-06, "loss": 1.0348119735717773, "step": 146 }, { "epoch": 0.02694093019022481, "grad_norm": 4.3125, "learning_rate": 9.999670633237337e-06, "loss": 0.9323269128799438, "step": 148 }, { "epoch": 0.02730499681441704, "grad_norm": 3.8125, "learning_rate": 9.99965067635263e-06, "loss": 0.9375461339950562, "step": 150 }, { "epoch": 0.027669063438609264, "grad_norm": 30.5, "learning_rate": 9.999630132535391e-06, "loss": 1.8954745531082153, "step": 152 }, { "epoch": 0.028033130062801493, "grad_norm": 13.6875, "learning_rate": 9.999609001788643e-06, "loss": 1.5002610683441162, "step": 154 }, { "epoch": 0.02839719668699372, "grad_norm": 7.59375, "learning_rate": 9.999587284115482e-06, "loss": 1.7262972593307495, "step": 156 }, { "epoch": 0.028761263311185947, "grad_norm": 7.03125, "learning_rate": 9.999564979519097e-06, "loss": 1.6420605182647705, "step": 158 }, { "epoch": 0.029125329935378173, "grad_norm": 8.375, "learning_rate": 9.999542088002755e-06, "loss": 1.707388162612915, "step": 160 }, { "epoch": 0.0294893965595704, "grad_norm": 7.84375, "learning_rate": 9.999518609569824e-06, "loss": 1.618377923965454, "step": 162 }, { "epoch": 0.029853463183762627, "grad_norm": 3.359375, "learning_rate": 9.999494544223747e-06, "loss": 1.1561410427093506, "step": 164 }, { "epoch": 0.030217529807954856, "grad_norm": 24.25, "learning_rate": 9.999469891968052e-06, "loss": 1.6138756275177002, "step": 166 }, { "epoch": 0.030581596432147085, "grad_norm": 9.125, "learning_rate": 9.999444652806361e-06, "loss": 0.8504073619842529, "step": 168 }, { "epoch": 0.03094566305633931, "grad_norm": 8.9375, "learning_rate": 9.999418826742373e-06, "loss": 1.608824610710144, "step": 170 }, { "epoch": 0.031309729680531535, "grad_norm": 14.25, "learning_rate": 9.999392413779883e-06, "loss": 1.5359747409820557, "step": 172 }, { "epoch": 0.031673796304723764, "grad_norm": 9.5, "learning_rate": 9.999365413922762e-06, "loss": 1.133074164390564, "step": 174 }, { "epoch": 0.03203786292891599, "grad_norm": 34.25, "learning_rate": 9.999337827174975e-06, "loss": 1.6861637830734253, "step": 176 }, { "epoch": 0.03240192955310822, "grad_norm": 13.25, "learning_rate": 9.99930965354057e-06, "loss": 1.4610446691513062, "step": 178 }, { "epoch": 0.032765996177300444, "grad_norm": 8.8125, "learning_rate": 9.999280893023682e-06, "loss": 1.3413668870925903, "step": 180 }, { "epoch": 0.03313006280149267, "grad_norm": 46.75, "learning_rate": 9.99925154562853e-06, "loss": 2.047168254852295, "step": 182 }, { "epoch": 0.0334941294256849, "grad_norm": 2.484375, "learning_rate": 9.99922161135942e-06, "loss": 1.0822950601577759, "step": 184 }, { "epoch": 0.03385819604987713, "grad_norm": 11.75, "learning_rate": 9.999191090220748e-06, "loss": 1.5762875080108643, "step": 186 }, { "epoch": 0.03422226267406935, "grad_norm": 4.375, "learning_rate": 9.99915998221699e-06, "loss": 1.0849459171295166, "step": 188 }, { "epoch": 0.03458632929826158, "grad_norm": 41.0, "learning_rate": 9.99912828735271e-06, "loss": 1.8276569843292236, "step": 190 }, { "epoch": 0.03495039592245381, "grad_norm": 7.0625, "learning_rate": 9.999096005632565e-06, "loss": 1.6723151206970215, "step": 192 }, { "epoch": 0.03531446254664604, "grad_norm": 35.5, "learning_rate": 9.999063137061284e-06, "loss": 0.9587855935096741, "step": 194 }, { "epoch": 0.03567852917083826, "grad_norm": 13.3125, "learning_rate": 9.999029681643694e-06, "loss": 1.6144752502441406, "step": 196 }, { "epoch": 0.03604259579503049, "grad_norm": 22.75, "learning_rate": 9.998995639384709e-06, "loss": 1.3902058601379395, "step": 198 }, { "epoch": 0.03640666241922272, "grad_norm": 18.0, "learning_rate": 9.99896101028932e-06, "loss": 1.6762981414794922, "step": 200 }, { "epoch": 0.03677072904341495, "grad_norm": 13.1875, "learning_rate": 9.998925794362606e-06, "loss": 0.8191450238227844, "step": 202 }, { "epoch": 0.03713479566760717, "grad_norm": 16.25, "learning_rate": 9.99888999160974e-06, "loss": 0.9685258865356445, "step": 204 }, { "epoch": 0.0374988622917994, "grad_norm": 17.0, "learning_rate": 9.998853602035974e-06, "loss": 1.5055614709854126, "step": 206 }, { "epoch": 0.03786292891599163, "grad_norm": 13.1875, "learning_rate": 9.998816625646646e-06, "loss": 1.3083559274673462, "step": 208 }, { "epoch": 0.038226995540183856, "grad_norm": 7.6875, "learning_rate": 9.998779062447183e-06, "loss": 1.0685100555419922, "step": 210 }, { "epoch": 0.03859106216437608, "grad_norm": 11.5, "learning_rate": 9.9987409124431e-06, "loss": 1.6704621315002441, "step": 212 }, { "epoch": 0.038955128788568306, "grad_norm": 14.625, "learning_rate": 9.998702175639997e-06, "loss": 0.9525822401046753, "step": 214 }, { "epoch": 0.039319195412760535, "grad_norm": 4.25, "learning_rate": 9.998662852043551e-06, "loss": 1.2631767988204956, "step": 216 }, { "epoch": 0.039683262036952764, "grad_norm": 21.875, "learning_rate": 9.998622941659538e-06, "loss": 1.7770329713821411, "step": 218 }, { "epoch": 0.040047328661144986, "grad_norm": 8.3125, "learning_rate": 9.998582444493812e-06, "loss": 1.6833420991897583, "step": 220 }, { "epoch": 0.040411395285337215, "grad_norm": 3.28125, "learning_rate": 9.998541360552318e-06, "loss": 1.1433168649673462, "step": 222 }, { "epoch": 0.040775461909529444, "grad_norm": 12.8125, "learning_rate": 9.998499689841084e-06, "loss": 1.5833598375320435, "step": 224 }, { "epoch": 0.04113952853372167, "grad_norm": 38.0, "learning_rate": 9.998457432366225e-06, "loss": 1.5597223043441772, "step": 226 }, { "epoch": 0.0415035951579139, "grad_norm": 9.25, "learning_rate": 9.998414588133943e-06, "loss": 1.6640405654907227, "step": 228 }, { "epoch": 0.04186766178210612, "grad_norm": 5.09375, "learning_rate": 9.998371157150522e-06, "loss": 1.2285068035125732, "step": 230 }, { "epoch": 0.04223172840629835, "grad_norm": 8.625, "learning_rate": 9.998327139422339e-06, "loss": 1.5521577596664429, "step": 232 }, { "epoch": 0.04259579503049058, "grad_norm": 9.625, "learning_rate": 9.998282534955851e-06, "loss": 1.3443025350570679, "step": 234 }, { "epoch": 0.04295986165468281, "grad_norm": 21.75, "learning_rate": 9.998237343757606e-06, "loss": 2.068732738494873, "step": 236 }, { "epoch": 0.04332392827887503, "grad_norm": 5.3125, "learning_rate": 9.998191565834235e-06, "loss": 0.8551888465881348, "step": 238 }, { "epoch": 0.04368799490306726, "grad_norm": 14.25, "learning_rate": 9.998145201192453e-06, "loss": 1.6422497034072876, "step": 240 }, { "epoch": 0.04405206152725949, "grad_norm": 6.25, "learning_rate": 9.998098249839067e-06, "loss": 0.9000154733657837, "step": 242 }, { "epoch": 0.04441612815145172, "grad_norm": 26.125, "learning_rate": 9.998050711780964e-06, "loss": 1.3616223335266113, "step": 244 }, { "epoch": 0.04478019477564394, "grad_norm": 24.5, "learning_rate": 9.998002587025124e-06, "loss": 1.8066420555114746, "step": 246 }, { "epoch": 0.04514426139983617, "grad_norm": 12.875, "learning_rate": 9.997953875578608e-06, "loss": 1.7318825721740723, "step": 248 }, { "epoch": 0.0455083280240284, "grad_norm": 11.6875, "learning_rate": 9.997904577448561e-06, "loss": 1.0848517417907715, "step": 250 }, { "epoch": 0.04587239464822063, "grad_norm": 9.8125, "learning_rate": 9.99785469264222e-06, "loss": 1.163558006286621, "step": 252 }, { "epoch": 0.04623646127241285, "grad_norm": 12.4375, "learning_rate": 9.997804221166903e-06, "loss": 2.1979146003723145, "step": 254 }, { "epoch": 0.04660052789660508, "grad_norm": 17.75, "learning_rate": 9.99775316303002e-06, "loss": 1.7131332159042358, "step": 256 }, { "epoch": 0.046964594520797306, "grad_norm": 16.625, "learning_rate": 9.99770151823906e-06, "loss": 0.8940047025680542, "step": 258 }, { "epoch": 0.047328661144989535, "grad_norm": 10.3125, "learning_rate": 9.997649286801605e-06, "loss": 1.5393319129943848, "step": 260 }, { "epoch": 0.04769272776918176, "grad_norm": 10.0625, "learning_rate": 9.997596468725319e-06, "loss": 2.0852997303009033, "step": 262 }, { "epoch": 0.048056794393373986, "grad_norm": 2.578125, "learning_rate": 9.997543064017949e-06, "loss": 1.1634950637817383, "step": 264 }, { "epoch": 0.048420861017566215, "grad_norm": 41.25, "learning_rate": 9.997489072687338e-06, "loss": 0.6052025556564331, "step": 266 }, { "epoch": 0.048784927641758444, "grad_norm": 10.5, "learning_rate": 9.997434494741406e-06, "loss": 1.594040036201477, "step": 268 }, { "epoch": 0.049148994265950666, "grad_norm": 9.875, "learning_rate": 9.997379330188159e-06, "loss": 0.9371297359466553, "step": 270 }, { "epoch": 0.049513060890142895, "grad_norm": 9.5625, "learning_rate": 9.997323579035698e-06, "loss": 1.5109306573867798, "step": 272 }, { "epoch": 0.04987712751433512, "grad_norm": 5.6875, "learning_rate": 9.997267241292202e-06, "loss": 0.7313521504402161, "step": 274 }, { "epoch": 0.05024119413852735, "grad_norm": 2.390625, "learning_rate": 9.997210316965935e-06, "loss": 0.7774074077606201, "step": 276 }, { "epoch": 0.05060526076271958, "grad_norm": 5.84375, "learning_rate": 9.997152806065255e-06, "loss": 1.225516676902771, "step": 278 }, { "epoch": 0.0509693273869118, "grad_norm": 23.25, "learning_rate": 9.9970947085986e-06, "loss": 0.763280987739563, "step": 280 }, { "epoch": 0.05133339401110403, "grad_norm": 50.0, "learning_rate": 9.997036024574495e-06, "loss": 1.6328344345092773, "step": 282 }, { "epoch": 0.05169746063529626, "grad_norm": 22.875, "learning_rate": 9.996976754001552e-06, "loss": 1.7208383083343506, "step": 284 }, { "epoch": 0.05206152725948849, "grad_norm": 17.375, "learning_rate": 9.99691689688847e-06, "loss": 1.9400101900100708, "step": 286 }, { "epoch": 0.05242559388368071, "grad_norm": 3.328125, "learning_rate": 9.996856453244029e-06, "loss": 1.0347318649291992, "step": 288 }, { "epoch": 0.05278966050787294, "grad_norm": 53.75, "learning_rate": 9.996795423077101e-06, "loss": 2.3298144340515137, "step": 290 }, { "epoch": 0.05315372713206517, "grad_norm": 26.125, "learning_rate": 9.996733806396646e-06, "loss": 2.0908212661743164, "step": 292 }, { "epoch": 0.0535177937562574, "grad_norm": 23.375, "learning_rate": 9.996671603211699e-06, "loss": 1.9290943145751953, "step": 294 }, { "epoch": 0.05388186038044962, "grad_norm": 6.84375, "learning_rate": 9.996608813531392e-06, "loss": 1.1384938955307007, "step": 296 }, { "epoch": 0.05424592700464185, "grad_norm": 9.25, "learning_rate": 9.99654543736494e-06, "loss": 1.6162941455841064, "step": 298 }, { "epoch": 0.05460999362883408, "grad_norm": 11.8125, "learning_rate": 9.996481474721638e-06, "loss": 1.863888144493103, "step": 300 }, { "epoch": 0.054974060253026306, "grad_norm": 4.15625, "learning_rate": 9.99641692561088e-06, "loss": 1.0767062902450562, "step": 302 }, { "epoch": 0.05533812687721853, "grad_norm": 3.78125, "learning_rate": 9.996351790042132e-06, "loss": 1.0521377325057983, "step": 304 }, { "epoch": 0.05570219350141076, "grad_norm": 18.75, "learning_rate": 9.996286068024956e-06, "loss": 1.1314442157745361, "step": 306 }, { "epoch": 0.056066260125602986, "grad_norm": 37.5, "learning_rate": 9.996219759568992e-06, "loss": 1.8040153980255127, "step": 308 }, { "epoch": 0.056430326749795215, "grad_norm": 8.375, "learning_rate": 9.996152864683977e-06, "loss": 1.5515364408493042, "step": 310 }, { "epoch": 0.05679439337398744, "grad_norm": 36.75, "learning_rate": 9.996085383379724e-06, "loss": 0.8420185446739197, "step": 312 }, { "epoch": 0.057158459998179666, "grad_norm": 18.625, "learning_rate": 9.996017315666134e-06, "loss": 1.5096814632415771, "step": 314 }, { "epoch": 0.057522526622371895, "grad_norm": 19.0, "learning_rate": 9.995948661553196e-06, "loss": 1.18393874168396, "step": 316 }, { "epoch": 0.05788659324656412, "grad_norm": 26.375, "learning_rate": 9.995879421050989e-06, "loss": 2.3062312602996826, "step": 318 }, { "epoch": 0.058250659870756345, "grad_norm": 11.8125, "learning_rate": 9.99580959416967e-06, "loss": 1.6267517805099487, "step": 320 }, { "epoch": 0.058614726494948574, "grad_norm": 31.625, "learning_rate": 9.995739180919487e-06, "loss": 0.3946562707424164, "step": 322 }, { "epoch": 0.0589787931191408, "grad_norm": 26.5, "learning_rate": 9.99566818131077e-06, "loss": 1.8692868947982788, "step": 324 }, { "epoch": 0.05934285974333303, "grad_norm": 55.25, "learning_rate": 9.995596595353943e-06, "loss": 1.4824076890945435, "step": 326 }, { "epoch": 0.059706926367525254, "grad_norm": 60.5, "learning_rate": 9.995524423059508e-06, "loss": 1.873770833015442, "step": 328 }, { "epoch": 0.06007099299171748, "grad_norm": 22.875, "learning_rate": 9.995451664438057e-06, "loss": 2.0384387969970703, "step": 330 }, { "epoch": 0.06043505961590971, "grad_norm": 11.875, "learning_rate": 9.995378319500264e-06, "loss": 1.7458500862121582, "step": 332 }, { "epoch": 0.06079912624010194, "grad_norm": 18.375, "learning_rate": 9.995304388256898e-06, "loss": 1.488701343536377, "step": 334 }, { "epoch": 0.06116319286429417, "grad_norm": 30.625, "learning_rate": 9.9952298707188e-06, "loss": 1.4240211248397827, "step": 336 }, { "epoch": 0.06152725948848639, "grad_norm": 28.0, "learning_rate": 9.995154766896913e-06, "loss": 1.5667370557785034, "step": 338 }, { "epoch": 0.06189132611267862, "grad_norm": 14.5, "learning_rate": 9.995079076802254e-06, "loss": 1.5257785320281982, "step": 340 }, { "epoch": 0.06225539273687085, "grad_norm": 5.6875, "learning_rate": 9.995002800445932e-06, "loss": 1.3201079368591309, "step": 342 }, { "epoch": 0.06261945936106307, "grad_norm": 15.0, "learning_rate": 9.99492593783914e-06, "loss": 1.531526803970337, "step": 344 }, { "epoch": 0.0629835259852553, "grad_norm": 12.5, "learning_rate": 9.994848488993155e-06, "loss": 1.606310486793518, "step": 346 }, { "epoch": 0.06334759260944753, "grad_norm": 22.5, "learning_rate": 9.994770453919343e-06, "loss": 1.9906249046325684, "step": 348 }, { "epoch": 0.06371165923363975, "grad_norm": 14.75, "learning_rate": 9.994691832629157e-06, "loss": 1.6271324157714844, "step": 350 }, { "epoch": 0.06407572585783199, "grad_norm": 22.0, "learning_rate": 9.994612625134134e-06, "loss": 1.7152941226959229, "step": 352 }, { "epoch": 0.06443979248202421, "grad_norm": 6.21875, "learning_rate": 9.994532831445898e-06, "loss": 1.098881721496582, "step": 354 }, { "epoch": 0.06480385910621644, "grad_norm": 9.25, "learning_rate": 9.994452451576155e-06, "loss": 1.5961560010910034, "step": 356 }, { "epoch": 0.06516792573040867, "grad_norm": 14.125, "learning_rate": 9.994371485536705e-06, "loss": 1.7563493251800537, "step": 358 }, { "epoch": 0.06553199235460089, "grad_norm": 10.6875, "learning_rate": 9.994289933339426e-06, "loss": 1.4499320983886719, "step": 360 }, { "epoch": 0.06589605897879312, "grad_norm": 8.8125, "learning_rate": 9.994207794996289e-06, "loss": 1.6752084493637085, "step": 362 }, { "epoch": 0.06626012560298535, "grad_norm": 6.0625, "learning_rate": 9.994125070519343e-06, "loss": 1.204414963722229, "step": 364 }, { "epoch": 0.06662419222717757, "grad_norm": 9.375, "learning_rate": 9.99404175992073e-06, "loss": 1.204558253288269, "step": 366 }, { "epoch": 0.0669882588513698, "grad_norm": 11.1875, "learning_rate": 9.993957863212676e-06, "loss": 1.3985226154327393, "step": 368 }, { "epoch": 0.06735232547556202, "grad_norm": 12.875, "learning_rate": 9.993873380407491e-06, "loss": 0.8727314472198486, "step": 370 }, { "epoch": 0.06771639209975426, "grad_norm": 11.3125, "learning_rate": 9.993788311517574e-06, "loss": 1.6418206691741943, "step": 372 }, { "epoch": 0.06808045872394648, "grad_norm": 17.875, "learning_rate": 9.993702656555406e-06, "loss": 1.4980840682983398, "step": 374 }, { "epoch": 0.0684445253481387, "grad_norm": 9.0625, "learning_rate": 9.99361641553356e-06, "loss": 1.5730842351913452, "step": 376 }, { "epoch": 0.06880859197233094, "grad_norm": 9.1875, "learning_rate": 9.993529588464688e-06, "loss": 1.6968036890029907, "step": 378 }, { "epoch": 0.06917265859652316, "grad_norm": 14.375, "learning_rate": 9.993442175361534e-06, "loss": 1.8771188259124756, "step": 380 }, { "epoch": 0.06953672522071538, "grad_norm": 12.5, "learning_rate": 9.993354176236925e-06, "loss": 1.5471888780593872, "step": 382 }, { "epoch": 0.06990079184490762, "grad_norm": 15.625, "learning_rate": 9.993265591103774e-06, "loss": 1.749687910079956, "step": 384 }, { "epoch": 0.07026485846909984, "grad_norm": 4.6875, "learning_rate": 9.993176419975082e-06, "loss": 1.1482858657836914, "step": 386 }, { "epoch": 0.07062892509329208, "grad_norm": 44.75, "learning_rate": 9.993086662863931e-06, "loss": 0.842327356338501, "step": 388 }, { "epoch": 0.0709929917174843, "grad_norm": 11.625, "learning_rate": 9.992996319783496e-06, "loss": 1.451005458831787, "step": 390 }, { "epoch": 0.07135705834167652, "grad_norm": 10.75, "learning_rate": 9.992905390747035e-06, "loss": 1.5364468097686768, "step": 392 }, { "epoch": 0.07172112496586876, "grad_norm": 17.0, "learning_rate": 9.992813875767889e-06, "loss": 1.2617599964141846, "step": 394 }, { "epoch": 0.07208519159006098, "grad_norm": 10.4375, "learning_rate": 9.992721774859487e-06, "loss": 1.686112403869629, "step": 396 }, { "epoch": 0.07244925821425321, "grad_norm": 10.0, "learning_rate": 9.992629088035344e-06, "loss": 1.6116350889205933, "step": 398 }, { "epoch": 0.07281332483844544, "grad_norm": 7.65625, "learning_rate": 9.992535815309065e-06, "loss": 0.8564869165420532, "step": 400 }, { "epoch": 0.07317739146263766, "grad_norm": 22.25, "learning_rate": 9.992441956694337e-06, "loss": 2.0098888874053955, "step": 402 }, { "epoch": 0.0735414580868299, "grad_norm": 10.1875, "learning_rate": 9.992347512204929e-06, "loss": 1.6706129312515259, "step": 404 }, { "epoch": 0.07390552471102212, "grad_norm": 90.0, "learning_rate": 9.992252481854705e-06, "loss": 1.5262184143066406, "step": 406 }, { "epoch": 0.07426959133521434, "grad_norm": 8.125, "learning_rate": 9.992156865657608e-06, "loss": 1.7168821096420288, "step": 408 }, { "epoch": 0.07463365795940657, "grad_norm": 12.1875, "learning_rate": 9.992060663627669e-06, "loss": 1.509940505027771, "step": 410 }, { "epoch": 0.0749977245835988, "grad_norm": 10.5, "learning_rate": 9.991963875779007e-06, "loss": 1.7270442247390747, "step": 412 }, { "epoch": 0.07536179120779103, "grad_norm": 6.1875, "learning_rate": 9.991866502125822e-06, "loss": 1.1227549314498901, "step": 414 }, { "epoch": 0.07572585783198325, "grad_norm": 37.5, "learning_rate": 9.991768542682409e-06, "loss": 1.3501100540161133, "step": 416 }, { "epoch": 0.07608992445617548, "grad_norm": 26.375, "learning_rate": 9.991669997463139e-06, "loss": 2.197963237762451, "step": 418 }, { "epoch": 0.07645399108036771, "grad_norm": 9.75, "learning_rate": 9.991570866482471e-06, "loss": 1.4900336265563965, "step": 420 }, { "epoch": 0.07681805770455993, "grad_norm": 7.15625, "learning_rate": 9.991471149754957e-06, "loss": 1.4062421321868896, "step": 422 }, { "epoch": 0.07718212432875216, "grad_norm": 8.375, "learning_rate": 9.991370847295228e-06, "loss": 1.5279009342193604, "step": 424 }, { "epoch": 0.07754619095294439, "grad_norm": 18.375, "learning_rate": 9.991269959118002e-06, "loss": 1.131893515586853, "step": 426 }, { "epoch": 0.07791025757713661, "grad_norm": 15.625, "learning_rate": 9.991168485238083e-06, "loss": 1.4996836185455322, "step": 428 }, { "epoch": 0.07827432420132885, "grad_norm": 15.0625, "learning_rate": 9.991066425670365e-06, "loss": 1.921349048614502, "step": 430 }, { "epoch": 0.07863839082552107, "grad_norm": 9.0, "learning_rate": 9.990963780429824e-06, "loss": 1.4701411724090576, "step": 432 }, { "epoch": 0.07900245744971329, "grad_norm": 9.5625, "learning_rate": 9.990860549531522e-06, "loss": 1.564692735671997, "step": 434 }, { "epoch": 0.07936652407390553, "grad_norm": 12.625, "learning_rate": 9.990756732990607e-06, "loss": 1.6308060884475708, "step": 436 }, { "epoch": 0.07973059069809775, "grad_norm": 9.125, "learning_rate": 9.990652330822315e-06, "loss": 1.5474727153778076, "step": 438 }, { "epoch": 0.08009465732228997, "grad_norm": 22.0, "learning_rate": 9.990547343041968e-06, "loss": 1.6923563480377197, "step": 440 }, { "epoch": 0.08045872394648221, "grad_norm": 3.25, "learning_rate": 9.990441769664969e-06, "loss": 1.285474419593811, "step": 442 }, { "epoch": 0.08082279057067443, "grad_norm": 25.75, "learning_rate": 9.990335610706812e-06, "loss": 2.203094005584717, "step": 444 }, { "epoch": 0.08118685719486667, "grad_norm": 6.21875, "learning_rate": 9.990228866183076e-06, "loss": 1.5876837968826294, "step": 446 }, { "epoch": 0.08155092381905889, "grad_norm": 16.125, "learning_rate": 9.990121536109423e-06, "loss": 0.9697806239128113, "step": 448 }, { "epoch": 0.08191499044325111, "grad_norm": 15.0, "learning_rate": 9.990013620501609e-06, "loss": 1.0794289112091064, "step": 450 }, { "epoch": 0.08227905706744335, "grad_norm": 10.0, "learning_rate": 9.989905119375463e-06, "loss": 1.4529541730880737, "step": 452 }, { "epoch": 0.08264312369163557, "grad_norm": 53.75, "learning_rate": 9.98979603274691e-06, "loss": 1.9725878238677979, "step": 454 }, { "epoch": 0.0830071903158278, "grad_norm": 12.375, "learning_rate": 9.98968636063196e-06, "loss": 1.691248893737793, "step": 456 }, { "epoch": 0.08337125694002002, "grad_norm": 15.0625, "learning_rate": 9.989576103046706e-06, "loss": 0.7653573751449585, "step": 458 }, { "epoch": 0.08373532356421225, "grad_norm": 7.96875, "learning_rate": 9.989465260007326e-06, "loss": 1.3986103534698486, "step": 460 }, { "epoch": 0.08409939018840448, "grad_norm": 32.5, "learning_rate": 9.989353831530089e-06, "loss": 2.173577070236206, "step": 462 }, { "epoch": 0.0844634568125967, "grad_norm": 9.125, "learning_rate": 9.989241817631344e-06, "loss": 1.509488821029663, "step": 464 }, { "epoch": 0.08482752343678893, "grad_norm": 27.25, "learning_rate": 9.989129218327526e-06, "loss": 2.010396957397461, "step": 466 }, { "epoch": 0.08519159006098116, "grad_norm": 8.375, "learning_rate": 9.989016033635164e-06, "loss": 1.5934710502624512, "step": 468 }, { "epoch": 0.08555565668517338, "grad_norm": 9.625, "learning_rate": 9.988902263570865e-06, "loss": 1.3932366371154785, "step": 470 }, { "epoch": 0.08591972330936562, "grad_norm": 35.25, "learning_rate": 9.988787908151326e-06, "loss": 0.9453686475753784, "step": 472 }, { "epoch": 0.08628378993355784, "grad_norm": 14.625, "learning_rate": 9.988672967393325e-06, "loss": 1.237920880317688, "step": 474 }, { "epoch": 0.08664785655775006, "grad_norm": 14.1875, "learning_rate": 9.98855744131373e-06, "loss": 0.9060868620872498, "step": 476 }, { "epoch": 0.0870119231819423, "grad_norm": 14.0625, "learning_rate": 9.988441329929497e-06, "loss": 1.101029872894287, "step": 478 }, { "epoch": 0.08737598980613452, "grad_norm": 10.875, "learning_rate": 9.98832463325766e-06, "loss": 1.4590922594070435, "step": 480 }, { "epoch": 0.08774005643032674, "grad_norm": 13.25, "learning_rate": 9.988207351315349e-06, "loss": 1.5289921760559082, "step": 482 }, { "epoch": 0.08810412305451898, "grad_norm": 8.25, "learning_rate": 9.98808948411977e-06, "loss": 1.443752646446228, "step": 484 }, { "epoch": 0.0884681896787112, "grad_norm": 27.875, "learning_rate": 9.987971031688221e-06, "loss": 1.3074227571487427, "step": 486 }, { "epoch": 0.08883225630290344, "grad_norm": 13.75, "learning_rate": 9.987851994038088e-06, "loss": 1.5410394668579102, "step": 488 }, { "epoch": 0.08919632292709566, "grad_norm": 13.4375, "learning_rate": 9.987732371186834e-06, "loss": 1.531282901763916, "step": 490 }, { "epoch": 0.08956038955128788, "grad_norm": 24.625, "learning_rate": 9.987612163152014e-06, "loss": 2.0960986614227295, "step": 492 }, { "epoch": 0.08992445617548012, "grad_norm": 19.5, "learning_rate": 9.987491369951271e-06, "loss": 1.941217064857483, "step": 494 }, { "epoch": 0.09028852279967234, "grad_norm": 21.375, "learning_rate": 9.987369991602329e-06, "loss": 2.0323238372802734, "step": 496 }, { "epoch": 0.09065258942386457, "grad_norm": 11.5625, "learning_rate": 9.987248028123003e-06, "loss": 0.8621390461921692, "step": 498 }, { "epoch": 0.0910166560480568, "grad_norm": 18.5, "learning_rate": 9.987125479531186e-06, "loss": 1.1050454378128052, "step": 500 }, { "epoch": 0.09138072267224902, "grad_norm": 30.5, "learning_rate": 9.98700234584486e-06, "loss": 1.3388925790786743, "step": 502 }, { "epoch": 0.09174478929644125, "grad_norm": 5.28125, "learning_rate": 9.986878627082102e-06, "loss": 0.9674100875854492, "step": 504 }, { "epoch": 0.09210885592063348, "grad_norm": 14.3125, "learning_rate": 9.986754323261061e-06, "loss": 1.6390407085418701, "step": 506 }, { "epoch": 0.0924729225448257, "grad_norm": 20.0, "learning_rate": 9.98662943439998e-06, "loss": 0.7929630875587463, "step": 508 }, { "epoch": 0.09283698916901793, "grad_norm": 16.75, "learning_rate": 9.986503960517185e-06, "loss": 1.8872112035751343, "step": 510 }, { "epoch": 0.09320105579321016, "grad_norm": 15.875, "learning_rate": 9.98637790163109e-06, "loss": 1.5856091976165771, "step": 512 }, { "epoch": 0.09356512241740239, "grad_norm": 12.5625, "learning_rate": 9.986251257760195e-06, "loss": 1.5681712627410889, "step": 514 }, { "epoch": 0.09392918904159461, "grad_norm": 15.375, "learning_rate": 9.986124028923083e-06, "loss": 1.0393840074539185, "step": 516 }, { "epoch": 0.09429325566578683, "grad_norm": 11.0625, "learning_rate": 9.985996215138423e-06, "loss": 1.6617491245269775, "step": 518 }, { "epoch": 0.09465732228997907, "grad_norm": 9.3125, "learning_rate": 9.98586781642497e-06, "loss": 1.6009297370910645, "step": 520 }, { "epoch": 0.09502138891417129, "grad_norm": 11.75, "learning_rate": 9.98573883280157e-06, "loss": 1.736532211303711, "step": 522 }, { "epoch": 0.09538545553836351, "grad_norm": 24.875, "learning_rate": 9.98560926428715e-06, "loss": 2.1928296089172363, "step": 524 }, { "epoch": 0.09574952216255575, "grad_norm": 4.40625, "learning_rate": 9.985479110900721e-06, "loss": 1.0732488632202148, "step": 526 }, { "epoch": 0.09611358878674797, "grad_norm": 9.0625, "learning_rate": 9.985348372661388e-06, "loss": 1.6667879819869995, "step": 528 }, { "epoch": 0.09647765541094021, "grad_norm": 13.9375, "learning_rate": 9.98521704958833e-06, "loss": 1.4967536926269531, "step": 530 }, { "epoch": 0.09684172203513243, "grad_norm": 4.0625, "learning_rate": 9.98508514170082e-06, "loss": 1.0845718383789062, "step": 532 }, { "epoch": 0.09720578865932465, "grad_norm": 3.265625, "learning_rate": 9.984952649018215e-06, "loss": 1.1927682161331177, "step": 534 }, { "epoch": 0.09756985528351689, "grad_norm": 11.125, "learning_rate": 9.98481957155996e-06, "loss": 1.3530960083007812, "step": 536 }, { "epoch": 0.09793392190770911, "grad_norm": 9.3125, "learning_rate": 9.984685909345582e-06, "loss": 1.5236634016036987, "step": 538 }, { "epoch": 0.09829798853190133, "grad_norm": 5.3125, "learning_rate": 9.984551662394695e-06, "loss": 1.1593369245529175, "step": 540 }, { "epoch": 0.09866205515609357, "grad_norm": 5.90625, "learning_rate": 9.984416830727e-06, "loss": 1.307875156402588, "step": 542 }, { "epoch": 0.09902612178028579, "grad_norm": 3.8125, "learning_rate": 9.98428141436228e-06, "loss": 1.5498743057250977, "step": 544 }, { "epoch": 0.09939018840447802, "grad_norm": 5.125, "learning_rate": 9.984145413320412e-06, "loss": 1.4448132514953613, "step": 546 }, { "epoch": 0.09975425502867025, "grad_norm": 2.765625, "learning_rate": 9.984008827621349e-06, "loss": 1.1951658725738525, "step": 548 }, { "epoch": 0.10011832165286247, "grad_norm": 15.375, "learning_rate": 9.983871657285138e-06, "loss": 1.38310706615448, "step": 550 }, { "epoch": 0.1004823882770547, "grad_norm": 70.5, "learning_rate": 9.983733902331907e-06, "loss": 0.7291164994239807, "step": 552 }, { "epoch": 0.10084645490124693, "grad_norm": 6.53125, "learning_rate": 9.98359556278187e-06, "loss": 1.401672124862671, "step": 554 }, { "epoch": 0.10121052152543916, "grad_norm": 9.25, "learning_rate": 9.983456638655327e-06, "loss": 1.8654245138168335, "step": 556 }, { "epoch": 0.10157458814963138, "grad_norm": 15.0625, "learning_rate": 9.983317129972667e-06, "loss": 1.8804476261138916, "step": 558 }, { "epoch": 0.1019386547738236, "grad_norm": 10.75, "learning_rate": 9.98317703675436e-06, "loss": 1.4194706678390503, "step": 560 }, { "epoch": 0.10230272139801584, "grad_norm": 26.75, "learning_rate": 9.983036359020965e-06, "loss": 1.7774877548217773, "step": 562 }, { "epoch": 0.10266678802220806, "grad_norm": 17.375, "learning_rate": 9.982895096793128e-06, "loss": 1.0727057456970215, "step": 564 }, { "epoch": 0.10303085464640029, "grad_norm": 16.75, "learning_rate": 9.982753250091577e-06, "loss": 1.36929190158844, "step": 566 }, { "epoch": 0.10339492127059252, "grad_norm": 10.6875, "learning_rate": 9.982610818937124e-06, "loss": 1.3780345916748047, "step": 568 }, { "epoch": 0.10375898789478474, "grad_norm": 15.0625, "learning_rate": 9.982467803350675e-06, "loss": 0.8375037908554077, "step": 570 }, { "epoch": 0.10412305451897698, "grad_norm": 20.5, "learning_rate": 9.982324203353217e-06, "loss": 1.4844985008239746, "step": 572 }, { "epoch": 0.1044871211431692, "grad_norm": 19.125, "learning_rate": 9.98218001896582e-06, "loss": 1.5376529693603516, "step": 574 }, { "epoch": 0.10485118776736142, "grad_norm": 18.375, "learning_rate": 9.982035250209642e-06, "loss": 1.4483085870742798, "step": 576 }, { "epoch": 0.10521525439155366, "grad_norm": 10.5, "learning_rate": 9.981889897105932e-06, "loss": 1.7304250001907349, "step": 578 }, { "epoch": 0.10557932101574588, "grad_norm": 26.375, "learning_rate": 9.981743959676016e-06, "loss": 2.1831817626953125, "step": 580 }, { "epoch": 0.1059433876399381, "grad_norm": 10.5, "learning_rate": 9.981597437941309e-06, "loss": 1.4532653093338013, "step": 582 }, { "epoch": 0.10630745426413034, "grad_norm": 11.0, "learning_rate": 9.981450331923315e-06, "loss": 1.6760830879211426, "step": 584 }, { "epoch": 0.10667152088832256, "grad_norm": 45.75, "learning_rate": 9.98130264164362e-06, "loss": 1.4925655126571655, "step": 586 }, { "epoch": 0.1070355875125148, "grad_norm": 11.6875, "learning_rate": 9.981154367123898e-06, "loss": 1.2994002103805542, "step": 588 }, { "epoch": 0.10739965413670702, "grad_norm": 7.8125, "learning_rate": 9.981005508385904e-06, "loss": 1.09633207321167, "step": 590 }, { "epoch": 0.10776372076089924, "grad_norm": 14.5, "learning_rate": 9.980856065451487e-06, "loss": 1.5942790508270264, "step": 592 }, { "epoch": 0.10812778738509148, "grad_norm": 11.5, "learning_rate": 9.980706038342575e-06, "loss": 1.3079761266708374, "step": 594 }, { "epoch": 0.1084918540092837, "grad_norm": 27.625, "learning_rate": 9.980555427081187e-06, "loss": 1.7374396324157715, "step": 596 }, { "epoch": 0.10885592063347592, "grad_norm": 22.125, "learning_rate": 9.980404231689418e-06, "loss": 1.8327726125717163, "step": 598 }, { "epoch": 0.10921998725766816, "grad_norm": 5.21875, "learning_rate": 9.98025245218946e-06, "loss": 1.228814959526062, "step": 600 }, { "epoch": 0.10958405388186038, "grad_norm": 8.9375, "learning_rate": 9.980100088603588e-06, "loss": 1.3061808347702026, "step": 602 }, { "epoch": 0.10994812050605261, "grad_norm": 8.1875, "learning_rate": 9.979947140954156e-06, "loss": 1.4228764772415161, "step": 604 }, { "epoch": 0.11031218713024483, "grad_norm": 7.875, "learning_rate": 9.979793609263609e-06, "loss": 1.4661059379577637, "step": 606 }, { "epoch": 0.11067625375443706, "grad_norm": 11.25, "learning_rate": 9.97963949355448e-06, "loss": 1.5948870182037354, "step": 608 }, { "epoch": 0.11104032037862929, "grad_norm": 46.5, "learning_rate": 9.979484793849383e-06, "loss": 1.5271118879318237, "step": 610 }, { "epoch": 0.11140438700282151, "grad_norm": 7.9375, "learning_rate": 9.979329510171021e-06, "loss": 1.2037501335144043, "step": 612 }, { "epoch": 0.11176845362701375, "grad_norm": 6.53125, "learning_rate": 9.979173642542179e-06, "loss": 1.2872017621994019, "step": 614 }, { "epoch": 0.11213252025120597, "grad_norm": 7.0, "learning_rate": 9.979017190985732e-06, "loss": 1.5186052322387695, "step": 616 }, { "epoch": 0.1124965868753982, "grad_norm": 3.796875, "learning_rate": 9.978860155524637e-06, "loss": 1.2106845378875732, "step": 618 }, { "epoch": 0.11286065349959043, "grad_norm": 25.875, "learning_rate": 9.978702536181939e-06, "loss": 1.4363855123519897, "step": 620 }, { "epoch": 0.11322472012378265, "grad_norm": 22.875, "learning_rate": 9.978544332980769e-06, "loss": 1.5833772420883179, "step": 622 }, { "epoch": 0.11358878674797487, "grad_norm": 29.75, "learning_rate": 9.97838554594434e-06, "loss": 1.4303171634674072, "step": 624 }, { "epoch": 0.11395285337216711, "grad_norm": 23.0, "learning_rate": 9.978226175095957e-06, "loss": 1.8701369762420654, "step": 626 }, { "epoch": 0.11431691999635933, "grad_norm": 31.5, "learning_rate": 9.978066220459004e-06, "loss": 1.5721532106399536, "step": 628 }, { "epoch": 0.11468098662055157, "grad_norm": 11.375, "learning_rate": 9.977905682056957e-06, "loss": 1.6384358406066895, "step": 630 }, { "epoch": 0.11504505324474379, "grad_norm": 59.0, "learning_rate": 9.977744559913369e-06, "loss": 1.4425804615020752, "step": 632 }, { "epoch": 0.11540911986893601, "grad_norm": 21.25, "learning_rate": 9.977582854051887e-06, "loss": 1.9026998281478882, "step": 634 }, { "epoch": 0.11577318649312825, "grad_norm": 8.625, "learning_rate": 9.977420564496244e-06, "loss": 1.4961724281311035, "step": 636 }, { "epoch": 0.11613725311732047, "grad_norm": 14.25, "learning_rate": 9.97725769127025e-06, "loss": 1.5527920722961426, "step": 638 }, { "epoch": 0.11650131974151269, "grad_norm": 12.1875, "learning_rate": 9.977094234397811e-06, "loss": 1.6305630207061768, "step": 640 }, { "epoch": 0.11686538636570493, "grad_norm": 12.5625, "learning_rate": 9.976930193902909e-06, "loss": 1.5491584539413452, "step": 642 }, { "epoch": 0.11722945298989715, "grad_norm": 14.0, "learning_rate": 9.97676556980962e-06, "loss": 1.3492447137832642, "step": 644 }, { "epoch": 0.11759351961408938, "grad_norm": 15.375, "learning_rate": 9.976600362142095e-06, "loss": 1.4404855966567993, "step": 646 }, { "epoch": 0.1179575862382816, "grad_norm": 13.3125, "learning_rate": 9.976434570924587e-06, "loss": 1.5379244089126587, "step": 648 }, { "epoch": 0.11832165286247383, "grad_norm": 5.6875, "learning_rate": 9.976268196181418e-06, "loss": 0.9439575672149658, "step": 650 }, { "epoch": 0.11868571948666606, "grad_norm": 7.8125, "learning_rate": 9.976101237937008e-06, "loss": 1.5202707052230835, "step": 652 }, { "epoch": 0.11904978611085829, "grad_norm": 20.125, "learning_rate": 9.975933696215854e-06, "loss": 2.09470796585083, "step": 654 }, { "epoch": 0.11941385273505051, "grad_norm": 7.625, "learning_rate": 9.975765571042543e-06, "loss": 0.9947249889373779, "step": 656 }, { "epoch": 0.11977791935924274, "grad_norm": 9.3125, "learning_rate": 9.975596862441748e-06, "loss": 0.9893943071365356, "step": 658 }, { "epoch": 0.12014198598343497, "grad_norm": 6.875, "learning_rate": 9.975427570438223e-06, "loss": 0.8843866586685181, "step": 660 }, { "epoch": 0.1205060526076272, "grad_norm": 3.265625, "learning_rate": 9.975257695056815e-06, "loss": 0.9782657027244568, "step": 662 }, { "epoch": 0.12087011923181942, "grad_norm": 25.625, "learning_rate": 9.97508723632245e-06, "loss": 1.4665272235870361, "step": 664 }, { "epoch": 0.12123418585601164, "grad_norm": 13.5625, "learning_rate": 9.974916194260143e-06, "loss": 1.4590774774551392, "step": 666 }, { "epoch": 0.12159825248020388, "grad_norm": 13.625, "learning_rate": 9.974744568894991e-06, "loss": 1.5867903232574463, "step": 668 }, { "epoch": 0.1219623191043961, "grad_norm": 14.5, "learning_rate": 9.974572360252185e-06, "loss": 1.555790901184082, "step": 670 }, { "epoch": 0.12232638572858834, "grad_norm": 30.375, "learning_rate": 9.974399568356991e-06, "loss": 2.1648361682891846, "step": 672 }, { "epoch": 0.12269045235278056, "grad_norm": 16.25, "learning_rate": 9.974226193234768e-06, "loss": 1.1645123958587646, "step": 674 }, { "epoch": 0.12305451897697278, "grad_norm": 11.5625, "learning_rate": 9.974052234910958e-06, "loss": 1.2378907203674316, "step": 676 }, { "epoch": 0.12341858560116502, "grad_norm": 9.4375, "learning_rate": 9.973877693411088e-06, "loss": 1.3132832050323486, "step": 678 }, { "epoch": 0.12378265222535724, "grad_norm": 11.5625, "learning_rate": 9.973702568760768e-06, "loss": 1.4829702377319336, "step": 680 }, { "epoch": 0.12414671884954946, "grad_norm": 7.3125, "learning_rate": 9.973526860985702e-06, "loss": 1.3518272638320923, "step": 682 }, { "epoch": 0.1245107854737417, "grad_norm": 8.0, "learning_rate": 9.973350570111673e-06, "loss": 0.964803159236908, "step": 684 }, { "epoch": 0.12487485209793392, "grad_norm": 7.8125, "learning_rate": 9.973173696164549e-06, "loss": 1.5220879316329956, "step": 686 }, { "epoch": 0.12523891872212614, "grad_norm": 7.09375, "learning_rate": 9.97299623917029e-06, "loss": 1.4920923709869385, "step": 688 }, { "epoch": 0.12560298534631836, "grad_norm": 7.3125, "learning_rate": 9.97281819915493e-06, "loss": 1.5284004211425781, "step": 690 }, { "epoch": 0.1259670519705106, "grad_norm": 7.3125, "learning_rate": 9.972639576144603e-06, "loss": 1.1986876726150513, "step": 692 }, { "epoch": 0.12633111859470283, "grad_norm": 4.90625, "learning_rate": 9.972460370165516e-06, "loss": 1.2006864547729492, "step": 694 }, { "epoch": 0.12669518521889506, "grad_norm": 17.125, "learning_rate": 9.97228058124397e-06, "loss": 1.4673051834106445, "step": 696 }, { "epoch": 0.12705925184308728, "grad_norm": 11.5, "learning_rate": 9.972100209406345e-06, "loss": 1.475710153579712, "step": 698 }, { "epoch": 0.1274233184672795, "grad_norm": 31.125, "learning_rate": 9.971919254679113e-06, "loss": 2.0274956226348877, "step": 700 }, { "epoch": 0.12778738509147175, "grad_norm": 14.875, "learning_rate": 9.971737717088826e-06, "loss": 1.4405767917633057, "step": 702 }, { "epoch": 0.12815145171566397, "grad_norm": 14.0, "learning_rate": 9.971555596662125e-06, "loss": 1.1153233051300049, "step": 704 }, { "epoch": 0.1285155183398562, "grad_norm": 9.875, "learning_rate": 9.971372893425739e-06, "loss": 1.256136417388916, "step": 706 }, { "epoch": 0.12887958496404842, "grad_norm": 15.9375, "learning_rate": 9.971189607406473e-06, "loss": 1.79349684715271, "step": 708 }, { "epoch": 0.12924365158824064, "grad_norm": 4.34375, "learning_rate": 9.971005738631226e-06, "loss": 0.9817097783088684, "step": 710 }, { "epoch": 0.1296077182124329, "grad_norm": 8.0625, "learning_rate": 9.97082128712698e-06, "loss": 1.4649547338485718, "step": 712 }, { "epoch": 0.1299717848366251, "grad_norm": 17.125, "learning_rate": 9.970636252920802e-06, "loss": 0.6673729419708252, "step": 714 }, { "epoch": 0.13033585146081733, "grad_norm": 6.9375, "learning_rate": 9.970450636039846e-06, "loss": 1.397263765335083, "step": 716 }, { "epoch": 0.13069991808500955, "grad_norm": 23.75, "learning_rate": 9.97026443651135e-06, "loss": 1.0178101062774658, "step": 718 }, { "epoch": 0.13106398470920178, "grad_norm": 9.5, "learning_rate": 9.970077654362637e-06, "loss": 1.8943710327148438, "step": 720 }, { "epoch": 0.13142805133339402, "grad_norm": 41.75, "learning_rate": 9.969890289621117e-06, "loss": 1.5627148151397705, "step": 722 }, { "epoch": 0.13179211795758625, "grad_norm": 9.625, "learning_rate": 9.969702342314289e-06, "loss": 1.4578361511230469, "step": 724 }, { "epoch": 0.13215618458177847, "grad_norm": 11.0625, "learning_rate": 9.969513812469726e-06, "loss": 1.7569128274917603, "step": 726 }, { "epoch": 0.1325202512059707, "grad_norm": 24.375, "learning_rate": 9.969324700115101e-06, "loss": 1.8174906969070435, "step": 728 }, { "epoch": 0.1328843178301629, "grad_norm": 6.21875, "learning_rate": 9.969135005278164e-06, "loss": 0.8841165900230408, "step": 730 }, { "epoch": 0.13324838445435513, "grad_norm": 3.921875, "learning_rate": 9.968944727986746e-06, "loss": 1.0754631757736206, "step": 732 }, { "epoch": 0.13361245107854738, "grad_norm": 21.25, "learning_rate": 9.968753868268776e-06, "loss": 1.3698331117630005, "step": 734 }, { "epoch": 0.1339765177027396, "grad_norm": 6.28125, "learning_rate": 9.96856242615226e-06, "loss": 1.5272889137268066, "step": 736 }, { "epoch": 0.13434058432693183, "grad_norm": 18.5, "learning_rate": 9.96837040166529e-06, "loss": 1.4165997505187988, "step": 738 }, { "epoch": 0.13470465095112405, "grad_norm": 20.5, "learning_rate": 9.968177794836047e-06, "loss": 2.025388240814209, "step": 740 }, { "epoch": 0.13506871757531627, "grad_norm": 8.375, "learning_rate": 9.967984605692796e-06, "loss": 1.4579286575317383, "step": 742 }, { "epoch": 0.13543278419950852, "grad_norm": 6.5625, "learning_rate": 9.967790834263882e-06, "loss": 1.4546328783035278, "step": 744 }, { "epoch": 0.13579685082370074, "grad_norm": 20.5, "learning_rate": 9.967596480577744e-06, "loss": 0.7280870079994202, "step": 746 }, { "epoch": 0.13616091744789297, "grad_norm": 20.75, "learning_rate": 9.967401544662902e-06, "loss": 1.4604501724243164, "step": 748 }, { "epoch": 0.1365249840720852, "grad_norm": 4.0, "learning_rate": 9.967206026547962e-06, "loss": 1.1921180486679077, "step": 750 }, { "epoch": 0.1368890506962774, "grad_norm": 11.9375, "learning_rate": 9.967009926261615e-06, "loss": 1.5692627429962158, "step": 752 }, { "epoch": 0.13725311732046966, "grad_norm": 13.5, "learning_rate": 9.966813243832638e-06, "loss": 1.506689429283142, "step": 754 }, { "epoch": 0.13761718394466188, "grad_norm": 14.6875, "learning_rate": 9.966615979289893e-06, "loss": 1.4427322149276733, "step": 756 }, { "epoch": 0.1379812505688541, "grad_norm": 4.09375, "learning_rate": 9.96641813266233e-06, "loss": 1.2432501316070557, "step": 758 }, { "epoch": 0.13834531719304632, "grad_norm": 8.625, "learning_rate": 9.966219703978979e-06, "loss": 1.504516839981079, "step": 760 }, { "epoch": 0.13870938381723855, "grad_norm": 30.75, "learning_rate": 9.966020693268961e-06, "loss": 1.0999618768692017, "step": 762 }, { "epoch": 0.13907345044143077, "grad_norm": 6.1875, "learning_rate": 9.965821100561479e-06, "loss": 1.4633733034133911, "step": 764 }, { "epoch": 0.13943751706562302, "grad_norm": 8.6875, "learning_rate": 9.965620925885822e-06, "loss": 1.5323455333709717, "step": 766 }, { "epoch": 0.13980158368981524, "grad_norm": 9.375, "learning_rate": 9.965420169271367e-06, "loss": 1.4056990146636963, "step": 768 }, { "epoch": 0.14016565031400746, "grad_norm": 6.28125, "learning_rate": 9.96521883074757e-06, "loss": 1.4805994033813477, "step": 770 }, { "epoch": 0.14052971693819968, "grad_norm": 10.0625, "learning_rate": 9.96501691034398e-06, "loss": 1.500640869140625, "step": 772 }, { "epoch": 0.1408937835623919, "grad_norm": 10.125, "learning_rate": 9.964814408090228e-06, "loss": 1.409245252609253, "step": 774 }, { "epoch": 0.14125785018658416, "grad_norm": 17.25, "learning_rate": 9.96461132401603e-06, "loss": 1.5120081901550293, "step": 776 }, { "epoch": 0.14162191681077638, "grad_norm": 39.5, "learning_rate": 9.964407658151188e-06, "loss": 1.4226701259613037, "step": 778 }, { "epoch": 0.1419859834349686, "grad_norm": 11.1875, "learning_rate": 9.964203410525585e-06, "loss": 1.6625443696975708, "step": 780 }, { "epoch": 0.14235005005916082, "grad_norm": 9.75, "learning_rate": 9.963998581169201e-06, "loss": 1.989743947982788, "step": 782 }, { "epoch": 0.14271411668335304, "grad_norm": 22.5, "learning_rate": 9.963793170112089e-06, "loss": 1.6876050233840942, "step": 784 }, { "epoch": 0.1430781833075453, "grad_norm": 8.1875, "learning_rate": 9.963587177384391e-06, "loss": 1.217936635017395, "step": 786 }, { "epoch": 0.14344224993173751, "grad_norm": 9.375, "learning_rate": 9.963380603016339e-06, "loss": 1.0292983055114746, "step": 788 }, { "epoch": 0.14380631655592974, "grad_norm": 7.3125, "learning_rate": 9.963173447038246e-06, "loss": 1.520676612854004, "step": 790 }, { "epoch": 0.14417038318012196, "grad_norm": 26.0, "learning_rate": 9.96296570948051e-06, "loss": 1.4777098894119263, "step": 792 }, { "epoch": 0.14453444980431418, "grad_norm": 22.5, "learning_rate": 9.962757390373616e-06, "loss": 1.348071813583374, "step": 794 }, { "epoch": 0.14489851642850643, "grad_norm": 23.0, "learning_rate": 9.962548489748138e-06, "loss": 1.1797630786895752, "step": 796 }, { "epoch": 0.14526258305269865, "grad_norm": 14.8125, "learning_rate": 9.962339007634724e-06, "loss": 2.2863168716430664, "step": 798 }, { "epoch": 0.14562664967689087, "grad_norm": 4.25, "learning_rate": 9.962128944064123e-06, "loss": 1.5526676177978516, "step": 800 }, { "epoch": 0.1459907163010831, "grad_norm": 11.1875, "learning_rate": 9.961918299067152e-06, "loss": 1.201155662536621, "step": 802 }, { "epoch": 0.14635478292527532, "grad_norm": 3.015625, "learning_rate": 9.961707072674731e-06, "loss": 1.3782280683517456, "step": 804 }, { "epoch": 0.14671884954946754, "grad_norm": 258.0, "learning_rate": 9.961495264917849e-06, "loss": 1.1102190017700195, "step": 806 }, { "epoch": 0.1470829161736598, "grad_norm": 35.75, "learning_rate": 9.961282875827593e-06, "loss": 1.995614767074585, "step": 808 }, { "epoch": 0.147446982797852, "grad_norm": 76.5, "learning_rate": 9.961069905435127e-06, "loss": 2.0287728309631348, "step": 810 }, { "epoch": 0.14781104942204423, "grad_norm": 38.25, "learning_rate": 9.960856353771709e-06, "loss": 0.8975493311882019, "step": 812 }, { "epoch": 0.14817511604623645, "grad_norm": 16.5, "learning_rate": 9.96064222086867e-06, "loss": 1.629402756690979, "step": 814 }, { "epoch": 0.14853918267042868, "grad_norm": 8.0625, "learning_rate": 9.960427506757438e-06, "loss": 1.3786468505859375, "step": 816 }, { "epoch": 0.14890324929462093, "grad_norm": 16.5, "learning_rate": 9.960212211469518e-06, "loss": 1.452179193496704, "step": 818 }, { "epoch": 0.14926731591881315, "grad_norm": 12.1875, "learning_rate": 9.959996335036507e-06, "loss": 1.387854814529419, "step": 820 }, { "epoch": 0.14963138254300537, "grad_norm": 9.1875, "learning_rate": 9.959779877490079e-06, "loss": 1.5545549392700195, "step": 822 }, { "epoch": 0.1499954491671976, "grad_norm": 23.625, "learning_rate": 9.959562838862003e-06, "loss": 1.6043356657028198, "step": 824 }, { "epoch": 0.15035951579138981, "grad_norm": 9.875, "learning_rate": 9.959345219184128e-06, "loss": 1.181017518043518, "step": 826 }, { "epoch": 0.15072358241558206, "grad_norm": 48.25, "learning_rate": 9.959127018488388e-06, "loss": 0.8593971133232117, "step": 828 }, { "epoch": 0.15108764903977429, "grad_norm": 13.125, "learning_rate": 9.958908236806801e-06, "loss": 1.6113003492355347, "step": 830 }, { "epoch": 0.1514517156639665, "grad_norm": 3.296875, "learning_rate": 9.958688874171475e-06, "loss": 1.1172258853912354, "step": 832 }, { "epoch": 0.15181578228815873, "grad_norm": 40.75, "learning_rate": 9.958468930614601e-06, "loss": 1.400185227394104, "step": 834 }, { "epoch": 0.15217984891235095, "grad_norm": 4.65625, "learning_rate": 9.958248406168456e-06, "loss": 0.9719647169113159, "step": 836 }, { "epoch": 0.1525439155365432, "grad_norm": 37.5, "learning_rate": 9.958027300865395e-06, "loss": 2.0113043785095215, "step": 838 }, { "epoch": 0.15290798216073542, "grad_norm": 6.28125, "learning_rate": 9.95780561473787e-06, "loss": 1.525176763534546, "step": 840 }, { "epoch": 0.15327204878492764, "grad_norm": 7.875, "learning_rate": 9.95758334781841e-06, "loss": 1.3904681205749512, "step": 842 }, { "epoch": 0.15363611540911987, "grad_norm": 9.0, "learning_rate": 9.957360500139633e-06, "loss": 1.3753796815872192, "step": 844 }, { "epoch": 0.1540001820333121, "grad_norm": 9.0, "learning_rate": 9.957137071734239e-06, "loss": 1.5650053024291992, "step": 846 }, { "epoch": 0.1543642486575043, "grad_norm": 15.875, "learning_rate": 9.956913062635017e-06, "loss": 1.6978940963745117, "step": 848 }, { "epoch": 0.15472831528169656, "grad_norm": 33.0, "learning_rate": 9.956688472874838e-06, "loss": 2.079392433166504, "step": 850 }, { "epoch": 0.15509238190588878, "grad_norm": 8.875, "learning_rate": 9.956463302486662e-06, "loss": 1.4703980684280396, "step": 852 }, { "epoch": 0.155456448530081, "grad_norm": 7.75, "learning_rate": 9.95623755150353e-06, "loss": 1.3990546464920044, "step": 854 }, { "epoch": 0.15582051515427323, "grad_norm": 17.25, "learning_rate": 9.956011219958572e-06, "loss": 1.414876937866211, "step": 856 }, { "epoch": 0.15618458177846545, "grad_norm": 20.125, "learning_rate": 9.955784307884999e-06, "loss": 1.403910517692566, "step": 858 }, { "epoch": 0.1565486484026577, "grad_norm": 15.5625, "learning_rate": 9.95555681531611e-06, "loss": 1.23539400100708, "step": 860 }, { "epoch": 0.15691271502684992, "grad_norm": 9.3125, "learning_rate": 9.955328742285288e-06, "loss": 1.3026257753372192, "step": 862 }, { "epoch": 0.15727678165104214, "grad_norm": 25.375, "learning_rate": 9.955100088826005e-06, "loss": 0.5582795143127441, "step": 864 }, { "epoch": 0.15764084827523436, "grad_norm": 5.875, "learning_rate": 9.954870854971809e-06, "loss": 1.4719241857528687, "step": 866 }, { "epoch": 0.15800491489942659, "grad_norm": 4.75, "learning_rate": 9.954641040756346e-06, "loss": 1.4069263935089111, "step": 868 }, { "epoch": 0.15836898152361883, "grad_norm": 4.5625, "learning_rate": 9.954410646213334e-06, "loss": 1.4353796243667603, "step": 870 }, { "epoch": 0.15873304814781106, "grad_norm": 5.875, "learning_rate": 9.954179671376589e-06, "loss": 1.2221503257751465, "step": 872 }, { "epoch": 0.15909711477200328, "grad_norm": 11.8125, "learning_rate": 9.953948116280001e-06, "loss": 1.591544270515442, "step": 874 }, { "epoch": 0.1594611813961955, "grad_norm": 6.75, "learning_rate": 9.95371598095755e-06, "loss": 1.3720848560333252, "step": 876 }, { "epoch": 0.15982524802038772, "grad_norm": 90.5, "learning_rate": 9.953483265443303e-06, "loss": 1.4762324094772339, "step": 878 }, { "epoch": 0.16018931464457994, "grad_norm": 14.375, "learning_rate": 9.953249969771408e-06, "loss": 1.6112408638000488, "step": 880 }, { "epoch": 0.1605533812687722, "grad_norm": 11.5, "learning_rate": 9.9530160939761e-06, "loss": 1.8050957918167114, "step": 882 }, { "epoch": 0.16091744789296442, "grad_norm": 18.5, "learning_rate": 9.952781638091702e-06, "loss": 1.5882316827774048, "step": 884 }, { "epoch": 0.16128151451715664, "grad_norm": 34.5, "learning_rate": 9.952546602152618e-06, "loss": 2.116976737976074, "step": 886 }, { "epoch": 0.16164558114134886, "grad_norm": 32.5, "learning_rate": 9.952310986193337e-06, "loss": 1.351901650428772, "step": 888 }, { "epoch": 0.16200964776554108, "grad_norm": 17.5, "learning_rate": 9.952074790248436e-06, "loss": 1.0881125926971436, "step": 890 }, { "epoch": 0.16237371438973333, "grad_norm": 5.96875, "learning_rate": 9.951838014352578e-06, "loss": 1.182440996170044, "step": 892 }, { "epoch": 0.16273778101392555, "grad_norm": 11.1875, "learning_rate": 9.951600658540503e-06, "loss": 0.9450311660766602, "step": 894 }, { "epoch": 0.16310184763811778, "grad_norm": 9.4375, "learning_rate": 9.951362722847048e-06, "loss": 1.0391665697097778, "step": 896 }, { "epoch": 0.16346591426231, "grad_norm": 8.375, "learning_rate": 9.951124207307127e-06, "loss": 1.4324138164520264, "step": 898 }, { "epoch": 0.16382998088650222, "grad_norm": 22.75, "learning_rate": 9.950885111955742e-06, "loss": 1.5406783819198608, "step": 900 }, { "epoch": 0.16419404751069447, "grad_norm": 11.5625, "learning_rate": 9.950645436827977e-06, "loss": 1.5500184297561646, "step": 902 }, { "epoch": 0.1645581141348867, "grad_norm": 14.4375, "learning_rate": 9.950405181959007e-06, "loss": 1.4855787754058838, "step": 904 }, { "epoch": 0.1649221807590789, "grad_norm": 38.0, "learning_rate": 9.950164347384083e-06, "loss": 1.4765028953552246, "step": 906 }, { "epoch": 0.16528624738327113, "grad_norm": 9.5, "learning_rate": 9.949922933138552e-06, "loss": 1.1264008283615112, "step": 908 }, { "epoch": 0.16565031400746336, "grad_norm": 8.25, "learning_rate": 9.949680939257838e-06, "loss": 1.071408987045288, "step": 910 }, { "epoch": 0.1660143806316556, "grad_norm": 12.8125, "learning_rate": 9.94943836577745e-06, "loss": 1.0403443574905396, "step": 912 }, { "epoch": 0.16637844725584783, "grad_norm": 5.21875, "learning_rate": 9.94919521273299e-06, "loss": 1.491145372390747, "step": 914 }, { "epoch": 0.16674251388004005, "grad_norm": 18.375, "learning_rate": 9.948951480160138e-06, "loss": 1.5384421348571777, "step": 916 }, { "epoch": 0.16710658050423227, "grad_norm": 19.625, "learning_rate": 9.948707168094658e-06, "loss": 1.332274317741394, "step": 918 }, { "epoch": 0.1674706471284245, "grad_norm": 8.4375, "learning_rate": 9.948462276572404e-06, "loss": 1.6329753398895264, "step": 920 }, { "epoch": 0.16783471375261672, "grad_norm": 18.875, "learning_rate": 9.948216805629314e-06, "loss": 1.5353933572769165, "step": 922 }, { "epoch": 0.16819878037680897, "grad_norm": 10.1875, "learning_rate": 9.947970755301408e-06, "loss": 1.2155869007110596, "step": 924 }, { "epoch": 0.1685628470010012, "grad_norm": 9.75, "learning_rate": 9.947724125624793e-06, "loss": 1.4639822244644165, "step": 926 }, { "epoch": 0.1689269136251934, "grad_norm": 13.125, "learning_rate": 9.94747691663566e-06, "loss": 1.5783990621566772, "step": 928 }, { "epoch": 0.16929098024938563, "grad_norm": 21.75, "learning_rate": 9.947229128370289e-06, "loss": 1.7080756425857544, "step": 930 }, { "epoch": 0.16965504687357785, "grad_norm": 21.0, "learning_rate": 9.94698076086504e-06, "loss": 1.9166209697723389, "step": 932 }, { "epoch": 0.1700191134977701, "grad_norm": 7.96875, "learning_rate": 9.946731814156358e-06, "loss": 1.4945251941680908, "step": 934 }, { "epoch": 0.17038318012196232, "grad_norm": 8.5625, "learning_rate": 9.946482288280782e-06, "loss": 1.2176241874694824, "step": 936 }, { "epoch": 0.17074724674615455, "grad_norm": 13.1875, "learning_rate": 9.946232183274918e-06, "loss": 1.4079389572143555, "step": 938 }, { "epoch": 0.17111131337034677, "grad_norm": 16.75, "learning_rate": 9.945981499175476e-06, "loss": 1.3811269998550415, "step": 940 }, { "epoch": 0.171475379994539, "grad_norm": 13.0625, "learning_rate": 9.945730236019242e-06, "loss": 1.600013017654419, "step": 942 }, { "epoch": 0.17183944661873124, "grad_norm": 28.5, "learning_rate": 9.945478393843086e-06, "loss": 1.5965791940689087, "step": 944 }, { "epoch": 0.17220351324292346, "grad_norm": 12.4375, "learning_rate": 9.945225972683965e-06, "loss": 1.4906278848648071, "step": 946 }, { "epoch": 0.17256757986711568, "grad_norm": 6.46875, "learning_rate": 9.944972972578921e-06, "loss": 1.0816751718521118, "step": 948 }, { "epoch": 0.1729316464913079, "grad_norm": 7.75, "learning_rate": 9.94471939356508e-06, "loss": 1.4420520067214966, "step": 950 }, { "epoch": 0.17329571311550013, "grad_norm": 11.5, "learning_rate": 9.944465235679657e-06, "loss": 1.559586524963379, "step": 952 }, { "epoch": 0.17365977973969238, "grad_norm": 7.0625, "learning_rate": 9.944210498959943e-06, "loss": 1.3783751726150513, "step": 954 }, { "epoch": 0.1740238463638846, "grad_norm": 12.875, "learning_rate": 9.943955183443325e-06, "loss": 1.5681862831115723, "step": 956 }, { "epoch": 0.17438791298807682, "grad_norm": 12.75, "learning_rate": 9.943699289167265e-06, "loss": 1.6779510974884033, "step": 958 }, { "epoch": 0.17475197961226904, "grad_norm": 19.625, "learning_rate": 9.94344281616932e-06, "loss": 1.9288862943649292, "step": 960 }, { "epoch": 0.17511604623646126, "grad_norm": 7.71875, "learning_rate": 9.943185764487122e-06, "loss": 1.4111822843551636, "step": 962 }, { "epoch": 0.1754801128606535, "grad_norm": 18.5, "learning_rate": 9.942928134158392e-06, "loss": 1.4696686267852783, "step": 964 }, { "epoch": 0.17584417948484574, "grad_norm": 47.25, "learning_rate": 9.942669925220938e-06, "loss": 1.5688788890838623, "step": 966 }, { "epoch": 0.17620824610903796, "grad_norm": 39.5, "learning_rate": 9.942411137712651e-06, "loss": 2.28415584564209, "step": 968 }, { "epoch": 0.17657231273323018, "grad_norm": 6.5, "learning_rate": 9.942151771671506e-06, "loss": 1.3252819776535034, "step": 970 }, { "epoch": 0.1769363793574224, "grad_norm": 6.4375, "learning_rate": 9.941891827135568e-06, "loss": 1.2047700881958008, "step": 972 }, { "epoch": 0.17730044598161462, "grad_norm": 8.125, "learning_rate": 9.941631304142976e-06, "loss": 1.415588617324829, "step": 974 }, { "epoch": 0.17766451260580687, "grad_norm": 10.5625, "learning_rate": 9.941370202731966e-06, "loss": 1.6344401836395264, "step": 976 }, { "epoch": 0.1780285792299991, "grad_norm": 43.75, "learning_rate": 9.94110852294085e-06, "loss": 1.6803901195526123, "step": 978 }, { "epoch": 0.17839264585419132, "grad_norm": 9.9375, "learning_rate": 9.940846264808031e-06, "loss": 2.081049919128418, "step": 980 }, { "epoch": 0.17875671247838354, "grad_norm": 7.125, "learning_rate": 9.940583428371993e-06, "loss": 1.3472354412078857, "step": 982 }, { "epoch": 0.17912077910257576, "grad_norm": 11.5625, "learning_rate": 9.94032001367131e-06, "loss": 1.3411457538604736, "step": 984 }, { "epoch": 0.179484845726768, "grad_norm": 8.0625, "learning_rate": 9.940056020744628e-06, "loss": 1.1788313388824463, "step": 986 }, { "epoch": 0.17984891235096023, "grad_norm": 16.0, "learning_rate": 9.939791449630696e-06, "loss": 1.1841378211975098, "step": 988 }, { "epoch": 0.18021297897515245, "grad_norm": 8.75, "learning_rate": 9.939526300368337e-06, "loss": 1.285358190536499, "step": 990 }, { "epoch": 0.18057704559934468, "grad_norm": 12.0625, "learning_rate": 9.939260572996456e-06, "loss": 1.534583330154419, "step": 992 }, { "epoch": 0.1809411122235369, "grad_norm": 10.5625, "learning_rate": 9.93899426755405e-06, "loss": 1.439420223236084, "step": 994 }, { "epoch": 0.18130517884772915, "grad_norm": 12.0625, "learning_rate": 9.938727384080201e-06, "loss": 1.6394792795181274, "step": 996 }, { "epoch": 0.18166924547192137, "grad_norm": 18.375, "learning_rate": 9.938459922614069e-06, "loss": 2.0668153762817383, "step": 998 }, { "epoch": 0.1820333120961136, "grad_norm": 11.0625, "learning_rate": 9.938191883194906e-06, "loss": 1.2434909343719482, "step": 1000 }, { "epoch": 0.18239737872030581, "grad_norm": 8.625, "learning_rate": 9.937923265862041e-06, "loss": 1.194491982460022, "step": 1002 }, { "epoch": 0.18276144534449804, "grad_norm": 12.75, "learning_rate": 9.937654070654898e-06, "loss": 0.9255943298339844, "step": 1004 }, { "epoch": 0.18312551196869026, "grad_norm": 9.25, "learning_rate": 9.93738429761298e-06, "loss": 1.0794122219085693, "step": 1006 }, { "epoch": 0.1834895785928825, "grad_norm": 11.3125, "learning_rate": 9.93711394677587e-06, "loss": 0.2285841405391693, "step": 1008 }, { "epoch": 0.18385364521707473, "grad_norm": 9.8125, "learning_rate": 9.936843018183245e-06, "loss": 0.5525633096694946, "step": 1010 }, { "epoch": 0.18421771184126695, "grad_norm": 39.0, "learning_rate": 9.936571511874863e-06, "loss": 1.6073859930038452, "step": 1012 }, { "epoch": 0.18458177846545917, "grad_norm": 38.5, "learning_rate": 9.936299427890563e-06, "loss": 1.7834596633911133, "step": 1014 }, { "epoch": 0.1849458450896514, "grad_norm": 10.9375, "learning_rate": 9.936026766270274e-06, "loss": 1.6835075616836548, "step": 1016 }, { "epoch": 0.18530991171384364, "grad_norm": 22.75, "learning_rate": 9.93575352705401e-06, "loss": 2.135343074798584, "step": 1018 }, { "epoch": 0.18567397833803587, "grad_norm": 7.8125, "learning_rate": 9.935479710281867e-06, "loss": 1.7683391571044922, "step": 1020 }, { "epoch": 0.1860380449622281, "grad_norm": 18.75, "learning_rate": 9.935205315994025e-06, "loss": 1.1597998142242432, "step": 1022 }, { "epoch": 0.1864021115864203, "grad_norm": 11.5, "learning_rate": 9.934930344230748e-06, "loss": 1.8588007688522339, "step": 1024 }, { "epoch": 0.18676617821061253, "grad_norm": 7.46875, "learning_rate": 9.934654795032394e-06, "loss": 1.4659507274627686, "step": 1026 }, { "epoch": 0.18713024483480478, "grad_norm": 276.0, "learning_rate": 9.934378668439394e-06, "loss": 1.1973302364349365, "step": 1028 }, { "epoch": 0.187494311458997, "grad_norm": 10.3125, "learning_rate": 9.934101964492266e-06, "loss": 1.6813024282455444, "step": 1030 }, { "epoch": 0.18785837808318923, "grad_norm": 18.75, "learning_rate": 9.93382468323162e-06, "loss": 1.5307891368865967, "step": 1032 }, { "epoch": 0.18822244470738145, "grad_norm": 10.3125, "learning_rate": 9.933546824698145e-06, "loss": 1.5396398305892944, "step": 1034 }, { "epoch": 0.18858651133157367, "grad_norm": 13.1875, "learning_rate": 9.933268388932612e-06, "loss": 1.411933422088623, "step": 1036 }, { "epoch": 0.1889505779557659, "grad_norm": 10.25, "learning_rate": 9.932989375975888e-06, "loss": 1.581071138381958, "step": 1038 }, { "epoch": 0.18931464457995814, "grad_norm": 31.0, "learning_rate": 9.932709785868908e-06, "loss": 1.5011827945709229, "step": 1040 }, { "epoch": 0.18967871120415036, "grad_norm": 30.75, "learning_rate": 9.932429618652706e-06, "loss": 1.411431074142456, "step": 1042 }, { "epoch": 0.19004277782834259, "grad_norm": 7.96875, "learning_rate": 9.932148874368395e-06, "loss": 1.2337169647216797, "step": 1044 }, { "epoch": 0.1904068444525348, "grad_norm": 10.9375, "learning_rate": 9.931867553057171e-06, "loss": 1.2204968929290771, "step": 1046 }, { "epoch": 0.19077091107672703, "grad_norm": 15.5, "learning_rate": 9.931585654760318e-06, "loss": 1.4795103073120117, "step": 1048 }, { "epoch": 0.19113497770091928, "grad_norm": 21.375, "learning_rate": 9.931303179519205e-06, "loss": 1.5027167797088623, "step": 1050 }, { "epoch": 0.1914990443251115, "grad_norm": 12.5625, "learning_rate": 9.931020127375281e-06, "loss": 1.405234456062317, "step": 1052 }, { "epoch": 0.19186311094930372, "grad_norm": 36.75, "learning_rate": 9.930736498370086e-06, "loss": 1.4587359428405762, "step": 1054 }, { "epoch": 0.19222717757349594, "grad_norm": 11.0625, "learning_rate": 9.930452292545239e-06, "loss": 1.5163779258728027, "step": 1056 }, { "epoch": 0.19259124419768817, "grad_norm": 13.875, "learning_rate": 9.930167509942445e-06, "loss": 1.6596927642822266, "step": 1058 }, { "epoch": 0.19295531082188042, "grad_norm": 18.375, "learning_rate": 9.929882150603499e-06, "loss": 1.8548587560653687, "step": 1060 }, { "epoch": 0.19331937744607264, "grad_norm": 8.0625, "learning_rate": 9.929596214570272e-06, "loss": 1.2847120761871338, "step": 1062 }, { "epoch": 0.19368344407026486, "grad_norm": 2.71875, "learning_rate": 9.929309701884725e-06, "loss": 0.8553503751754761, "step": 1064 }, { "epoch": 0.19404751069445708, "grad_norm": 5.0625, "learning_rate": 9.929022612588908e-06, "loss": 1.2123963832855225, "step": 1066 }, { "epoch": 0.1944115773186493, "grad_norm": 19.125, "learning_rate": 9.92873494672494e-06, "loss": 1.4959981441497803, "step": 1068 }, { "epoch": 0.19477564394284155, "grad_norm": 8.25, "learning_rate": 9.928446704335044e-06, "loss": 1.4451946020126343, "step": 1070 }, { "epoch": 0.19513971056703378, "grad_norm": 6.96875, "learning_rate": 9.928157885461514e-06, "loss": 1.088136911392212, "step": 1072 }, { "epoch": 0.195503777191226, "grad_norm": 10.125, "learning_rate": 9.927868490146732e-06, "loss": 1.2119640111923218, "step": 1074 }, { "epoch": 0.19586784381541822, "grad_norm": 5.4375, "learning_rate": 9.927578518433167e-06, "loss": 1.1197935342788696, "step": 1076 }, { "epoch": 0.19623191043961044, "grad_norm": 43.0, "learning_rate": 9.927287970363375e-06, "loss": 1.6458243131637573, "step": 1078 }, { "epoch": 0.19659597706380266, "grad_norm": 11.25, "learning_rate": 9.926996845979986e-06, "loss": 1.4721394777297974, "step": 1080 }, { "epoch": 0.1969600436879949, "grad_norm": 15.0, "learning_rate": 9.926705145325729e-06, "loss": 1.404916763305664, "step": 1082 }, { "epoch": 0.19732411031218713, "grad_norm": 6.90625, "learning_rate": 9.9264128684434e-06, "loss": 1.3604183197021484, "step": 1084 }, { "epoch": 0.19768817693637936, "grad_norm": 7.5625, "learning_rate": 9.926120015375897e-06, "loss": 1.0928027629852295, "step": 1086 }, { "epoch": 0.19805224356057158, "grad_norm": 4.53125, "learning_rate": 9.925826586166193e-06, "loss": 1.4573407173156738, "step": 1088 }, { "epoch": 0.1984163101847638, "grad_norm": 4.34375, "learning_rate": 9.925532580857349e-06, "loss": 0.8799431324005127, "step": 1090 }, { "epoch": 0.19878037680895605, "grad_norm": 16.625, "learning_rate": 9.925237999492505e-06, "loss": 1.3133742809295654, "step": 1092 }, { "epoch": 0.19914444343314827, "grad_norm": 11.9375, "learning_rate": 9.924942842114895e-06, "loss": 1.619940161705017, "step": 1094 }, { "epoch": 0.1995085100573405, "grad_norm": 15.4375, "learning_rate": 9.924647108767826e-06, "loss": 1.6402709484100342, "step": 1096 }, { "epoch": 0.19987257668153272, "grad_norm": 18.75, "learning_rate": 9.924350799494701e-06, "loss": 1.0292584896087646, "step": 1098 }, { "epoch": 0.20023664330572494, "grad_norm": 7.375, "learning_rate": 9.924053914339e-06, "loss": 1.3446272611618042, "step": 1100 }, { "epoch": 0.2006007099299172, "grad_norm": 46.0, "learning_rate": 9.923756453344291e-06, "loss": 1.4331274032592773, "step": 1102 }, { "epoch": 0.2009647765541094, "grad_norm": 9.0625, "learning_rate": 9.923458416554221e-06, "loss": 1.8346397876739502, "step": 1104 }, { "epoch": 0.20132884317830163, "grad_norm": 29.875, "learning_rate": 9.923159804012531e-06, "loss": 1.6610716581344604, "step": 1106 }, { "epoch": 0.20169290980249385, "grad_norm": 10.5625, "learning_rate": 9.922860615763039e-06, "loss": 1.5934720039367676, "step": 1108 }, { "epoch": 0.20205697642668607, "grad_norm": 14.0, "learning_rate": 9.92256085184965e-06, "loss": 1.4009087085723877, "step": 1110 }, { "epoch": 0.20242104305087832, "grad_norm": 10.0, "learning_rate": 9.922260512316352e-06, "loss": 1.3968048095703125, "step": 1112 }, { "epoch": 0.20278510967507055, "grad_norm": 3.703125, "learning_rate": 9.92195959720722e-06, "loss": 1.24936842918396, "step": 1114 }, { "epoch": 0.20314917629926277, "grad_norm": 10.25, "learning_rate": 9.92165810656641e-06, "loss": 1.251719355583191, "step": 1116 }, { "epoch": 0.203513242923455, "grad_norm": 14.4375, "learning_rate": 9.921356040438165e-06, "loss": 1.711976408958435, "step": 1118 }, { "epoch": 0.2038773095476472, "grad_norm": 14.5, "learning_rate": 9.921053398866816e-06, "loss": 1.462119698524475, "step": 1120 }, { "epoch": 0.20424137617183943, "grad_norm": 24.0, "learning_rate": 9.92075018189677e-06, "loss": 1.8700501918792725, "step": 1122 }, { "epoch": 0.20460544279603168, "grad_norm": 15.875, "learning_rate": 9.920446389572523e-06, "loss": 1.4322757720947266, "step": 1124 }, { "epoch": 0.2049695094202239, "grad_norm": 22.25, "learning_rate": 9.92014202193866e-06, "loss": 1.3662235736846924, "step": 1126 }, { "epoch": 0.20533357604441613, "grad_norm": 12.75, "learning_rate": 9.919837079039838e-06, "loss": 0.9338915944099426, "step": 1128 }, { "epoch": 0.20569764266860835, "grad_norm": 35.5, "learning_rate": 9.919531560920812e-06, "loss": 1.1594756841659546, "step": 1130 }, { "epoch": 0.20606170929280057, "grad_norm": 6.1875, "learning_rate": 9.919225467626414e-06, "loss": 0.9257606267929077, "step": 1132 }, { "epoch": 0.20642577591699282, "grad_norm": 20.125, "learning_rate": 9.918918799201563e-06, "loss": 1.1696546077728271, "step": 1134 }, { "epoch": 0.20678984254118504, "grad_norm": 8.875, "learning_rate": 9.918611555691258e-06, "loss": 1.4146943092346191, "step": 1136 }, { "epoch": 0.20715390916537726, "grad_norm": 18.25, "learning_rate": 9.91830373714059e-06, "loss": 1.5787546634674072, "step": 1138 }, { "epoch": 0.2075179757895695, "grad_norm": 7.71875, "learning_rate": 9.917995343594726e-06, "loss": 1.4461215734481812, "step": 1140 }, { "epoch": 0.2078820424137617, "grad_norm": 13.8125, "learning_rate": 9.917686375098925e-06, "loss": 1.4070762395858765, "step": 1142 }, { "epoch": 0.20824610903795396, "grad_norm": 28.25, "learning_rate": 9.917376831698526e-06, "loss": 1.3918594121932983, "step": 1144 }, { "epoch": 0.20861017566214618, "grad_norm": 14.125, "learning_rate": 9.917066713438952e-06, "loss": 1.3972517251968384, "step": 1146 }, { "epoch": 0.2089742422863384, "grad_norm": 9.3125, "learning_rate": 9.916756020365712e-06, "loss": 1.2324825525283813, "step": 1148 }, { "epoch": 0.20933830891053062, "grad_norm": 8.6875, "learning_rate": 9.916444752524398e-06, "loss": 0.8400664925575256, "step": 1150 }, { "epoch": 0.20970237553472285, "grad_norm": 7.75, "learning_rate": 9.91613290996069e-06, "loss": 0.996133029460907, "step": 1152 }, { "epoch": 0.21006644215891507, "grad_norm": 6.625, "learning_rate": 9.91582049272035e-06, "loss": 0.4878230690956116, "step": 1154 }, { "epoch": 0.21043050878310732, "grad_norm": 11.6875, "learning_rate": 9.915507500849219e-06, "loss": 1.4658123254776, "step": 1156 }, { "epoch": 0.21079457540729954, "grad_norm": 4.09375, "learning_rate": 9.915193934393232e-06, "loss": 0.9831236600875854, "step": 1158 }, { "epoch": 0.21115864203149176, "grad_norm": 13.0, "learning_rate": 9.914879793398402e-06, "loss": 1.397628903388977, "step": 1160 }, { "epoch": 0.21152270865568398, "grad_norm": 22.0, "learning_rate": 9.914565077910827e-06, "loss": 1.3963582515716553, "step": 1162 }, { "epoch": 0.2118867752798762, "grad_norm": 61.5, "learning_rate": 9.914249787976694e-06, "loss": 1.5733776092529297, "step": 1164 }, { "epoch": 0.21225084190406845, "grad_norm": 39.75, "learning_rate": 9.913933923642263e-06, "loss": 0.5213334560394287, "step": 1166 }, { "epoch": 0.21261490852826068, "grad_norm": 4.09375, "learning_rate": 9.913617484953895e-06, "loss": 1.096614122390747, "step": 1168 }, { "epoch": 0.2129789751524529, "grad_norm": 12.25, "learning_rate": 9.913300471958019e-06, "loss": 1.6260157823562622, "step": 1170 }, { "epoch": 0.21334304177664512, "grad_norm": 11.9375, "learning_rate": 9.912982884701157e-06, "loss": 1.5148996114730835, "step": 1172 }, { "epoch": 0.21370710840083734, "grad_norm": 10.25, "learning_rate": 9.912664723229916e-06, "loss": 1.464977741241455, "step": 1174 }, { "epoch": 0.2140711750250296, "grad_norm": 7.375, "learning_rate": 9.912345987590985e-06, "loss": 1.406298041343689, "step": 1176 }, { "epoch": 0.2144352416492218, "grad_norm": 3.0, "learning_rate": 9.912026677831135e-06, "loss": 1.0304433107376099, "step": 1178 }, { "epoch": 0.21479930827341404, "grad_norm": 5.3125, "learning_rate": 9.911706793997223e-06, "loss": 1.3358741998672485, "step": 1180 }, { "epoch": 0.21516337489760626, "grad_norm": 64.5, "learning_rate": 9.91138633613619e-06, "loss": 1.5699183940887451, "step": 1182 }, { "epoch": 0.21552744152179848, "grad_norm": 28.0, "learning_rate": 9.911065304295067e-06, "loss": 1.9996180534362793, "step": 1184 }, { "epoch": 0.21589150814599073, "grad_norm": 17.0, "learning_rate": 9.910743698520959e-06, "loss": 1.4907023906707764, "step": 1186 }, { "epoch": 0.21625557477018295, "grad_norm": 17.625, "learning_rate": 9.910421518861062e-06, "loss": 1.4150099754333496, "step": 1188 }, { "epoch": 0.21661964139437517, "grad_norm": 9.3125, "learning_rate": 9.910098765362657e-06, "loss": 1.4136031866073608, "step": 1190 }, { "epoch": 0.2169837080185674, "grad_norm": 10.8125, "learning_rate": 9.909775438073102e-06, "loss": 1.5777891874313354, "step": 1192 }, { "epoch": 0.21734777464275962, "grad_norm": 58.0, "learning_rate": 9.909451537039847e-06, "loss": 1.3451259136199951, "step": 1194 }, { "epoch": 0.21771184126695184, "grad_norm": 8.0625, "learning_rate": 9.909127062310422e-06, "loss": 1.3976876735687256, "step": 1196 }, { "epoch": 0.2180759078911441, "grad_norm": 12.5, "learning_rate": 9.908802013932443e-06, "loss": 1.479409098625183, "step": 1198 }, { "epoch": 0.2184399745153363, "grad_norm": 12.3125, "learning_rate": 9.90847639195361e-06, "loss": 1.1923532485961914, "step": 1200 }, { "epoch": 0.21880404113952853, "grad_norm": 28.625, "learning_rate": 9.908150196421707e-06, "loss": 0.7754479050636292, "step": 1202 }, { "epoch": 0.21916810776372075, "grad_norm": 11.5, "learning_rate": 9.9078234273846e-06, "loss": 0.6064488291740417, "step": 1204 }, { "epoch": 0.21953217438791298, "grad_norm": 9.9375, "learning_rate": 9.907496084890242e-06, "loss": 1.1636220216751099, "step": 1206 }, { "epoch": 0.21989624101210523, "grad_norm": 15.0625, "learning_rate": 9.90716816898667e-06, "loss": 1.0404331684112549, "step": 1208 }, { "epoch": 0.22026030763629745, "grad_norm": 37.75, "learning_rate": 9.906839679722002e-06, "loss": 1.5967075824737549, "step": 1210 }, { "epoch": 0.22062437426048967, "grad_norm": 14.8125, "learning_rate": 9.906510617144448e-06, "loss": 1.4154984951019287, "step": 1212 }, { "epoch": 0.2209884408846819, "grad_norm": 10.5, "learning_rate": 9.906180981302286e-06, "loss": 1.3427590131759644, "step": 1214 }, { "epoch": 0.2213525075088741, "grad_norm": 6.03125, "learning_rate": 9.905850772243901e-06, "loss": 1.381961464881897, "step": 1216 }, { "epoch": 0.22171657413306636, "grad_norm": 29.0, "learning_rate": 9.905519990017742e-06, "loss": 1.1825193166732788, "step": 1218 }, { "epoch": 0.22208064075725858, "grad_norm": 7.40625, "learning_rate": 9.905188634672352e-06, "loss": 1.1286404132843018, "step": 1220 }, { "epoch": 0.2224447073814508, "grad_norm": 9.875, "learning_rate": 9.904856706256356e-06, "loss": 1.1375209093093872, "step": 1222 }, { "epoch": 0.22280877400564303, "grad_norm": 68.0, "learning_rate": 9.904524204818464e-06, "loss": 0.8734057545661926, "step": 1224 }, { "epoch": 0.22317284062983525, "grad_norm": 33.0, "learning_rate": 9.90419113040747e-06, "loss": 1.3355128765106201, "step": 1226 }, { "epoch": 0.2235369072540275, "grad_norm": 9.6875, "learning_rate": 9.903857483072247e-06, "loss": 1.3340837955474854, "step": 1228 }, { "epoch": 0.22390097387821972, "grad_norm": 6.75, "learning_rate": 9.903523262861763e-06, "loss": 1.293943166732788, "step": 1230 }, { "epoch": 0.22426504050241194, "grad_norm": 16.25, "learning_rate": 9.903188469825057e-06, "loss": 1.562679409980774, "step": 1232 }, { "epoch": 0.22462910712660417, "grad_norm": 18.75, "learning_rate": 9.902853104011261e-06, "loss": 1.3159688711166382, "step": 1234 }, { "epoch": 0.2249931737507964, "grad_norm": 14.0625, "learning_rate": 9.902517165469589e-06, "loss": 1.25418221950531, "step": 1236 }, { "epoch": 0.2253572403749886, "grad_norm": 15.5, "learning_rate": 9.90218065424934e-06, "loss": 0.9642902612686157, "step": 1238 }, { "epoch": 0.22572130699918086, "grad_norm": 11.6875, "learning_rate": 9.901843570399895e-06, "loss": 1.0084102153778076, "step": 1240 }, { "epoch": 0.22608537362337308, "grad_norm": 15.625, "learning_rate": 9.901505913970716e-06, "loss": 1.6591867208480835, "step": 1242 }, { "epoch": 0.2264494402475653, "grad_norm": 7.71875, "learning_rate": 9.901167685011358e-06, "loss": 1.203679084777832, "step": 1244 }, { "epoch": 0.22681350687175753, "grad_norm": 10.5, "learning_rate": 9.90082888357145e-06, "loss": 1.0881551504135132, "step": 1246 }, { "epoch": 0.22717757349594975, "grad_norm": 10.8125, "learning_rate": 9.900489509700713e-06, "loss": 1.368391513824463, "step": 1248 }, { "epoch": 0.227541640120142, "grad_norm": 5.625, "learning_rate": 9.900149563448947e-06, "loss": 1.1805520057678223, "step": 1250 }, { "epoch": 0.22790570674433422, "grad_norm": 78.0, "learning_rate": 9.89980904486604e-06, "loss": 0.828658938407898, "step": 1252 }, { "epoch": 0.22826977336852644, "grad_norm": 7.8125, "learning_rate": 9.89946795400196e-06, "loss": 0.939128041267395, "step": 1254 }, { "epoch": 0.22863383999271866, "grad_norm": 11.0625, "learning_rate": 9.89912629090676e-06, "loss": 1.503872275352478, "step": 1256 }, { "epoch": 0.22899790661691088, "grad_norm": 17.0, "learning_rate": 9.898784055630576e-06, "loss": 1.7752597332000732, "step": 1258 }, { "epoch": 0.22936197324110313, "grad_norm": 6.59375, "learning_rate": 9.898441248223638e-06, "loss": 1.3853998184204102, "step": 1260 }, { "epoch": 0.22972603986529536, "grad_norm": 10.125, "learning_rate": 9.898097868736243e-06, "loss": 1.2807327508926392, "step": 1262 }, { "epoch": 0.23009010648948758, "grad_norm": 14.375, "learning_rate": 9.897753917218782e-06, "loss": 1.4517203569412231, "step": 1264 }, { "epoch": 0.2304541731136798, "grad_norm": 10.75, "learning_rate": 9.897409393721731e-06, "loss": 1.5580300092697144, "step": 1266 }, { "epoch": 0.23081823973787202, "grad_norm": 6.3125, "learning_rate": 9.897064298295646e-06, "loss": 1.2930718660354614, "step": 1268 }, { "epoch": 0.23118230636206427, "grad_norm": 13.125, "learning_rate": 9.89671863099117e-06, "loss": 1.1210490465164185, "step": 1270 }, { "epoch": 0.2315463729862565, "grad_norm": 10.1875, "learning_rate": 9.896372391859026e-06, "loss": 1.4651648998260498, "step": 1272 }, { "epoch": 0.23191043961044872, "grad_norm": 7.59375, "learning_rate": 9.896025580950027e-06, "loss": 0.7894704341888428, "step": 1274 }, { "epoch": 0.23227450623464094, "grad_norm": 8.4375, "learning_rate": 9.895678198315057e-06, "loss": 1.3474678993225098, "step": 1276 }, { "epoch": 0.23263857285883316, "grad_norm": 7.75, "learning_rate": 9.895330244005105e-06, "loss": 0.9716839790344238, "step": 1278 }, { "epoch": 0.23300263948302538, "grad_norm": 9.4375, "learning_rate": 9.894981718071225e-06, "loss": 1.635238766670227, "step": 1280 }, { "epoch": 0.23336670610721763, "grad_norm": 7.96875, "learning_rate": 9.89463262056456e-06, "loss": 1.4350322484970093, "step": 1282 }, { "epoch": 0.23373077273140985, "grad_norm": 7.15625, "learning_rate": 9.894282951536342e-06, "loss": 1.4168959856033325, "step": 1284 }, { "epoch": 0.23409483935560207, "grad_norm": 26.375, "learning_rate": 9.893932711037885e-06, "loss": 1.4836479425430298, "step": 1286 }, { "epoch": 0.2344589059797943, "grad_norm": 105.5, "learning_rate": 9.893581899120582e-06, "loss": 0.9905297756195068, "step": 1288 }, { "epoch": 0.23482297260398652, "grad_norm": 5.21875, "learning_rate": 9.893230515835915e-06, "loss": 0.49923619627952576, "step": 1290 }, { "epoch": 0.23518703922817877, "grad_norm": 6.625, "learning_rate": 9.892878561235448e-06, "loss": 1.3243517875671387, "step": 1292 }, { "epoch": 0.235551105852371, "grad_norm": 9.1875, "learning_rate": 9.892526035370829e-06, "loss": 1.0172383785247803, "step": 1294 }, { "epoch": 0.2359151724765632, "grad_norm": 26.875, "learning_rate": 9.89217293829379e-06, "loss": 1.0750057697296143, "step": 1296 }, { "epoch": 0.23627923910075543, "grad_norm": 55.75, "learning_rate": 9.891819270056143e-06, "loss": 0.7329437732696533, "step": 1298 }, { "epoch": 0.23664330572494766, "grad_norm": 22.625, "learning_rate": 9.891465030709792e-06, "loss": 1.4872076511383057, "step": 1300 }, { "epoch": 0.2370073723491399, "grad_norm": 9.3125, "learning_rate": 9.891110220306717e-06, "loss": 1.3877533674240112, "step": 1302 }, { "epoch": 0.23737143897333213, "grad_norm": 14.5625, "learning_rate": 9.890754838898988e-06, "loss": 1.449622631072998, "step": 1304 }, { "epoch": 0.23773550559752435, "grad_norm": 14.0, "learning_rate": 9.890398886538754e-06, "loss": 1.658739686012268, "step": 1306 }, { "epoch": 0.23809957222171657, "grad_norm": 11.375, "learning_rate": 9.890042363278252e-06, "loss": 1.7893016338348389, "step": 1308 }, { "epoch": 0.2384636388459088, "grad_norm": 2.15625, "learning_rate": 9.889685269169795e-06, "loss": 0.8570665121078491, "step": 1310 }, { "epoch": 0.23882770547010101, "grad_norm": 14.25, "learning_rate": 9.889327604265789e-06, "loss": 1.2315696477890015, "step": 1312 }, { "epoch": 0.23919177209429326, "grad_norm": 40.25, "learning_rate": 9.88896936861872e-06, "loss": 1.3997361660003662, "step": 1314 }, { "epoch": 0.2395558387184855, "grad_norm": 7.6875, "learning_rate": 9.888610562281156e-06, "loss": 1.5080149173736572, "step": 1316 }, { "epoch": 0.2399199053426777, "grad_norm": 8.0625, "learning_rate": 9.888251185305751e-06, "loss": 1.3581037521362305, "step": 1318 }, { "epoch": 0.24028397196686993, "grad_norm": 11.3125, "learning_rate": 9.887891237745243e-06, "loss": 0.9687387347221375, "step": 1320 }, { "epoch": 0.24064803859106215, "grad_norm": 20.875, "learning_rate": 9.887530719652452e-06, "loss": 1.9171817302703857, "step": 1322 }, { "epoch": 0.2410121052152544, "grad_norm": 17.375, "learning_rate": 9.887169631080282e-06, "loss": 1.739109754562378, "step": 1324 }, { "epoch": 0.24137617183944662, "grad_norm": 15.9375, "learning_rate": 9.886807972081723e-06, "loss": 1.485357642173767, "step": 1326 }, { "epoch": 0.24174023846363885, "grad_norm": 7.3125, "learning_rate": 9.886445742709844e-06, "loss": 1.400372862815857, "step": 1328 }, { "epoch": 0.24210430508783107, "grad_norm": 23.0, "learning_rate": 9.886082943017804e-06, "loss": 1.637008547782898, "step": 1330 }, { "epoch": 0.2424683717120233, "grad_norm": 6.9375, "learning_rate": 9.88571957305884e-06, "loss": 1.6923367977142334, "step": 1332 }, { "epoch": 0.24283243833621554, "grad_norm": 50.5, "learning_rate": 9.885355632886278e-06, "loss": 1.4662747383117676, "step": 1334 }, { "epoch": 0.24319650496040776, "grad_norm": 11.875, "learning_rate": 9.884991122553522e-06, "loss": 1.4708256721496582, "step": 1336 }, { "epoch": 0.24356057158459998, "grad_norm": 15.5625, "learning_rate": 9.884626042114063e-06, "loss": 1.5708904266357422, "step": 1338 }, { "epoch": 0.2439246382087922, "grad_norm": 74.0, "learning_rate": 9.884260391621477e-06, "loss": 1.4456716775894165, "step": 1340 }, { "epoch": 0.24428870483298443, "grad_norm": 15.0, "learning_rate": 9.883894171129418e-06, "loss": 1.475473403930664, "step": 1342 }, { "epoch": 0.24465277145717668, "grad_norm": 6.9375, "learning_rate": 9.883527380691628e-06, "loss": 1.3831150531768799, "step": 1344 }, { "epoch": 0.2450168380813689, "grad_norm": 23.125, "learning_rate": 9.883160020361938e-06, "loss": 1.4802080392837524, "step": 1346 }, { "epoch": 0.24538090470556112, "grad_norm": 40.75, "learning_rate": 9.882792090194248e-06, "loss": 1.7584106922149658, "step": 1348 }, { "epoch": 0.24574497132975334, "grad_norm": 12.0, "learning_rate": 9.882423590242556e-06, "loss": 1.4928392171859741, "step": 1350 }, { "epoch": 0.24610903795394556, "grad_norm": 13.375, "learning_rate": 9.882054520560936e-06, "loss": 1.6194241046905518, "step": 1352 }, { "epoch": 0.24647310457813779, "grad_norm": 10.75, "learning_rate": 9.881684881203547e-06, "loss": 1.4315046072006226, "step": 1354 }, { "epoch": 0.24683717120233004, "grad_norm": 8.125, "learning_rate": 9.881314672224634e-06, "loss": 1.2643805742263794, "step": 1356 }, { "epoch": 0.24720123782652226, "grad_norm": 21.75, "learning_rate": 9.880943893678523e-06, "loss": 1.4243457317352295, "step": 1358 }, { "epoch": 0.24756530445071448, "grad_norm": 6.96875, "learning_rate": 9.880572545619622e-06, "loss": 1.4940712451934814, "step": 1360 }, { "epoch": 0.2479293710749067, "grad_norm": 3.65625, "learning_rate": 9.880200628102427e-06, "loss": 0.874103844165802, "step": 1362 }, { "epoch": 0.24829343769909892, "grad_norm": 23.5, "learning_rate": 9.879828141181515e-06, "loss": 1.1623986959457397, "step": 1364 }, { "epoch": 0.24865750432329117, "grad_norm": 12.5, "learning_rate": 9.879455084911547e-06, "loss": 1.583655834197998, "step": 1366 }, { "epoch": 0.2490215709474834, "grad_norm": 9.3125, "learning_rate": 9.879081459347268e-06, "loss": 1.4835476875305176, "step": 1368 }, { "epoch": 0.24938563757167562, "grad_norm": 18.75, "learning_rate": 9.878707264543504e-06, "loss": 1.3770345449447632, "step": 1370 }, { "epoch": 0.24974970419586784, "grad_norm": 17.875, "learning_rate": 9.878332500555168e-06, "loss": 1.46439528465271, "step": 1372 }, { "epoch": 0.2501137708200601, "grad_norm": 31.375, "learning_rate": 9.877957167437258e-06, "loss": 1.293486475944519, "step": 1374 }, { "epoch": 0.2504778374442523, "grad_norm": 9.3125, "learning_rate": 9.877581265244847e-06, "loss": 1.4575697183609009, "step": 1376 }, { "epoch": 0.25084190406844453, "grad_norm": 12.1875, "learning_rate": 9.8772047940331e-06, "loss": 1.5639668703079224, "step": 1378 }, { "epoch": 0.2512059706926367, "grad_norm": 8.75, "learning_rate": 9.87682775385726e-06, "loss": 1.3468657732009888, "step": 1380 }, { "epoch": 0.251570037316829, "grad_norm": 11.3125, "learning_rate": 9.876450144772663e-06, "loss": 1.6191599369049072, "step": 1382 }, { "epoch": 0.2519341039410212, "grad_norm": 7.9375, "learning_rate": 9.876071966834715e-06, "loss": 1.406794548034668, "step": 1384 }, { "epoch": 0.2522981705652134, "grad_norm": 237.0, "learning_rate": 9.875693220098915e-06, "loss": 1.6290373802185059, "step": 1386 }, { "epoch": 0.25266223718940567, "grad_norm": 9.6875, "learning_rate": 9.87531390462084e-06, "loss": 1.4231597185134888, "step": 1388 }, { "epoch": 0.25302630381359786, "grad_norm": 9.375, "learning_rate": 9.874934020456158e-06, "loss": 1.5313165187835693, "step": 1390 }, { "epoch": 0.2533903704377901, "grad_norm": 10.75, "learning_rate": 9.874553567660607e-06, "loss": 1.6047847270965576, "step": 1392 }, { "epoch": 0.25375443706198236, "grad_norm": 16.375, "learning_rate": 9.874172546290028e-06, "loss": 1.9918204545974731, "step": 1394 }, { "epoch": 0.25411850368617456, "grad_norm": 11.0, "learning_rate": 9.873790956400325e-06, "loss": 1.765520691871643, "step": 1396 }, { "epoch": 0.2544825703103668, "grad_norm": 9.25, "learning_rate": 9.873408798047498e-06, "loss": 1.5310132503509521, "step": 1398 }, { "epoch": 0.254846636934559, "grad_norm": 7.5625, "learning_rate": 9.873026071287627e-06, "loss": 1.395627737045288, "step": 1400 }, { "epoch": 0.25521070355875125, "grad_norm": 31.625, "learning_rate": 9.87264277617688e-06, "loss": 1.5362725257873535, "step": 1402 }, { "epoch": 0.2555747701829435, "grad_norm": 33.5, "learning_rate": 9.872258912771497e-06, "loss": 1.6543318033218384, "step": 1404 }, { "epoch": 0.2559388368071357, "grad_norm": 8.3125, "learning_rate": 9.871874481127813e-06, "loss": 1.240380883216858, "step": 1406 }, { "epoch": 0.25630290343132794, "grad_norm": 28.625, "learning_rate": 9.871489481302239e-06, "loss": 1.4505254030227661, "step": 1408 }, { "epoch": 0.25666697005552014, "grad_norm": 27.5, "learning_rate": 9.871103913351273e-06, "loss": 1.2581229209899902, "step": 1410 }, { "epoch": 0.2570310366797124, "grad_norm": 9.875, "learning_rate": 9.870717777331497e-06, "loss": 1.4536426067352295, "step": 1412 }, { "epoch": 0.25739510330390464, "grad_norm": 17.75, "learning_rate": 9.870331073299576e-06, "loss": 1.31911039352417, "step": 1414 }, { "epoch": 0.25775916992809683, "grad_norm": 5.4375, "learning_rate": 9.86994380131225e-06, "loss": 1.2973837852478027, "step": 1416 }, { "epoch": 0.2581232365522891, "grad_norm": 12.0, "learning_rate": 9.869555961426356e-06, "loss": 1.1395800113677979, "step": 1418 }, { "epoch": 0.2584873031764813, "grad_norm": 6.96875, "learning_rate": 9.869167553698809e-06, "loss": 1.3528611660003662, "step": 1420 }, { "epoch": 0.2588513698006735, "grad_norm": 14.9375, "learning_rate": 9.868778578186602e-06, "loss": 1.6839865446090698, "step": 1422 }, { "epoch": 0.2592154364248658, "grad_norm": 11.375, "learning_rate": 9.86838903494682e-06, "loss": 2.011813163757324, "step": 1424 }, { "epoch": 0.25957950304905797, "grad_norm": 12.875, "learning_rate": 9.867998924036622e-06, "loss": 1.4388245344161987, "step": 1426 }, { "epoch": 0.2599435696732502, "grad_norm": 12.375, "learning_rate": 9.867608245513256e-06, "loss": 1.5373148918151855, "step": 1428 }, { "epoch": 0.2603076362974424, "grad_norm": 10.5, "learning_rate": 9.867216999434057e-06, "loss": 1.4208545684814453, "step": 1430 }, { "epoch": 0.26067170292163466, "grad_norm": 14.125, "learning_rate": 9.866825185856435e-06, "loss": 0.8961284756660461, "step": 1432 }, { "epoch": 0.2610357695458269, "grad_norm": 16.25, "learning_rate": 9.866432804837886e-06, "loss": 0.7015911936759949, "step": 1434 }, { "epoch": 0.2613998361700191, "grad_norm": 11.3125, "learning_rate": 9.866039856435994e-06, "loss": 1.6991215944290161, "step": 1436 }, { "epoch": 0.26176390279421136, "grad_norm": 6.9375, "learning_rate": 9.865646340708422e-06, "loss": 1.207765817642212, "step": 1438 }, { "epoch": 0.26212796941840355, "grad_norm": 48.75, "learning_rate": 9.865252257712914e-06, "loss": 1.382554531097412, "step": 1440 }, { "epoch": 0.2624920360425958, "grad_norm": 5.09375, "learning_rate": 9.864857607507302e-06, "loss": 1.594775676727295, "step": 1442 }, { "epoch": 0.26285610266678805, "grad_norm": 19.125, "learning_rate": 9.864462390149499e-06, "loss": 1.1088712215423584, "step": 1444 }, { "epoch": 0.26322016929098024, "grad_norm": 8.4375, "learning_rate": 9.8640666056975e-06, "loss": 1.2831988334655762, "step": 1446 }, { "epoch": 0.2635842359151725, "grad_norm": 9.875, "learning_rate": 9.863670254209388e-06, "loss": 1.4446523189544678, "step": 1448 }, { "epoch": 0.2639483025393647, "grad_norm": 9.125, "learning_rate": 9.863273335743324e-06, "loss": 1.4284683465957642, "step": 1450 }, { "epoch": 0.26431236916355694, "grad_norm": 11.875, "learning_rate": 9.862875850357553e-06, "loss": 1.5124616622924805, "step": 1452 }, { "epoch": 0.26467643578774913, "grad_norm": 3.859375, "learning_rate": 9.862477798110408e-06, "loss": 1.2909436225891113, "step": 1454 }, { "epoch": 0.2650405024119414, "grad_norm": 9.25, "learning_rate": 9.862079179060298e-06, "loss": 1.0865919589996338, "step": 1456 }, { "epoch": 0.26540456903613363, "grad_norm": 34.0, "learning_rate": 9.86167999326572e-06, "loss": 1.8287204504013062, "step": 1458 }, { "epoch": 0.2657686356603258, "grad_norm": 17.375, "learning_rate": 9.861280240785257e-06, "loss": 1.4243782758712769, "step": 1460 }, { "epoch": 0.2661327022845181, "grad_norm": 11.0, "learning_rate": 9.860879921677561e-06, "loss": 1.4999065399169922, "step": 1462 }, { "epoch": 0.26649676890871027, "grad_norm": 6.53125, "learning_rate": 9.860479036001386e-06, "loss": 1.460152268409729, "step": 1464 }, { "epoch": 0.2668608355329025, "grad_norm": 10.1875, "learning_rate": 9.86007758381556e-06, "loss": 1.4036602973937988, "step": 1466 }, { "epoch": 0.26722490215709477, "grad_norm": 21.125, "learning_rate": 9.859675565178988e-06, "loss": 1.90579354763031, "step": 1468 }, { "epoch": 0.26758896878128696, "grad_norm": 24.125, "learning_rate": 9.859272980150669e-06, "loss": 1.1752175092697144, "step": 1470 }, { "epoch": 0.2679530354054792, "grad_norm": 11.625, "learning_rate": 9.858869828789684e-06, "loss": 1.4299908876419067, "step": 1472 }, { "epoch": 0.2683171020296714, "grad_norm": 10.0625, "learning_rate": 9.858466111155188e-06, "loss": 1.4527727365493774, "step": 1474 }, { "epoch": 0.26868116865386366, "grad_norm": 7.375, "learning_rate": 9.858061827306427e-06, "loss": 1.5154187679290771, "step": 1476 }, { "epoch": 0.2690452352780559, "grad_norm": 13.4375, "learning_rate": 9.857656977302727e-06, "loss": 1.2620866298675537, "step": 1478 }, { "epoch": 0.2694093019022481, "grad_norm": 8.8125, "learning_rate": 9.857251561203503e-06, "loss": 1.7393077611923218, "step": 1480 }, { "epoch": 0.26977336852644035, "grad_norm": 13.375, "learning_rate": 9.856845579068242e-06, "loss": 1.2178714275360107, "step": 1482 }, { "epoch": 0.27013743515063254, "grad_norm": 15.125, "learning_rate": 9.856439030956521e-06, "loss": 1.5004127025604248, "step": 1484 }, { "epoch": 0.2705015017748248, "grad_norm": 14.875, "learning_rate": 9.856031916928004e-06, "loss": 1.3529198169708252, "step": 1486 }, { "epoch": 0.27086556839901704, "grad_norm": 15.1875, "learning_rate": 9.85562423704243e-06, "loss": 1.7885589599609375, "step": 1488 }, { "epoch": 0.27122963502320924, "grad_norm": 12.8125, "learning_rate": 9.855215991359624e-06, "loss": 1.7847390174865723, "step": 1490 }, { "epoch": 0.2715937016474015, "grad_norm": 31.875, "learning_rate": 9.854807179939493e-06, "loss": 1.7920869588851929, "step": 1492 }, { "epoch": 0.2719577682715937, "grad_norm": 10.6875, "learning_rate": 9.854397802842036e-06, "loss": 1.6153438091278076, "step": 1494 }, { "epoch": 0.27232183489578593, "grad_norm": 23.0, "learning_rate": 9.853987860127316e-06, "loss": 1.0620189905166626, "step": 1496 }, { "epoch": 0.2726859015199782, "grad_norm": 30.875, "learning_rate": 9.8535773518555e-06, "loss": 1.0283299684524536, "step": 1498 }, { "epoch": 0.2730499681441704, "grad_norm": 21.0, "learning_rate": 9.853166278086823e-06, "loss": 1.4579250812530518, "step": 1500 }, { "epoch": 0.2734140347683626, "grad_norm": 6.96875, "learning_rate": 9.852754638881611e-06, "loss": 0.9517868757247925, "step": 1502 }, { "epoch": 0.2737781013925548, "grad_norm": 26.375, "learning_rate": 9.85234243430027e-06, "loss": 1.4180676937103271, "step": 1504 }, { "epoch": 0.27414216801674707, "grad_norm": 17.125, "learning_rate": 9.851929664403288e-06, "loss": 1.859014630317688, "step": 1506 }, { "epoch": 0.2745062346409393, "grad_norm": 26.0, "learning_rate": 9.851516329251238e-06, "loss": 1.5463061332702637, "step": 1508 }, { "epoch": 0.2748703012651315, "grad_norm": 15.4375, "learning_rate": 9.851102428904777e-06, "loss": 1.5154248476028442, "step": 1510 }, { "epoch": 0.27523436788932376, "grad_norm": 11.0625, "learning_rate": 9.85068796342464e-06, "loss": 1.578622579574585, "step": 1512 }, { "epoch": 0.27559843451351596, "grad_norm": 13.3125, "learning_rate": 9.850272932871652e-06, "loss": 1.9917669296264648, "step": 1514 }, { "epoch": 0.2759625011377082, "grad_norm": 14.125, "learning_rate": 9.849857337306715e-06, "loss": 1.6202292442321777, "step": 1516 }, { "epoch": 0.27632656776190045, "grad_norm": 9.4375, "learning_rate": 9.849441176790812e-06, "loss": 1.3205862045288086, "step": 1518 }, { "epoch": 0.27669063438609265, "grad_norm": 4.40625, "learning_rate": 9.849024451385019e-06, "loss": 1.511529803276062, "step": 1520 }, { "epoch": 0.2770547010102849, "grad_norm": 4.21875, "learning_rate": 9.848607161150488e-06, "loss": 1.0927166938781738, "step": 1522 }, { "epoch": 0.2774187676344771, "grad_norm": 13.5625, "learning_rate": 9.848189306148453e-06, "loss": 1.257932186126709, "step": 1524 }, { "epoch": 0.27778283425866934, "grad_norm": 15.0, "learning_rate": 9.847770886440229e-06, "loss": 1.434149980545044, "step": 1526 }, { "epoch": 0.27814690088286154, "grad_norm": 18.25, "learning_rate": 9.847351902087225e-06, "loss": 1.5815212726593018, "step": 1528 }, { "epoch": 0.2785109675070538, "grad_norm": 39.5, "learning_rate": 9.84693235315092e-06, "loss": 1.7690224647521973, "step": 1530 }, { "epoch": 0.27887503413124604, "grad_norm": 17.375, "learning_rate": 9.846512239692883e-06, "loss": 1.7450687885284424, "step": 1532 }, { "epoch": 0.27923910075543823, "grad_norm": 14.5, "learning_rate": 9.846091561774762e-06, "loss": 1.1304049491882324, "step": 1534 }, { "epoch": 0.2796031673796305, "grad_norm": 25.625, "learning_rate": 9.84567031945829e-06, "loss": 1.400388479232788, "step": 1536 }, { "epoch": 0.2799672340038227, "grad_norm": 26.25, "learning_rate": 9.845248512805288e-06, "loss": 1.3323603868484497, "step": 1538 }, { "epoch": 0.2803313006280149, "grad_norm": 9.1875, "learning_rate": 9.844826141877646e-06, "loss": 0.9121460318565369, "step": 1540 }, { "epoch": 0.2806953672522072, "grad_norm": 78.0, "learning_rate": 9.844403206737352e-06, "loss": 1.4342448711395264, "step": 1542 }, { "epoch": 0.28105943387639937, "grad_norm": 9.75, "learning_rate": 9.843979707446468e-06, "loss": 1.3524324893951416, "step": 1544 }, { "epoch": 0.2814235005005916, "grad_norm": 18.5, "learning_rate": 9.843555644067138e-06, "loss": 0.9413845539093018, "step": 1546 }, { "epoch": 0.2817875671247838, "grad_norm": 8.0625, "learning_rate": 9.843131016661594e-06, "loss": 1.6236538887023926, "step": 1548 }, { "epoch": 0.28215163374897606, "grad_norm": 16.75, "learning_rate": 9.84270582529215e-06, "loss": 1.3235279321670532, "step": 1550 }, { "epoch": 0.2825157003731683, "grad_norm": 17.875, "learning_rate": 9.842280070021198e-06, "loss": 1.366626501083374, "step": 1552 }, { "epoch": 0.2828797669973605, "grad_norm": 15.75, "learning_rate": 9.841853750911216e-06, "loss": 1.478304386138916, "step": 1554 }, { "epoch": 0.28324383362155275, "grad_norm": 20.375, "learning_rate": 9.841426868024768e-06, "loss": 1.4233434200286865, "step": 1556 }, { "epoch": 0.28360790024574495, "grad_norm": 7.75, "learning_rate": 9.840999421424494e-06, "loss": 1.4747873544692993, "step": 1558 }, { "epoch": 0.2839719668699372, "grad_norm": 9.1875, "learning_rate": 9.84057141117312e-06, "loss": 1.232305645942688, "step": 1560 }, { "epoch": 0.28433603349412945, "grad_norm": 8.25, "learning_rate": 9.840142837333457e-06, "loss": 1.073418378829956, "step": 1562 }, { "epoch": 0.28470010011832164, "grad_norm": 4.125, "learning_rate": 9.839713699968396e-06, "loss": 1.256094217300415, "step": 1564 }, { "epoch": 0.2850641667425139, "grad_norm": 41.0, "learning_rate": 9.839283999140909e-06, "loss": 1.3534482717514038, "step": 1566 }, { "epoch": 0.2854282333667061, "grad_norm": 20.25, "learning_rate": 9.838853734914055e-06, "loss": 1.4696502685546875, "step": 1568 }, { "epoch": 0.28579229999089834, "grad_norm": 6.0, "learning_rate": 9.838422907350972e-06, "loss": 1.550337314605713, "step": 1570 }, { "epoch": 0.2861563666150906, "grad_norm": 14.3125, "learning_rate": 9.837991516514886e-06, "loss": 1.1209887266159058, "step": 1572 }, { "epoch": 0.2865204332392828, "grad_norm": 13.0625, "learning_rate": 9.837559562469096e-06, "loss": 1.3484582901000977, "step": 1574 }, { "epoch": 0.28688449986347503, "grad_norm": 8.9375, "learning_rate": 9.837127045276996e-06, "loss": 1.2887582778930664, "step": 1576 }, { "epoch": 0.2872485664876672, "grad_norm": 10.125, "learning_rate": 9.83669396500205e-06, "loss": 1.5810158252716064, "step": 1578 }, { "epoch": 0.2876126331118595, "grad_norm": 17.5, "learning_rate": 9.836260321707813e-06, "loss": 1.284956932067871, "step": 1580 }, { "epoch": 0.2879766997360517, "grad_norm": 12.75, "learning_rate": 9.835826115457922e-06, "loss": 1.606035590171814, "step": 1582 }, { "epoch": 0.2883407663602439, "grad_norm": 8.5625, "learning_rate": 9.835391346316093e-06, "loss": 1.323326587677002, "step": 1584 }, { "epoch": 0.28870483298443617, "grad_norm": 10.3125, "learning_rate": 9.834956014346127e-06, "loss": 1.0132527351379395, "step": 1586 }, { "epoch": 0.28906889960862836, "grad_norm": 12.25, "learning_rate": 9.834520119611908e-06, "loss": 1.1451250314712524, "step": 1588 }, { "epoch": 0.2894329662328206, "grad_norm": 3.671875, "learning_rate": 9.834083662177403e-06, "loss": 1.3297022581100464, "step": 1590 }, { "epoch": 0.28979703285701286, "grad_norm": 9.8125, "learning_rate": 9.833646642106657e-06, "loss": 1.092158555984497, "step": 1592 }, { "epoch": 0.29016109948120505, "grad_norm": 11.125, "learning_rate": 9.833209059463804e-06, "loss": 1.427994728088379, "step": 1594 }, { "epoch": 0.2905251661053973, "grad_norm": 10.625, "learning_rate": 9.832770914313055e-06, "loss": 1.4076528549194336, "step": 1596 }, { "epoch": 0.2908892327295895, "grad_norm": 11.25, "learning_rate": 9.832332206718706e-06, "loss": 1.4831396341323853, "step": 1598 }, { "epoch": 0.29125329935378175, "grad_norm": 6.4375, "learning_rate": 9.83189293674514e-06, "loss": 1.3140579462051392, "step": 1600 }, { "epoch": 0.291617365977974, "grad_norm": 19.125, "learning_rate": 9.831453104456812e-06, "loss": 1.156083345413208, "step": 1602 }, { "epoch": 0.2919814326021662, "grad_norm": 8.3125, "learning_rate": 9.83101270991827e-06, "loss": 0.5887618064880371, "step": 1604 }, { "epoch": 0.29234549922635844, "grad_norm": 6.125, "learning_rate": 9.830571753194137e-06, "loss": 1.233492374420166, "step": 1606 }, { "epoch": 0.29270956585055063, "grad_norm": 8.875, "learning_rate": 9.830130234349127e-06, "loss": 1.5636699199676514, "step": 1608 }, { "epoch": 0.2930736324747429, "grad_norm": 15.5, "learning_rate": 9.829688153448023e-06, "loss": 1.3661271333694458, "step": 1610 }, { "epoch": 0.2934376990989351, "grad_norm": 9.0625, "learning_rate": 9.829245510555704e-06, "loss": 1.3555999994277954, "step": 1612 }, { "epoch": 0.29380176572312733, "grad_norm": 11.125, "learning_rate": 9.828802305737127e-06, "loss": 1.480827808380127, "step": 1614 }, { "epoch": 0.2941658323473196, "grad_norm": 13.3125, "learning_rate": 9.828358539057325e-06, "loss": 1.3503471612930298, "step": 1616 }, { "epoch": 0.29452989897151177, "grad_norm": 5.625, "learning_rate": 9.827914210581425e-06, "loss": 1.2063119411468506, "step": 1618 }, { "epoch": 0.294893965595704, "grad_norm": 6.21875, "learning_rate": 9.827469320374627e-06, "loss": 1.237090826034546, "step": 1620 }, { "epoch": 0.2952580322198962, "grad_norm": 19.75, "learning_rate": 9.827023868502218e-06, "loss": 1.2755967378616333, "step": 1622 }, { "epoch": 0.29562209884408847, "grad_norm": 12.5625, "learning_rate": 9.826577855029564e-06, "loss": 1.341290831565857, "step": 1624 }, { "epoch": 0.2959861654682807, "grad_norm": 6.90625, "learning_rate": 9.82613128002212e-06, "loss": 1.2220969200134277, "step": 1626 }, { "epoch": 0.2963502320924729, "grad_norm": 14.25, "learning_rate": 9.825684143545416e-06, "loss": 1.5383570194244385, "step": 1628 }, { "epoch": 0.29671429871666516, "grad_norm": 8.9375, "learning_rate": 9.825236445665068e-06, "loss": 1.8045324087142944, "step": 1630 }, { "epoch": 0.29707836534085735, "grad_norm": 9.625, "learning_rate": 9.824788186446771e-06, "loss": 1.0440577268600464, "step": 1632 }, { "epoch": 0.2974424319650496, "grad_norm": 15.3125, "learning_rate": 9.824339365956313e-06, "loss": 1.281540036201477, "step": 1634 }, { "epoch": 0.29780649858924185, "grad_norm": 7.625, "learning_rate": 9.823889984259546e-06, "loss": 0.13898517191410065, "step": 1636 }, { "epoch": 0.29817056521343405, "grad_norm": 21.0, "learning_rate": 9.823440041422424e-06, "loss": 0.4255257546901703, "step": 1638 }, { "epoch": 0.2985346318376263, "grad_norm": 19.75, "learning_rate": 9.822989537510975e-06, "loss": 1.3400230407714844, "step": 1640 }, { "epoch": 0.2988986984618185, "grad_norm": 63.5, "learning_rate": 9.8225384725913e-06, "loss": 0.9882588386535645, "step": 1642 }, { "epoch": 0.29926276508601074, "grad_norm": 11.8125, "learning_rate": 9.822086846729595e-06, "loss": 1.5584814548492432, "step": 1644 }, { "epoch": 0.299626831710203, "grad_norm": 8.9375, "learning_rate": 9.821634659992137e-06, "loss": 1.435099482536316, "step": 1646 }, { "epoch": 0.2999908983343952, "grad_norm": 6.3125, "learning_rate": 9.821181912445278e-06, "loss": 0.9147496819496155, "step": 1648 }, { "epoch": 0.30035496495858743, "grad_norm": 9.0625, "learning_rate": 9.820728604155461e-06, "loss": 1.538068175315857, "step": 1650 }, { "epoch": 0.30071903158277963, "grad_norm": 21.25, "learning_rate": 9.820274735189203e-06, "loss": 1.2914254665374756, "step": 1652 }, { "epoch": 0.3010830982069719, "grad_norm": 14.1875, "learning_rate": 9.819820305613113e-06, "loss": 1.0761820077896118, "step": 1654 }, { "epoch": 0.3014471648311641, "grad_norm": 23.5, "learning_rate": 9.819365315493871e-06, "loss": 1.4292621612548828, "step": 1656 }, { "epoch": 0.3018112314553563, "grad_norm": 33.5, "learning_rate": 9.818909764898251e-06, "loss": 1.3318498134613037, "step": 1658 }, { "epoch": 0.30217529807954857, "grad_norm": 17.625, "learning_rate": 9.818453653893097e-06, "loss": 0.6170845627784729, "step": 1660 }, { "epoch": 0.30253936470374077, "grad_norm": 28.125, "learning_rate": 9.817996982545346e-06, "loss": 1.8151825666427612, "step": 1662 }, { "epoch": 0.302903431327933, "grad_norm": 8.1875, "learning_rate": 9.81753975092201e-06, "loss": 1.3903858661651611, "step": 1664 }, { "epoch": 0.30326749795212526, "grad_norm": 10.1875, "learning_rate": 9.817081959090184e-06, "loss": 1.486268401145935, "step": 1666 }, { "epoch": 0.30363156457631746, "grad_norm": 12.9375, "learning_rate": 9.816623607117053e-06, "loss": 1.3959635496139526, "step": 1668 }, { "epoch": 0.3039956312005097, "grad_norm": 19.125, "learning_rate": 9.816164695069874e-06, "loss": 1.384684443473816, "step": 1670 }, { "epoch": 0.3043596978247019, "grad_norm": 9.9375, "learning_rate": 9.81570522301599e-06, "loss": 0.8759585022926331, "step": 1672 }, { "epoch": 0.30472376444889415, "grad_norm": 18.625, "learning_rate": 9.815245191022832e-06, "loss": 1.1100056171417236, "step": 1674 }, { "epoch": 0.3050878310730864, "grad_norm": 5.1875, "learning_rate": 9.8147845991579e-06, "loss": 0.5108053684234619, "step": 1676 }, { "epoch": 0.3054518976972786, "grad_norm": 16.0, "learning_rate": 9.81432344748879e-06, "loss": 1.3095556497573853, "step": 1678 }, { "epoch": 0.30581596432147085, "grad_norm": 16.375, "learning_rate": 9.813861736083172e-06, "loss": 1.2732908725738525, "step": 1680 }, { "epoch": 0.30618003094566304, "grad_norm": 8.625, "learning_rate": 9.813399465008802e-06, "loss": 1.582122802734375, "step": 1682 }, { "epoch": 0.3065440975698553, "grad_norm": 4.90625, "learning_rate": 9.812936634333512e-06, "loss": 0.9677426218986511, "step": 1684 }, { "epoch": 0.3069081641940475, "grad_norm": 9.4375, "learning_rate": 9.812473244125225e-06, "loss": 1.2957812547683716, "step": 1686 }, { "epoch": 0.30727223081823973, "grad_norm": 7.3125, "learning_rate": 9.812009294451939e-06, "loss": 1.3681244850158691, "step": 1688 }, { "epoch": 0.307636297442432, "grad_norm": 10.25, "learning_rate": 9.811544785381738e-06, "loss": 1.2751357555389404, "step": 1690 }, { "epoch": 0.3080003640666242, "grad_norm": 55.0, "learning_rate": 9.811079716982787e-06, "loss": 1.1589667797088623, "step": 1692 }, { "epoch": 0.3083644306908164, "grad_norm": 11.75, "learning_rate": 9.810614089323333e-06, "loss": 1.3476650714874268, "step": 1694 }, { "epoch": 0.3087284973150086, "grad_norm": 60.25, "learning_rate": 9.810147902471706e-06, "loss": 1.2627832889556885, "step": 1696 }, { "epoch": 0.30909256393920087, "grad_norm": 64.0, "learning_rate": 9.809681156496313e-06, "loss": 0.7154263854026794, "step": 1698 }, { "epoch": 0.3094566305633931, "grad_norm": 7.0, "learning_rate": 9.809213851465652e-06, "loss": 1.081131100654602, "step": 1700 }, { "epoch": 0.3098206971875853, "grad_norm": 15.625, "learning_rate": 9.808745987448292e-06, "loss": 1.3883049488067627, "step": 1702 }, { "epoch": 0.31018476381177756, "grad_norm": 26.5, "learning_rate": 9.808277564512896e-06, "loss": 1.5548325777053833, "step": 1704 }, { "epoch": 0.31054883043596976, "grad_norm": 13.4375, "learning_rate": 9.8078085827282e-06, "loss": 1.3953886032104492, "step": 1706 }, { "epoch": 0.310912897060162, "grad_norm": 6.375, "learning_rate": 9.807339042163027e-06, "loss": 1.2681118249893188, "step": 1708 }, { "epoch": 0.31127696368435426, "grad_norm": 20.25, "learning_rate": 9.80686894288628e-06, "loss": 1.185044765472412, "step": 1710 }, { "epoch": 0.31164103030854645, "grad_norm": 17.75, "learning_rate": 9.806398284966943e-06, "loss": 1.1058142185211182, "step": 1712 }, { "epoch": 0.3120050969327387, "grad_norm": 4.375, "learning_rate": 9.805927068474083e-06, "loss": 1.2485921382904053, "step": 1714 }, { "epoch": 0.3123691635569309, "grad_norm": 15.875, "learning_rate": 9.805455293476848e-06, "loss": 1.3581323623657227, "step": 1716 }, { "epoch": 0.31273323018112315, "grad_norm": 17.875, "learning_rate": 9.804982960044475e-06, "loss": 1.1913976669311523, "step": 1718 }, { "epoch": 0.3130972968053154, "grad_norm": 9.5625, "learning_rate": 9.804510068246271e-06, "loss": 1.160415530204773, "step": 1720 }, { "epoch": 0.3134613634295076, "grad_norm": 35.5, "learning_rate": 9.804036618151633e-06, "loss": 1.5496083498001099, "step": 1722 }, { "epoch": 0.31382543005369984, "grad_norm": 23.875, "learning_rate": 9.803562609830037e-06, "loss": 1.7188440561294556, "step": 1724 }, { "epoch": 0.31418949667789203, "grad_norm": 14.0625, "learning_rate": 9.803088043351043e-06, "loss": 1.419533610343933, "step": 1726 }, { "epoch": 0.3145535633020843, "grad_norm": 28.375, "learning_rate": 9.802612918784291e-06, "loss": 1.4445359706878662, "step": 1728 }, { "epoch": 0.31491762992627653, "grad_norm": 6.59375, "learning_rate": 9.802137236199505e-06, "loss": 1.1445637941360474, "step": 1730 }, { "epoch": 0.3152816965504687, "grad_norm": 11.5, "learning_rate": 9.801660995666486e-06, "loss": 1.4296021461486816, "step": 1732 }, { "epoch": 0.315645763174661, "grad_norm": 16.75, "learning_rate": 9.801184197255125e-06, "loss": 1.4084744453430176, "step": 1734 }, { "epoch": 0.31600982979885317, "grad_norm": 6.03125, "learning_rate": 9.800706841035385e-06, "loss": 1.0689120292663574, "step": 1736 }, { "epoch": 0.3163738964230454, "grad_norm": 9.0625, "learning_rate": 9.800228927077322e-06, "loss": 1.274327039718628, "step": 1738 }, { "epoch": 0.31673796304723767, "grad_norm": 14.1875, "learning_rate": 9.799750455451065e-06, "loss": 1.524749994277954, "step": 1740 }, { "epoch": 0.31710202967142986, "grad_norm": 13.125, "learning_rate": 9.799271426226823e-06, "loss": 1.7396551370620728, "step": 1742 }, { "epoch": 0.3174660962956221, "grad_norm": 22.0, "learning_rate": 9.798791839474902e-06, "loss": 1.7445507049560547, "step": 1744 }, { "epoch": 0.3178301629198143, "grad_norm": 39.75, "learning_rate": 9.798311695265672e-06, "loss": 1.9367200136184692, "step": 1746 }, { "epoch": 0.31819422954400656, "grad_norm": 3.28125, "learning_rate": 9.797830993669592e-06, "loss": 1.2434825897216797, "step": 1748 }, { "epoch": 0.3185582961681988, "grad_norm": 7.1875, "learning_rate": 9.797349734757206e-06, "loss": 1.0539696216583252, "step": 1750 }, { "epoch": 0.318922362792391, "grad_norm": 9.9375, "learning_rate": 9.796867918599138e-06, "loss": 1.5123122930526733, "step": 1752 }, { "epoch": 0.31928642941658325, "grad_norm": 9.75, "learning_rate": 9.796385545266086e-06, "loss": 1.2620917558670044, "step": 1754 }, { "epoch": 0.31965049604077544, "grad_norm": 15.0, "learning_rate": 9.795902614828846e-06, "loss": 1.4678680896759033, "step": 1756 }, { "epoch": 0.3200145626649677, "grad_norm": 10.4375, "learning_rate": 9.795419127358276e-06, "loss": 1.2457855939865112, "step": 1758 }, { "epoch": 0.3203786292891599, "grad_norm": 6.15625, "learning_rate": 9.794935082925333e-06, "loss": 1.2296313047409058, "step": 1760 }, { "epoch": 0.32074269591335214, "grad_norm": 6.15625, "learning_rate": 9.794450481601046e-06, "loss": 1.0726215839385986, "step": 1762 }, { "epoch": 0.3211067625375444, "grad_norm": 11.0, "learning_rate": 9.793965323456526e-06, "loss": 1.5971686840057373, "step": 1764 }, { "epoch": 0.3214708291617366, "grad_norm": 5.3125, "learning_rate": 9.793479608562972e-06, "loss": 1.3719642162322998, "step": 1766 }, { "epoch": 0.32183489578592883, "grad_norm": 7.3125, "learning_rate": 9.79299333699166e-06, "loss": 1.2214471101760864, "step": 1768 }, { "epoch": 0.322198962410121, "grad_norm": 6.5625, "learning_rate": 9.792506508813946e-06, "loss": 1.3700158596038818, "step": 1770 }, { "epoch": 0.3225630290343133, "grad_norm": 9.8125, "learning_rate": 9.792019124101273e-06, "loss": 1.152277946472168, "step": 1772 }, { "epoch": 0.3229270956585055, "grad_norm": 17.25, "learning_rate": 9.79153118292516e-06, "loss": 0.443513959646225, "step": 1774 }, { "epoch": 0.3232911622826977, "grad_norm": 11.5, "learning_rate": 9.791042685357212e-06, "loss": 1.2890838384628296, "step": 1776 }, { "epoch": 0.32365522890688997, "grad_norm": 7.71875, "learning_rate": 9.790553631469116e-06, "loss": 1.5680713653564453, "step": 1778 }, { "epoch": 0.32401929553108216, "grad_norm": 23.5, "learning_rate": 9.790064021332633e-06, "loss": 1.4733153581619263, "step": 1780 }, { "epoch": 0.3243833621552744, "grad_norm": 15.4375, "learning_rate": 9.789573855019616e-06, "loss": 1.6985194683074951, "step": 1782 }, { "epoch": 0.32474742877946666, "grad_norm": 9.8125, "learning_rate": 9.789083132601992e-06, "loss": 1.5019042491912842, "step": 1784 }, { "epoch": 0.32511149540365886, "grad_norm": 15.125, "learning_rate": 9.788591854151777e-06, "loss": 1.2202986478805542, "step": 1786 }, { "epoch": 0.3254755620278511, "grad_norm": 764.0, "learning_rate": 9.788100019741059e-06, "loss": 1.676939606666565, "step": 1788 }, { "epoch": 0.3258396286520433, "grad_norm": 17.25, "learning_rate": 9.787607629442015e-06, "loss": 1.8803229331970215, "step": 1790 }, { "epoch": 0.32620369527623555, "grad_norm": 15.0625, "learning_rate": 9.787114683326903e-06, "loss": 1.6080893278121948, "step": 1792 }, { "epoch": 0.3265677619004278, "grad_norm": 5.78125, "learning_rate": 9.786621181468057e-06, "loss": 1.2275793552398682, "step": 1794 }, { "epoch": 0.32693182852462, "grad_norm": 17.125, "learning_rate": 9.786127123937901e-06, "loss": 0.8695844411849976, "step": 1796 }, { "epoch": 0.32729589514881224, "grad_norm": 21.125, "learning_rate": 9.78563251080893e-06, "loss": 0.6618102788925171, "step": 1798 }, { "epoch": 0.32765996177300444, "grad_norm": 15.875, "learning_rate": 9.785137342153733e-06, "loss": 1.7897183895111084, "step": 1800 }, { "epoch": 0.3280240283971967, "grad_norm": 12.5625, "learning_rate": 9.784641618044968e-06, "loss": 1.4144792556762695, "step": 1802 }, { "epoch": 0.32838809502138894, "grad_norm": 11.3125, "learning_rate": 9.784145338555384e-06, "loss": 1.4410724639892578, "step": 1804 }, { "epoch": 0.32875216164558113, "grad_norm": 6.125, "learning_rate": 9.783648503757809e-06, "loss": 1.3081823587417603, "step": 1806 }, { "epoch": 0.3291162282697734, "grad_norm": 3.671875, "learning_rate": 9.783151113725148e-06, "loss": 1.0756101608276367, "step": 1808 }, { "epoch": 0.3294802948939656, "grad_norm": 11.9375, "learning_rate": 9.782653168530397e-06, "loss": 0.8786813020706177, "step": 1810 }, { "epoch": 0.3298443615181578, "grad_norm": 20.75, "learning_rate": 9.78215466824662e-06, "loss": 1.1601600646972656, "step": 1812 }, { "epoch": 0.3302084281423501, "grad_norm": 29.375, "learning_rate": 9.781655612946971e-06, "loss": 1.689734935760498, "step": 1814 }, { "epoch": 0.33057249476654227, "grad_norm": 11.8125, "learning_rate": 9.78115600270469e-06, "loss": 1.6082452535629272, "step": 1816 }, { "epoch": 0.3309365613907345, "grad_norm": 10.9375, "learning_rate": 9.780655837593087e-06, "loss": 1.4208040237426758, "step": 1818 }, { "epoch": 0.3313006280149267, "grad_norm": 15.4375, "learning_rate": 9.780155117685564e-06, "loss": 1.8875669240951538, "step": 1820 }, { "epoch": 0.33166469463911896, "grad_norm": 9.25, "learning_rate": 9.779653843055594e-06, "loss": 1.3704841136932373, "step": 1822 }, { "epoch": 0.3320287612633112, "grad_norm": 16.125, "learning_rate": 9.779152013776743e-06, "loss": 1.2594153881072998, "step": 1824 }, { "epoch": 0.3323928278875034, "grad_norm": 54.5, "learning_rate": 9.778649629922647e-06, "loss": 0.7861361503601074, "step": 1826 }, { "epoch": 0.33275689451169566, "grad_norm": 2.28125, "learning_rate": 9.778146691567034e-06, "loss": 0.9009575843811035, "step": 1828 }, { "epoch": 0.33312096113588785, "grad_norm": 9.9375, "learning_rate": 9.777643198783703e-06, "loss": 1.0703434944152832, "step": 1830 }, { "epoch": 0.3334850277600801, "grad_norm": 6.625, "learning_rate": 9.777139151646545e-06, "loss": 1.5458961725234985, "step": 1832 }, { "epoch": 0.33384909438427235, "grad_norm": 11.1875, "learning_rate": 9.776634550229523e-06, "loss": 1.2762070894241333, "step": 1834 }, { "epoch": 0.33421316100846454, "grad_norm": 25.0, "learning_rate": 9.776129394606684e-06, "loss": 1.1095106601715088, "step": 1836 }, { "epoch": 0.3345772276326568, "grad_norm": 21.875, "learning_rate": 9.77562368485216e-06, "loss": 1.7597427368164062, "step": 1838 }, { "epoch": 0.334941294256849, "grad_norm": 3.46875, "learning_rate": 9.775117421040163e-06, "loss": 0.9171950817108154, "step": 1840 }, { "epoch": 0.33530536088104124, "grad_norm": 14.0625, "learning_rate": 9.774610603244983e-06, "loss": 1.3025262355804443, "step": 1842 }, { "epoch": 0.33566942750523343, "grad_norm": 9.3125, "learning_rate": 9.774103231540995e-06, "loss": 1.7508900165557861, "step": 1844 }, { "epoch": 0.3360334941294257, "grad_norm": 8.3125, "learning_rate": 9.773595306002652e-06, "loss": 1.1705724000930786, "step": 1846 }, { "epoch": 0.33639756075361793, "grad_norm": 10.375, "learning_rate": 9.77308682670449e-06, "loss": 1.5776920318603516, "step": 1848 }, { "epoch": 0.3367616273778101, "grad_norm": 14.75, "learning_rate": 9.77257779372113e-06, "loss": 1.6449886560440063, "step": 1850 }, { "epoch": 0.3371256940020024, "grad_norm": 14.0625, "learning_rate": 9.772068207127265e-06, "loss": 1.361955165863037, "step": 1852 }, { "epoch": 0.33748976062619457, "grad_norm": 10.625, "learning_rate": 9.771558066997677e-06, "loss": 1.4854588508605957, "step": 1854 }, { "epoch": 0.3378538272503868, "grad_norm": 9.625, "learning_rate": 9.77104737340723e-06, "loss": 1.811299204826355, "step": 1856 }, { "epoch": 0.33821789387457907, "grad_norm": 8.9375, "learning_rate": 9.770536126430861e-06, "loss": 1.18406081199646, "step": 1858 }, { "epoch": 0.33858196049877126, "grad_norm": 18.125, "learning_rate": 9.7700243261436e-06, "loss": 0.7963888049125671, "step": 1860 }, { "epoch": 0.3389460271229635, "grad_norm": 19.125, "learning_rate": 9.769511972620542e-06, "loss": 1.499111294746399, "step": 1862 }, { "epoch": 0.3393100937471557, "grad_norm": 18.375, "learning_rate": 9.768999065936883e-06, "loss": 1.6931930780410767, "step": 1864 }, { "epoch": 0.33967416037134796, "grad_norm": 12.0, "learning_rate": 9.768485606167886e-06, "loss": 1.2183482646942139, "step": 1866 }, { "epoch": 0.3400382269955402, "grad_norm": 44.5, "learning_rate": 9.767971593388897e-06, "loss": 1.1583776473999023, "step": 1868 }, { "epoch": 0.3404022936197324, "grad_norm": 6.96875, "learning_rate": 9.767457027675345e-06, "loss": 1.2160083055496216, "step": 1870 }, { "epoch": 0.34076636024392465, "grad_norm": 30.625, "learning_rate": 9.766941909102746e-06, "loss": 1.2918589115142822, "step": 1872 }, { "epoch": 0.34113042686811684, "grad_norm": 16.125, "learning_rate": 9.766426237746685e-06, "loss": 1.6872920989990234, "step": 1874 }, { "epoch": 0.3414944934923091, "grad_norm": 26.25, "learning_rate": 9.765910013682838e-06, "loss": 2.0386478900909424, "step": 1876 }, { "epoch": 0.34185856011650134, "grad_norm": 6.125, "learning_rate": 9.76539323698696e-06, "loss": 1.350124478340149, "step": 1878 }, { "epoch": 0.34222262674069354, "grad_norm": 74.0, "learning_rate": 9.764875907734883e-06, "loss": 1.3060801029205322, "step": 1880 }, { "epoch": 0.3425866933648858, "grad_norm": 23.5, "learning_rate": 9.764358026002523e-06, "loss": 2.2640185356140137, "step": 1882 }, { "epoch": 0.342950759989078, "grad_norm": 30.375, "learning_rate": 9.763839591865881e-06, "loss": 1.190342903137207, "step": 1884 }, { "epoch": 0.34331482661327023, "grad_norm": 20.375, "learning_rate": 9.763320605401032e-06, "loss": 0.9115470051765442, "step": 1886 }, { "epoch": 0.3436788932374625, "grad_norm": 10.875, "learning_rate": 9.762801066684136e-06, "loss": 1.5014320611953735, "step": 1888 }, { "epoch": 0.3440429598616547, "grad_norm": 12.75, "learning_rate": 9.76228097579143e-06, "loss": 1.5600974559783936, "step": 1890 }, { "epoch": 0.3444070264858469, "grad_norm": 16.625, "learning_rate": 9.761760332799239e-06, "loss": 1.4339325428009033, "step": 1892 }, { "epoch": 0.3447710931100391, "grad_norm": 11.8125, "learning_rate": 9.761239137783964e-06, "loss": 1.575667381286621, "step": 1894 }, { "epoch": 0.34513515973423137, "grad_norm": 9.0625, "learning_rate": 9.76071739082209e-06, "loss": 1.4520823955535889, "step": 1896 }, { "epoch": 0.3454992263584236, "grad_norm": 11.3125, "learning_rate": 9.760195091990178e-06, "loss": 0.9596503376960754, "step": 1898 }, { "epoch": 0.3458632929826158, "grad_norm": 20.125, "learning_rate": 9.759672241364877e-06, "loss": 0.9361091256141663, "step": 1900 }, { "epoch": 0.34622735960680806, "grad_norm": 7.71875, "learning_rate": 9.759148839022912e-06, "loss": 0.8034407496452332, "step": 1902 }, { "epoch": 0.34659142623100025, "grad_norm": 6.625, "learning_rate": 9.758624885041087e-06, "loss": 1.4066028594970703, "step": 1904 }, { "epoch": 0.3469554928551925, "grad_norm": 12.6875, "learning_rate": 9.758100379496294e-06, "loss": 1.470516562461853, "step": 1906 }, { "epoch": 0.34731955947938475, "grad_norm": 18.5, "learning_rate": 9.757575322465498e-06, "loss": 1.3514392375946045, "step": 1908 }, { "epoch": 0.34768362610357695, "grad_norm": 9.3125, "learning_rate": 9.757049714025755e-06, "loss": 1.326603889465332, "step": 1910 }, { "epoch": 0.3480476927277692, "grad_norm": 48.75, "learning_rate": 9.75652355425419e-06, "loss": 1.2181487083435059, "step": 1912 }, { "epoch": 0.3484117593519614, "grad_norm": 2.703125, "learning_rate": 9.755996843228018e-06, "loss": 0.715548038482666, "step": 1914 }, { "epoch": 0.34877582597615364, "grad_norm": 5.4375, "learning_rate": 9.75546958102453e-06, "loss": 1.374284029006958, "step": 1916 }, { "epoch": 0.34913989260034584, "grad_norm": 5.75, "learning_rate": 9.754941767721103e-06, "loss": 1.1102149486541748, "step": 1918 }, { "epoch": 0.3495039592245381, "grad_norm": 11.5, "learning_rate": 9.754413403395187e-06, "loss": 1.2851014137268066, "step": 1920 }, { "epoch": 0.34986802584873034, "grad_norm": 15.0, "learning_rate": 9.753884488124321e-06, "loss": 1.2167428731918335, "step": 1922 }, { "epoch": 0.35023209247292253, "grad_norm": 4.9375, "learning_rate": 9.753355021986116e-06, "loss": 0.9661097526550293, "step": 1924 }, { "epoch": 0.3505961590971148, "grad_norm": 11.875, "learning_rate": 9.752825005058277e-06, "loss": 1.509225606918335, "step": 1926 }, { "epoch": 0.350960225721307, "grad_norm": 7.28125, "learning_rate": 9.752294437418575e-06, "loss": 1.3005475997924805, "step": 1928 }, { "epoch": 0.3513242923454992, "grad_norm": 8.125, "learning_rate": 9.751763319144871e-06, "loss": 1.2685401439666748, "step": 1930 }, { "epoch": 0.3516883589696915, "grad_norm": 5.09375, "learning_rate": 9.751231650315106e-06, "loss": 1.2690961360931396, "step": 1932 }, { "epoch": 0.35205242559388367, "grad_norm": 17.75, "learning_rate": 9.750699431007296e-06, "loss": 1.5271693468093872, "step": 1934 }, { "epoch": 0.3524164922180759, "grad_norm": 22.125, "learning_rate": 9.750166661299548e-06, "loss": 1.6990630626678467, "step": 1936 }, { "epoch": 0.3527805588422681, "grad_norm": 21.625, "learning_rate": 9.749633341270038e-06, "loss": 1.6855173110961914, "step": 1938 }, { "epoch": 0.35314462546646036, "grad_norm": 14.25, "learning_rate": 9.749099470997033e-06, "loss": 0.9684444069862366, "step": 1940 }, { "epoch": 0.3535086920906526, "grad_norm": 40.5, "learning_rate": 9.748565050558871e-06, "loss": 1.6541390419006348, "step": 1942 }, { "epoch": 0.3538727587148448, "grad_norm": 25.875, "learning_rate": 9.748030080033982e-06, "loss": 2.1384077072143555, "step": 1944 }, { "epoch": 0.35423682533903705, "grad_norm": 9.0, "learning_rate": 9.747494559500869e-06, "loss": 1.5122464895248413, "step": 1946 }, { "epoch": 0.35460089196322925, "grad_norm": 9.375, "learning_rate": 9.746958489038116e-06, "loss": 1.494816541671753, "step": 1948 }, { "epoch": 0.3549649585874215, "grad_norm": 18.375, "learning_rate": 9.74642186872439e-06, "loss": 1.6494117975234985, "step": 1950 }, { "epoch": 0.35532902521161375, "grad_norm": 13.25, "learning_rate": 9.745884698638437e-06, "loss": 1.7236353158950806, "step": 1952 }, { "epoch": 0.35569309183580594, "grad_norm": 10.0, "learning_rate": 9.745346978859084e-06, "loss": 1.5397441387176514, "step": 1954 }, { "epoch": 0.3560571584599982, "grad_norm": 13.5, "learning_rate": 9.744808709465243e-06, "loss": 1.2179691791534424, "step": 1956 }, { "epoch": 0.3564212250841904, "grad_norm": 14.3125, "learning_rate": 9.7442698905359e-06, "loss": 1.4973336458206177, "step": 1958 }, { "epoch": 0.35678529170838263, "grad_norm": 8.0625, "learning_rate": 9.743730522150123e-06, "loss": 1.3711413145065308, "step": 1960 }, { "epoch": 0.3571493583325749, "grad_norm": 14.875, "learning_rate": 9.743190604387066e-06, "loss": 1.5691016912460327, "step": 1962 }, { "epoch": 0.3575134249567671, "grad_norm": 16.25, "learning_rate": 9.742650137325956e-06, "loss": 1.8776967525482178, "step": 1964 }, { "epoch": 0.35787749158095933, "grad_norm": 9.375, "learning_rate": 9.742109121046106e-06, "loss": 1.3278359174728394, "step": 1966 }, { "epoch": 0.3582415582051515, "grad_norm": 9.4375, "learning_rate": 9.741567555626908e-06, "loss": 1.5787303447723389, "step": 1968 }, { "epoch": 0.35860562482934377, "grad_norm": 11.3125, "learning_rate": 9.741025441147836e-06, "loss": 1.1342029571533203, "step": 1970 }, { "epoch": 0.358969691453536, "grad_norm": 10.9375, "learning_rate": 9.74048277768844e-06, "loss": 0.539789080619812, "step": 1972 }, { "epoch": 0.3593337580777282, "grad_norm": 10.375, "learning_rate": 9.739939565328356e-06, "loss": 1.2300916910171509, "step": 1974 }, { "epoch": 0.35969782470192047, "grad_norm": 9.375, "learning_rate": 9.739395804147296e-06, "loss": 1.6378649473190308, "step": 1976 }, { "epoch": 0.36006189132611266, "grad_norm": 12.0, "learning_rate": 9.738851494225056e-06, "loss": 1.1425185203552246, "step": 1978 }, { "epoch": 0.3604259579503049, "grad_norm": 31.0, "learning_rate": 9.738306635641514e-06, "loss": 1.4143235683441162, "step": 1980 }, { "epoch": 0.36079002457449716, "grad_norm": 10.5625, "learning_rate": 9.737761228476621e-06, "loss": 1.4866759777069092, "step": 1982 }, { "epoch": 0.36115409119868935, "grad_norm": 11.125, "learning_rate": 9.737215272810417e-06, "loss": 1.732095718383789, "step": 1984 }, { "epoch": 0.3615181578228816, "grad_norm": 17.0, "learning_rate": 9.736668768723017e-06, "loss": 1.595767617225647, "step": 1986 }, { "epoch": 0.3618822244470738, "grad_norm": 9.375, "learning_rate": 9.736121716294617e-06, "loss": 1.0042572021484375, "step": 1988 }, { "epoch": 0.36224629107126605, "grad_norm": 24.625, "learning_rate": 9.735574115605499e-06, "loss": 1.4346060752868652, "step": 1990 }, { "epoch": 0.3626103576954583, "grad_norm": 10.75, "learning_rate": 9.735025966736019e-06, "loss": 0.6375983953475952, "step": 1992 }, { "epoch": 0.3629744243196505, "grad_norm": 18.25, "learning_rate": 9.73447726976661e-06, "loss": 1.3740500211715698, "step": 1994 }, { "epoch": 0.36333849094384274, "grad_norm": 24.75, "learning_rate": 9.7339280247778e-06, "loss": 1.8298540115356445, "step": 1996 }, { "epoch": 0.36370255756803493, "grad_norm": 10.9375, "learning_rate": 9.733378231850186e-06, "loss": 1.511721134185791, "step": 1998 }, { "epoch": 0.3640666241922272, "grad_norm": 11.25, "learning_rate": 9.732827891064442e-06, "loss": 1.127073049545288, "step": 2000 }, { "epoch": 0.3644306908164194, "grad_norm": 17.75, "learning_rate": 9.732277002501338e-06, "loss": 0.941523551940918, "step": 2002 }, { "epoch": 0.36479475744061163, "grad_norm": 7.0, "learning_rate": 9.731725566241705e-06, "loss": 1.2926925420761108, "step": 2004 }, { "epoch": 0.3651588240648039, "grad_norm": 4.4375, "learning_rate": 9.731173582366472e-06, "loss": 1.0289406776428223, "step": 2006 }, { "epoch": 0.36552289068899607, "grad_norm": 11.0625, "learning_rate": 9.730621050956635e-06, "loss": 1.2006124258041382, "step": 2008 }, { "epoch": 0.3658869573131883, "grad_norm": 9.3125, "learning_rate": 9.730067972093277e-06, "loss": 1.3921442031860352, "step": 2010 }, { "epoch": 0.3662510239373805, "grad_norm": 6.125, "learning_rate": 9.729514345857563e-06, "loss": 1.3311762809753418, "step": 2012 }, { "epoch": 0.36661509056157277, "grad_norm": 8.1875, "learning_rate": 9.72896017233073e-06, "loss": 1.4179387092590332, "step": 2014 }, { "epoch": 0.366979157185765, "grad_norm": 7.3125, "learning_rate": 9.728405451594107e-06, "loss": 1.2038400173187256, "step": 2016 }, { "epoch": 0.3673432238099572, "grad_norm": 29.25, "learning_rate": 9.727850183729094e-06, "loss": 1.4478435516357422, "step": 2018 }, { "epoch": 0.36770729043414946, "grad_norm": 17.875, "learning_rate": 9.72729436881717e-06, "loss": 1.4033602476119995, "step": 2020 }, { "epoch": 0.36807135705834165, "grad_norm": 15.875, "learning_rate": 9.726738006939907e-06, "loss": 1.0183924436569214, "step": 2022 }, { "epoch": 0.3684354236825339, "grad_norm": 8.8125, "learning_rate": 9.726181098178943e-06, "loss": 1.345322608947754, "step": 2024 }, { "epoch": 0.36879949030672615, "grad_norm": 6.28125, "learning_rate": 9.725623642616004e-06, "loss": 1.3124163150787354, "step": 2026 }, { "epoch": 0.36916355693091835, "grad_norm": 8.9375, "learning_rate": 9.725065640332893e-06, "loss": 1.3291378021240234, "step": 2028 }, { "epoch": 0.3695276235551106, "grad_norm": 11.375, "learning_rate": 9.7245070914115e-06, "loss": 1.4275076389312744, "step": 2030 }, { "epoch": 0.3698916901793028, "grad_norm": 18.25, "learning_rate": 9.723947995933781e-06, "loss": 1.418353796005249, "step": 2032 }, { "epoch": 0.37025575680349504, "grad_norm": 8.5, "learning_rate": 9.723388353981787e-06, "loss": 1.358997106552124, "step": 2034 }, { "epoch": 0.3706198234276873, "grad_norm": 9.3125, "learning_rate": 9.72282816563764e-06, "loss": 1.1635044813156128, "step": 2036 }, { "epoch": 0.3709838900518795, "grad_norm": 10.5, "learning_rate": 9.722267430983547e-06, "loss": 1.261069655418396, "step": 2038 }, { "epoch": 0.37134795667607173, "grad_norm": 15.375, "learning_rate": 9.721706150101797e-06, "loss": 1.3923027515411377, "step": 2040 }, { "epoch": 0.3717120233002639, "grad_norm": 17.25, "learning_rate": 9.721144323074749e-06, "loss": 1.47236967086792, "step": 2042 }, { "epoch": 0.3720760899244562, "grad_norm": 19.625, "learning_rate": 9.720581949984853e-06, "loss": 1.6056426763534546, "step": 2044 }, { "epoch": 0.3724401565486484, "grad_norm": 9.8125, "learning_rate": 9.720019030914633e-06, "loss": 1.3068888187408447, "step": 2046 }, { "epoch": 0.3728042231728406, "grad_norm": 65.5, "learning_rate": 9.719455565946698e-06, "loss": 1.5941617488861084, "step": 2048 }, { "epoch": 0.37316828979703287, "grad_norm": 6.71875, "learning_rate": 9.718891555163732e-06, "loss": 1.2337017059326172, "step": 2050 }, { "epoch": 0.37353235642122506, "grad_norm": 39.75, "learning_rate": 9.718326998648502e-06, "loss": 1.029727578163147, "step": 2052 }, { "epoch": 0.3738964230454173, "grad_norm": 10.125, "learning_rate": 9.717761896483853e-06, "loss": 1.1859915256500244, "step": 2054 }, { "epoch": 0.37426048966960956, "grad_norm": 8.0625, "learning_rate": 9.717196248752712e-06, "loss": 1.510350227355957, "step": 2056 }, { "epoch": 0.37462455629380176, "grad_norm": 6.96875, "learning_rate": 9.716630055538088e-06, "loss": 1.4485962390899658, "step": 2058 }, { "epoch": 0.374988622917994, "grad_norm": 17.125, "learning_rate": 9.716063316923063e-06, "loss": 1.7793612480163574, "step": 2060 }, { "epoch": 0.3753526895421862, "grad_norm": 13.4375, "learning_rate": 9.715496032990808e-06, "loss": 1.738204002380371, "step": 2062 }, { "epoch": 0.37571675616637845, "grad_norm": 9.4375, "learning_rate": 9.714928203824564e-06, "loss": 1.8245890140533447, "step": 2064 }, { "epoch": 0.3760808227905707, "grad_norm": 11.125, "learning_rate": 9.714359829507667e-06, "loss": 1.3317662477493286, "step": 2066 }, { "epoch": 0.3764448894147629, "grad_norm": 16.5, "learning_rate": 9.713790910123515e-06, "loss": 1.6469428539276123, "step": 2068 }, { "epoch": 0.37680895603895515, "grad_norm": 142.0, "learning_rate": 9.713221445755598e-06, "loss": 1.8514375686645508, "step": 2070 }, { "epoch": 0.37717302266314734, "grad_norm": 12.5, "learning_rate": 9.712651436487484e-06, "loss": 1.4966158866882324, "step": 2072 }, { "epoch": 0.3775370892873396, "grad_norm": 43.0, "learning_rate": 9.712080882402814e-06, "loss": 1.633608341217041, "step": 2074 }, { "epoch": 0.3779011559115318, "grad_norm": 14.25, "learning_rate": 9.71150978358532e-06, "loss": 0.921191394329071, "step": 2076 }, { "epoch": 0.37826522253572403, "grad_norm": 9.4375, "learning_rate": 9.710938140118807e-06, "loss": 0.6324692964553833, "step": 2078 }, { "epoch": 0.3786292891599163, "grad_norm": 4.59375, "learning_rate": 9.710365952087163e-06, "loss": 1.0460968017578125, "step": 2080 }, { "epoch": 0.3789933557841085, "grad_norm": 8.4375, "learning_rate": 9.709793219574347e-06, "loss": 1.064198613166809, "step": 2082 }, { "epoch": 0.3793574224083007, "grad_norm": 13.5, "learning_rate": 9.709219942664416e-06, "loss": 1.372009038925171, "step": 2084 }, { "epoch": 0.3797214890324929, "grad_norm": 10.4375, "learning_rate": 9.708646121441487e-06, "loss": 1.4485632181167603, "step": 2086 }, { "epoch": 0.38008555565668517, "grad_norm": 10.0625, "learning_rate": 9.708071755989772e-06, "loss": 1.4089813232421875, "step": 2088 }, { "epoch": 0.3804496222808774, "grad_norm": 43.25, "learning_rate": 9.707496846393553e-06, "loss": 0.8870041370391846, "step": 2090 }, { "epoch": 0.3808136889050696, "grad_norm": 11.875, "learning_rate": 9.706921392737198e-06, "loss": 1.2658190727233887, "step": 2092 }, { "epoch": 0.38117775552926186, "grad_norm": 14.625, "learning_rate": 9.70634539510515e-06, "loss": 1.7558362483978271, "step": 2094 }, { "epoch": 0.38154182215345406, "grad_norm": 13.5, "learning_rate": 9.705768853581937e-06, "loss": 1.5499252080917358, "step": 2096 }, { "epoch": 0.3819058887776463, "grad_norm": 8.25, "learning_rate": 9.705191768252163e-06, "loss": 1.5761405229568481, "step": 2098 }, { "epoch": 0.38226995540183856, "grad_norm": 9.8125, "learning_rate": 9.704614139200512e-06, "loss": 1.324540376663208, "step": 2100 }, { "epoch": 0.38263402202603075, "grad_norm": 12.25, "learning_rate": 9.704035966511748e-06, "loss": 1.4139460325241089, "step": 2102 }, { "epoch": 0.382998088650223, "grad_norm": 14.0625, "learning_rate": 9.703457250270721e-06, "loss": 1.282622218132019, "step": 2104 }, { "epoch": 0.3833621552744152, "grad_norm": 15.5, "learning_rate": 9.702877990562347e-06, "loss": 0.9831581115722656, "step": 2106 }, { "epoch": 0.38372622189860744, "grad_norm": 8.8125, "learning_rate": 9.702298187471637e-06, "loss": 1.4399539232254028, "step": 2108 }, { "epoch": 0.3840902885227997, "grad_norm": 12.375, "learning_rate": 9.701717841083671e-06, "loss": 1.5087871551513672, "step": 2110 }, { "epoch": 0.3844543551469919, "grad_norm": 14.9375, "learning_rate": 9.701136951483614e-06, "loss": 1.056596040725708, "step": 2112 }, { "epoch": 0.38481842177118414, "grad_norm": 59.5, "learning_rate": 9.70055551875671e-06, "loss": 1.0874437093734741, "step": 2114 }, { "epoch": 0.38518248839537633, "grad_norm": 11.8125, "learning_rate": 9.699973542988278e-06, "loss": 1.6652891635894775, "step": 2116 }, { "epoch": 0.3855465550195686, "grad_norm": 12.1875, "learning_rate": 9.699391024263727e-06, "loss": 1.5830656290054321, "step": 2118 }, { "epoch": 0.38591062164376083, "grad_norm": 16.875, "learning_rate": 9.698807962668534e-06, "loss": 1.6655910015106201, "step": 2120 }, { "epoch": 0.386274688267953, "grad_norm": 20.75, "learning_rate": 9.698224358288259e-06, "loss": 1.8198539018630981, "step": 2122 }, { "epoch": 0.3866387548921453, "grad_norm": 8.75, "learning_rate": 9.69764021120855e-06, "loss": 1.5100162029266357, "step": 2124 }, { "epoch": 0.38700282151633747, "grad_norm": 10.1875, "learning_rate": 9.697055521515127e-06, "loss": 1.4647231101989746, "step": 2126 }, { "epoch": 0.3873668881405297, "grad_norm": 14.25, "learning_rate": 9.696470289293785e-06, "loss": 1.421520471572876, "step": 2128 }, { "epoch": 0.38773095476472197, "grad_norm": 4.9375, "learning_rate": 9.695884514630411e-06, "loss": 1.1075094938278198, "step": 2130 }, { "epoch": 0.38809502138891416, "grad_norm": 8.3125, "learning_rate": 9.695298197610963e-06, "loss": 1.5915309190750122, "step": 2132 }, { "epoch": 0.3884590880131064, "grad_norm": 8.3125, "learning_rate": 9.694711338321479e-06, "loss": 1.5105695724487305, "step": 2134 }, { "epoch": 0.3888231546372986, "grad_norm": 15.6875, "learning_rate": 9.69412393684808e-06, "loss": 1.4969050884246826, "step": 2136 }, { "epoch": 0.38918722126149086, "grad_norm": 14.0, "learning_rate": 9.693535993276964e-06, "loss": 1.765354871749878, "step": 2138 }, { "epoch": 0.3895512878856831, "grad_norm": 33.0, "learning_rate": 9.692947507694408e-06, "loss": 1.1231988668441772, "step": 2140 }, { "epoch": 0.3899153545098753, "grad_norm": 12.75, "learning_rate": 9.692358480186775e-06, "loss": 1.1892549991607666, "step": 2142 }, { "epoch": 0.39027942113406755, "grad_norm": 18.125, "learning_rate": 9.691768910840495e-06, "loss": 1.389365792274475, "step": 2144 }, { "epoch": 0.39064348775825974, "grad_norm": 9.875, "learning_rate": 9.691178799742091e-06, "loss": 1.8792126178741455, "step": 2146 }, { "epoch": 0.391007554382452, "grad_norm": 9.1875, "learning_rate": 9.690588146978157e-06, "loss": 1.4509637355804443, "step": 2148 }, { "epoch": 0.39137162100664424, "grad_norm": 9.1875, "learning_rate": 9.68999695263537e-06, "loss": 1.119098424911499, "step": 2150 }, { "epoch": 0.39173568763083644, "grad_norm": 7.0, "learning_rate": 9.689405216800483e-06, "loss": 1.4772535562515259, "step": 2152 }, { "epoch": 0.3920997542550287, "grad_norm": 10.4375, "learning_rate": 9.688812939560332e-06, "loss": 1.401158332824707, "step": 2154 }, { "epoch": 0.3924638208792209, "grad_norm": 15.875, "learning_rate": 9.688220121001832e-06, "loss": 1.2952908277511597, "step": 2156 }, { "epoch": 0.39282788750341313, "grad_norm": 9.5625, "learning_rate": 9.687626761211979e-06, "loss": 1.1576765775680542, "step": 2158 }, { "epoch": 0.3931919541276053, "grad_norm": 18.625, "learning_rate": 9.68703286027784e-06, "loss": 1.9631154537200928, "step": 2160 }, { "epoch": 0.3935560207517976, "grad_norm": 8.0, "learning_rate": 9.686438418286572e-06, "loss": 1.681584358215332, "step": 2162 }, { "epoch": 0.3939200873759898, "grad_norm": 22.75, "learning_rate": 9.685843435325406e-06, "loss": 1.2069242000579834, "step": 2164 }, { "epoch": 0.394284154000182, "grad_norm": 36.5, "learning_rate": 9.685247911481652e-06, "loss": 1.852647304534912, "step": 2166 }, { "epoch": 0.39464822062437427, "grad_norm": 5.3125, "learning_rate": 9.684651846842705e-06, "loss": 1.0551722049713135, "step": 2168 }, { "epoch": 0.39501228724856646, "grad_norm": 13.9375, "learning_rate": 9.684055241496028e-06, "loss": 1.378616452217102, "step": 2170 }, { "epoch": 0.3953763538727587, "grad_norm": 10.3125, "learning_rate": 9.683458095529179e-06, "loss": 1.5073012113571167, "step": 2172 }, { "epoch": 0.39574042049695096, "grad_norm": 14.25, "learning_rate": 9.68286040902978e-06, "loss": 0.7733011245727539, "step": 2174 }, { "epoch": 0.39610448712114316, "grad_norm": 7.03125, "learning_rate": 9.682262182085541e-06, "loss": 0.7533259391784668, "step": 2176 }, { "epoch": 0.3964685537453354, "grad_norm": 10.625, "learning_rate": 9.68166341478425e-06, "loss": 1.4501830339431763, "step": 2178 }, { "epoch": 0.3968326203695276, "grad_norm": 19.0, "learning_rate": 9.681064107213774e-06, "loss": 1.541663408279419, "step": 2180 }, { "epoch": 0.39719668699371985, "grad_norm": 11.625, "learning_rate": 9.680464259462056e-06, "loss": 1.8518545627593994, "step": 2182 }, { "epoch": 0.3975607536179121, "grad_norm": 18.5, "learning_rate": 9.679863871617126e-06, "loss": 1.6773710250854492, "step": 2184 }, { "epoch": 0.3979248202421043, "grad_norm": 23.875, "learning_rate": 9.679262943767085e-06, "loss": 0.9477849006652832, "step": 2186 }, { "epoch": 0.39828888686629654, "grad_norm": 10.5, "learning_rate": 9.67866147600012e-06, "loss": 1.4844529628753662, "step": 2188 }, { "epoch": 0.39865295349048874, "grad_norm": 8.4375, "learning_rate": 9.678059468404488e-06, "loss": 1.2404515743255615, "step": 2190 }, { "epoch": 0.399017020114681, "grad_norm": 3.796875, "learning_rate": 9.677456921068538e-06, "loss": 1.119039535522461, "step": 2192 }, { "epoch": 0.39938108673887324, "grad_norm": 29.375, "learning_rate": 9.676853834080685e-06, "loss": 1.2887297868728638, "step": 2194 }, { "epoch": 0.39974515336306543, "grad_norm": 23.125, "learning_rate": 9.676250207529434e-06, "loss": 1.5945512056350708, "step": 2196 }, { "epoch": 0.4001092199872577, "grad_norm": 14.0625, "learning_rate": 9.675646041503366e-06, "loss": 0.6604722738265991, "step": 2198 }, { "epoch": 0.4004732866114499, "grad_norm": 10.25, "learning_rate": 9.675041336091135e-06, "loss": 1.4796134233474731, "step": 2200 }, { "epoch": 0.4008373532356421, "grad_norm": 10.5625, "learning_rate": 9.674436091381482e-06, "loss": 1.4008347988128662, "step": 2202 }, { "epoch": 0.4012014198598344, "grad_norm": 6.9375, "learning_rate": 9.673830307463225e-06, "loss": 1.2272157669067383, "step": 2204 }, { "epoch": 0.40156548648402657, "grad_norm": 4.96875, "learning_rate": 9.673223984425258e-06, "loss": 1.2955687046051025, "step": 2206 }, { "epoch": 0.4019295531082188, "grad_norm": 17.125, "learning_rate": 9.672617122356558e-06, "loss": 1.1196295022964478, "step": 2208 }, { "epoch": 0.402293619732411, "grad_norm": 20.125, "learning_rate": 9.672009721346178e-06, "loss": 1.329920768737793, "step": 2210 }, { "epoch": 0.40265768635660326, "grad_norm": 8.125, "learning_rate": 9.671401781483254e-06, "loss": 1.5276387929916382, "step": 2212 }, { "epoch": 0.4030217529807955, "grad_norm": 21.625, "learning_rate": 9.670793302856998e-06, "loss": 1.7229703664779663, "step": 2214 }, { "epoch": 0.4033858196049877, "grad_norm": 16.5, "learning_rate": 9.670184285556698e-06, "loss": 1.8548630475997925, "step": 2216 }, { "epoch": 0.40374988622917996, "grad_norm": 27.625, "learning_rate": 9.669574729671732e-06, "loss": 1.9700976610183716, "step": 2218 }, { "epoch": 0.40411395285337215, "grad_norm": 9.5625, "learning_rate": 9.668964635291544e-06, "loss": 1.4679195880889893, "step": 2220 }, { "epoch": 0.4044780194775644, "grad_norm": 15.375, "learning_rate": 9.668354002505664e-06, "loss": 1.486782193183899, "step": 2222 }, { "epoch": 0.40484208610175665, "grad_norm": 20.75, "learning_rate": 9.667742831403704e-06, "loss": 1.2617809772491455, "step": 2224 }, { "epoch": 0.40520615272594884, "grad_norm": 9.6875, "learning_rate": 9.667131122075345e-06, "loss": 1.2526487112045288, "step": 2226 }, { "epoch": 0.4055702193501411, "grad_norm": 8.75, "learning_rate": 9.666518874610355e-06, "loss": 1.2761998176574707, "step": 2228 }, { "epoch": 0.4059342859743333, "grad_norm": 14.75, "learning_rate": 9.66590608909858e-06, "loss": 1.1356678009033203, "step": 2230 }, { "epoch": 0.40629835259852554, "grad_norm": 7.5, "learning_rate": 9.665292765629944e-06, "loss": 1.4666557312011719, "step": 2232 }, { "epoch": 0.40666241922271773, "grad_norm": 18.0, "learning_rate": 9.664678904294447e-06, "loss": 1.1282765865325928, "step": 2234 }, { "epoch": 0.40702648584691, "grad_norm": 13.3125, "learning_rate": 9.664064505182174e-06, "loss": 1.6259841918945312, "step": 2236 }, { "epoch": 0.40739055247110223, "grad_norm": 7.4375, "learning_rate": 9.663449568383282e-06, "loss": 1.4740502834320068, "step": 2238 }, { "epoch": 0.4077546190952944, "grad_norm": 10.8125, "learning_rate": 9.662834093988014e-06, "loss": 1.4135972261428833, "step": 2240 }, { "epoch": 0.4081186857194867, "grad_norm": 9.75, "learning_rate": 9.662218082086688e-06, "loss": 1.4066673517227173, "step": 2242 }, { "epoch": 0.40848275234367887, "grad_norm": 12.5625, "learning_rate": 9.661601532769697e-06, "loss": 1.3687559366226196, "step": 2244 }, { "epoch": 0.4088468189678711, "grad_norm": 14.875, "learning_rate": 9.66098444612752e-06, "loss": 1.6247143745422363, "step": 2246 }, { "epoch": 0.40921088559206337, "grad_norm": 7.46875, "learning_rate": 9.660366822250717e-06, "loss": 1.6085401773452759, "step": 2248 }, { "epoch": 0.40957495221625556, "grad_norm": 8.875, "learning_rate": 9.659748661229912e-06, "loss": 1.3979735374450684, "step": 2250 }, { "epoch": 0.4099390188404478, "grad_norm": 36.0, "learning_rate": 9.659129963155826e-06, "loss": 1.095952033996582, "step": 2252 }, { "epoch": 0.41030308546464, "grad_norm": 11.5, "learning_rate": 9.658510728119245e-06, "loss": 1.641335368156433, "step": 2254 }, { "epoch": 0.41066715208883225, "grad_norm": 16.125, "learning_rate": 9.657890956211043e-06, "loss": 1.5438529253005981, "step": 2256 }, { "epoch": 0.4110312187130245, "grad_norm": 10.0625, "learning_rate": 9.657270647522166e-06, "loss": 1.3864002227783203, "step": 2258 }, { "epoch": 0.4113952853372167, "grad_norm": 12.5625, "learning_rate": 9.656649802143646e-06, "loss": 1.7747098207473755, "step": 2260 }, { "epoch": 0.41175935196140895, "grad_norm": 7.875, "learning_rate": 9.656028420166584e-06, "loss": 1.4297223091125488, "step": 2262 }, { "epoch": 0.41212341858560114, "grad_norm": 13.9375, "learning_rate": 9.655406501682167e-06, "loss": 1.1971871852874756, "step": 2264 }, { "epoch": 0.4124874852097934, "grad_norm": 130.0, "learning_rate": 9.654784046781661e-06, "loss": 1.7497791051864624, "step": 2266 }, { "epoch": 0.41285155183398564, "grad_norm": 6.8125, "learning_rate": 9.654161055556408e-06, "loss": 1.375710129737854, "step": 2268 }, { "epoch": 0.41321561845817784, "grad_norm": 9.1875, "learning_rate": 9.653537528097827e-06, "loss": 1.2497614622116089, "step": 2270 }, { "epoch": 0.4135796850823701, "grad_norm": 12.4375, "learning_rate": 9.652913464497423e-06, "loss": 1.4670453071594238, "step": 2272 }, { "epoch": 0.4139437517065623, "grad_norm": 19.125, "learning_rate": 9.652288864846773e-06, "loss": 1.6561918258666992, "step": 2274 }, { "epoch": 0.41430781833075453, "grad_norm": 20.0, "learning_rate": 9.65166372923753e-06, "loss": 1.6369695663452148, "step": 2276 }, { "epoch": 0.4146718849549468, "grad_norm": 12.3125, "learning_rate": 9.651038057761435e-06, "loss": 1.2019511461257935, "step": 2278 }, { "epoch": 0.415035951579139, "grad_norm": 11.4375, "learning_rate": 9.650411850510302e-06, "loss": 1.3143885135650635, "step": 2280 }, { "epoch": 0.4154000182033312, "grad_norm": 13.3125, "learning_rate": 9.649785107576025e-06, "loss": 1.4684603214263916, "step": 2282 }, { "epoch": 0.4157640848275234, "grad_norm": 9.875, "learning_rate": 9.649157829050573e-06, "loss": 1.6451226472854614, "step": 2284 }, { "epoch": 0.41612815145171567, "grad_norm": 6.3125, "learning_rate": 9.648530015025998e-06, "loss": 1.2578718662261963, "step": 2286 }, { "epoch": 0.4164922180759079, "grad_norm": 11.125, "learning_rate": 9.64790166559443e-06, "loss": 1.4395158290863037, "step": 2288 }, { "epoch": 0.4168562847001001, "grad_norm": 6.9375, "learning_rate": 9.647272780848076e-06, "loss": 1.3975218534469604, "step": 2290 }, { "epoch": 0.41722035132429236, "grad_norm": 8.4375, "learning_rate": 9.646643360879222e-06, "loss": 1.0757924318313599, "step": 2292 }, { "epoch": 0.41758441794848455, "grad_norm": 11.3125, "learning_rate": 9.646013405780235e-06, "loss": 1.3886827230453491, "step": 2294 }, { "epoch": 0.4179484845726768, "grad_norm": 3.734375, "learning_rate": 9.645382915643554e-06, "loss": 1.2899380922317505, "step": 2296 }, { "epoch": 0.41831255119686905, "grad_norm": 48.5, "learning_rate": 9.644751890561708e-06, "loss": 1.2573360204696655, "step": 2298 }, { "epoch": 0.41867661782106125, "grad_norm": 8.5625, "learning_rate": 9.64412033062729e-06, "loss": 1.3277581930160522, "step": 2300 }, { "epoch": 0.4190406844452535, "grad_norm": 71.5, "learning_rate": 9.643488235932981e-06, "loss": 1.5124677419662476, "step": 2302 }, { "epoch": 0.4194047510694457, "grad_norm": 13.5625, "learning_rate": 9.642855606571541e-06, "loss": 1.6766806840896606, "step": 2304 }, { "epoch": 0.41976881769363794, "grad_norm": 10.0625, "learning_rate": 9.642222442635802e-06, "loss": 1.4139695167541504, "step": 2306 }, { "epoch": 0.42013288431783014, "grad_norm": 86.0, "learning_rate": 9.641588744218684e-06, "loss": 1.4274101257324219, "step": 2308 }, { "epoch": 0.4204969509420224, "grad_norm": 8.5, "learning_rate": 9.640954511413171e-06, "loss": 1.2447904348373413, "step": 2310 }, { "epoch": 0.42086101756621463, "grad_norm": 8.5625, "learning_rate": 9.640319744312344e-06, "loss": 0.9159026145935059, "step": 2312 }, { "epoch": 0.42122508419040683, "grad_norm": 5.78125, "learning_rate": 9.639684443009343e-06, "loss": 1.0781114101409912, "step": 2314 }, { "epoch": 0.4215891508145991, "grad_norm": 43.5, "learning_rate": 9.6390486075974e-06, "loss": 1.3290879726409912, "step": 2316 }, { "epoch": 0.4219532174387913, "grad_norm": 13.5625, "learning_rate": 9.638412238169825e-06, "loss": 0.6265374422073364, "step": 2318 }, { "epoch": 0.4223172840629835, "grad_norm": 10.0625, "learning_rate": 9.637775334819999e-06, "loss": 1.3973679542541504, "step": 2320 }, { "epoch": 0.42268135068717577, "grad_norm": 4.46875, "learning_rate": 9.637137897641385e-06, "loss": 1.2640068531036377, "step": 2322 }, { "epoch": 0.42304541731136797, "grad_norm": 4.15625, "learning_rate": 9.63649992672752e-06, "loss": 0.9691958427429199, "step": 2324 }, { "epoch": 0.4234094839355602, "grad_norm": 19.375, "learning_rate": 9.635861422172034e-06, "loss": 1.5447862148284912, "step": 2326 }, { "epoch": 0.4237735505597524, "grad_norm": 7.6875, "learning_rate": 9.635222384068617e-06, "loss": 1.8208239078521729, "step": 2328 }, { "epoch": 0.42413761718394466, "grad_norm": 3.984375, "learning_rate": 9.634582812511049e-06, "loss": 1.0587718486785889, "step": 2330 }, { "epoch": 0.4245016838081369, "grad_norm": 9.625, "learning_rate": 9.63394270759318e-06, "loss": 1.0779318809509277, "step": 2332 }, { "epoch": 0.4248657504323291, "grad_norm": 9.5625, "learning_rate": 9.633302069408948e-06, "loss": 1.7597366571426392, "step": 2334 }, { "epoch": 0.42522981705652135, "grad_norm": 10.625, "learning_rate": 9.63266089805236e-06, "loss": 1.526566982269287, "step": 2336 }, { "epoch": 0.42559388368071355, "grad_norm": 12.375, "learning_rate": 9.632019193617507e-06, "loss": 1.5721865892410278, "step": 2338 }, { "epoch": 0.4259579503049058, "grad_norm": 74.0, "learning_rate": 9.631376956198559e-06, "loss": 0.6878648400306702, "step": 2340 }, { "epoch": 0.42632201692909805, "grad_norm": 53.0, "learning_rate": 9.630734185889756e-06, "loss": 0.45763444900512695, "step": 2342 }, { "epoch": 0.42668608355329024, "grad_norm": 15.8125, "learning_rate": 9.630090882785431e-06, "loss": 0.8998202085494995, "step": 2344 }, { "epoch": 0.4270501501774825, "grad_norm": 8.5, "learning_rate": 9.629447046979974e-06, "loss": 1.385998010635376, "step": 2346 }, { "epoch": 0.4274142168016747, "grad_norm": 9.9375, "learning_rate": 9.628802678567874e-06, "loss": 1.6371874809265137, "step": 2348 }, { "epoch": 0.42777828342586693, "grad_norm": 9.5, "learning_rate": 9.628157777643687e-06, "loss": 1.8729541301727295, "step": 2350 }, { "epoch": 0.4281423500500592, "grad_norm": 25.625, "learning_rate": 9.627512344302052e-06, "loss": 1.4513651132583618, "step": 2352 }, { "epoch": 0.4285064166742514, "grad_norm": 78.5, "learning_rate": 9.626866378637681e-06, "loss": 1.4546449184417725, "step": 2354 }, { "epoch": 0.4288704832984436, "grad_norm": 6.6875, "learning_rate": 9.626219880745365e-06, "loss": 1.1034903526306152, "step": 2356 }, { "epoch": 0.4292345499226358, "grad_norm": 36.5, "learning_rate": 9.625572850719978e-06, "loss": 0.9412211179733276, "step": 2358 }, { "epoch": 0.42959861654682807, "grad_norm": 8.4375, "learning_rate": 9.624925288656469e-06, "loss": 1.3844599723815918, "step": 2360 }, { "epoch": 0.4299626831710203, "grad_norm": 8.875, "learning_rate": 9.624277194649864e-06, "loss": 1.4201388359069824, "step": 2362 }, { "epoch": 0.4303267497952125, "grad_norm": 5.09375, "learning_rate": 9.623628568795269e-06, "loss": 1.3087117671966553, "step": 2364 }, { "epoch": 0.43069081641940477, "grad_norm": 9.5625, "learning_rate": 9.622979411187867e-06, "loss": 1.365851879119873, "step": 2366 }, { "epoch": 0.43105488304359696, "grad_norm": 8.25, "learning_rate": 9.622329721922919e-06, "loss": 1.383945107460022, "step": 2368 }, { "epoch": 0.4314189496677892, "grad_norm": 5.3125, "learning_rate": 9.621679501095764e-06, "loss": 1.314424753189087, "step": 2370 }, { "epoch": 0.43178301629198146, "grad_norm": 11.0, "learning_rate": 9.62102874880182e-06, "loss": 1.4450368881225586, "step": 2372 }, { "epoch": 0.43214708291617365, "grad_norm": 30.25, "learning_rate": 9.620377465136582e-06, "loss": 1.0981154441833496, "step": 2374 }, { "epoch": 0.4325111495403659, "grad_norm": 14.75, "learning_rate": 9.619725650195621e-06, "loss": 0.416414350271225, "step": 2376 }, { "epoch": 0.4328752161645581, "grad_norm": 6.90625, "learning_rate": 9.619073304074591e-06, "loss": 1.3250190019607544, "step": 2378 }, { "epoch": 0.43323928278875035, "grad_norm": 11.0625, "learning_rate": 9.618420426869222e-06, "loss": 1.3769481182098389, "step": 2380 }, { "epoch": 0.4336033494129426, "grad_norm": 54.25, "learning_rate": 9.617767018675319e-06, "loss": 1.4135537147521973, "step": 2382 }, { "epoch": 0.4339674160371348, "grad_norm": 13.75, "learning_rate": 9.617113079588766e-06, "loss": 1.5625133514404297, "step": 2384 }, { "epoch": 0.43433148266132704, "grad_norm": 9.6875, "learning_rate": 9.61645860970553e-06, "loss": 1.414522647857666, "step": 2386 }, { "epoch": 0.43469554928551923, "grad_norm": 47.75, "learning_rate": 9.615803609121649e-06, "loss": 1.3832049369812012, "step": 2388 }, { "epoch": 0.4350596159097115, "grad_norm": 34.25, "learning_rate": 9.61514807793324e-06, "loss": 1.6867897510528564, "step": 2390 }, { "epoch": 0.4354236825339037, "grad_norm": 18.0, "learning_rate": 9.614492016236502e-06, "loss": 2.0114636421203613, "step": 2392 }, { "epoch": 0.4357877491580959, "grad_norm": 9.3125, "learning_rate": 9.613835424127711e-06, "loss": 1.6052539348602295, "step": 2394 }, { "epoch": 0.4361518157822882, "grad_norm": 12.3125, "learning_rate": 9.613178301703215e-06, "loss": 1.3947083950042725, "step": 2396 }, { "epoch": 0.43651588240648037, "grad_norm": 11.9375, "learning_rate": 9.612520649059444e-06, "loss": 1.4418244361877441, "step": 2398 }, { "epoch": 0.4368799490306726, "grad_norm": 18.5, "learning_rate": 9.611862466292914e-06, "loss": 1.5792224407196045, "step": 2400 }, { "epoch": 0.4372440156548648, "grad_norm": 11.875, "learning_rate": 9.6112037535002e-06, "loss": 1.3662711381912231, "step": 2402 }, { "epoch": 0.43760808227905706, "grad_norm": 26.5, "learning_rate": 9.61054451077797e-06, "loss": 1.4911441802978516, "step": 2404 }, { "epoch": 0.4379721489032493, "grad_norm": 19.5, "learning_rate": 9.609884738222968e-06, "loss": 1.6761562824249268, "step": 2406 }, { "epoch": 0.4383362155274415, "grad_norm": 11.75, "learning_rate": 9.60922443593201e-06, "loss": 1.0963339805603027, "step": 2408 }, { "epoch": 0.43870028215163376, "grad_norm": 79.0, "learning_rate": 9.60856360400199e-06, "loss": 1.265076994895935, "step": 2410 }, { "epoch": 0.43906434877582595, "grad_norm": 12.5, "learning_rate": 9.607902242529887e-06, "loss": 1.6833877563476562, "step": 2412 }, { "epoch": 0.4394284154000182, "grad_norm": 16.75, "learning_rate": 9.607240351612754e-06, "loss": 1.2937580347061157, "step": 2414 }, { "epoch": 0.43979248202421045, "grad_norm": 26.375, "learning_rate": 9.606577931347714e-06, "loss": 1.891810417175293, "step": 2416 }, { "epoch": 0.44015654864840265, "grad_norm": 16.25, "learning_rate": 9.60591498183198e-06, "loss": 1.2742931842803955, "step": 2418 }, { "epoch": 0.4405206152725949, "grad_norm": 66.5, "learning_rate": 9.605251503162838e-06, "loss": 0.997144341468811, "step": 2420 }, { "epoch": 0.4408846818967871, "grad_norm": 22.25, "learning_rate": 9.604587495437647e-06, "loss": 2.052816390991211, "step": 2422 }, { "epoch": 0.44124874852097934, "grad_norm": 21.25, "learning_rate": 9.60392295875385e-06, "loss": 1.2586523294448853, "step": 2424 }, { "epoch": 0.4416128151451716, "grad_norm": 18.0, "learning_rate": 9.603257893208964e-06, "loss": 1.330334186553955, "step": 2426 }, { "epoch": 0.4419768817693638, "grad_norm": 10.3125, "learning_rate": 9.602592298900587e-06, "loss": 1.0452346801757812, "step": 2428 }, { "epoch": 0.44234094839355603, "grad_norm": 9.3125, "learning_rate": 9.601926175926386e-06, "loss": 1.7862670421600342, "step": 2430 }, { "epoch": 0.4427050150177482, "grad_norm": 11.3125, "learning_rate": 9.601259524384117e-06, "loss": 1.168826937675476, "step": 2432 }, { "epoch": 0.4430690816419405, "grad_norm": 6.65625, "learning_rate": 9.600592344371608e-06, "loss": 1.142594575881958, "step": 2434 }, { "epoch": 0.4434331482661327, "grad_norm": 9.0625, "learning_rate": 9.599924635986764e-06, "loss": 1.5105631351470947, "step": 2436 }, { "epoch": 0.4437972148903249, "grad_norm": 15.9375, "learning_rate": 9.59925639932757e-06, "loss": 1.7031049728393555, "step": 2438 }, { "epoch": 0.44416128151451717, "grad_norm": 13.5, "learning_rate": 9.598587634492087e-06, "loss": 1.6640347242355347, "step": 2440 }, { "epoch": 0.44452534813870936, "grad_norm": 3.875, "learning_rate": 9.59791834157845e-06, "loss": 1.2849209308624268, "step": 2442 }, { "epoch": 0.4448894147629016, "grad_norm": 5.21875, "learning_rate": 9.597248520684878e-06, "loss": 1.2987831830978394, "step": 2444 }, { "epoch": 0.44525348138709386, "grad_norm": 7.78125, "learning_rate": 9.596578171909665e-06, "loss": 1.2319151163101196, "step": 2446 }, { "epoch": 0.44561754801128606, "grad_norm": 6.0625, "learning_rate": 9.59590729535118e-06, "loss": 1.290708303451538, "step": 2448 }, { "epoch": 0.4459816146354783, "grad_norm": 9.3125, "learning_rate": 9.595235891107873e-06, "loss": 1.2799159288406372, "step": 2450 }, { "epoch": 0.4463456812596705, "grad_norm": 11.625, "learning_rate": 9.594563959278267e-06, "loss": 2.1087443828582764, "step": 2452 }, { "epoch": 0.44670974788386275, "grad_norm": 7.96875, "learning_rate": 9.59389149996097e-06, "loss": 1.3041532039642334, "step": 2454 }, { "epoch": 0.447073814508055, "grad_norm": 8.5, "learning_rate": 9.59321851325466e-06, "loss": 1.4496657848358154, "step": 2456 }, { "epoch": 0.4474378811322472, "grad_norm": 11.75, "learning_rate": 9.592544999258096e-06, "loss": 1.35652756690979, "step": 2458 }, { "epoch": 0.44780194775643944, "grad_norm": 7.125, "learning_rate": 9.59187095807011e-06, "loss": 1.4326095581054688, "step": 2460 }, { "epoch": 0.44816601438063164, "grad_norm": 9.0, "learning_rate": 9.591196389789619e-06, "loss": 1.1687068939208984, "step": 2462 }, { "epoch": 0.4485300810048239, "grad_norm": 21.375, "learning_rate": 9.59052129451561e-06, "loss": 1.209844946861267, "step": 2464 }, { "epoch": 0.4488941476290161, "grad_norm": 20.0, "learning_rate": 9.589845672347153e-06, "loss": 1.7998831272125244, "step": 2466 }, { "epoch": 0.44925821425320833, "grad_norm": 6.4375, "learning_rate": 9.589169523383393e-06, "loss": 1.1922943592071533, "step": 2468 }, { "epoch": 0.4496222808774006, "grad_norm": 6.9375, "learning_rate": 9.588492847723551e-06, "loss": 1.3234403133392334, "step": 2470 }, { "epoch": 0.4499863475015928, "grad_norm": 6.84375, "learning_rate": 9.587815645466927e-06, "loss": 1.366612434387207, "step": 2472 }, { "epoch": 0.450350414125785, "grad_norm": 13.3125, "learning_rate": 9.587137916712896e-06, "loss": 1.3607661724090576, "step": 2474 }, { "epoch": 0.4507144807499772, "grad_norm": 22.875, "learning_rate": 9.586459661560913e-06, "loss": 1.454066514968872, "step": 2476 }, { "epoch": 0.45107854737416947, "grad_norm": 34.0, "learning_rate": 9.58578088011051e-06, "loss": 1.3590030670166016, "step": 2478 }, { "epoch": 0.4514426139983617, "grad_norm": 11.8125, "learning_rate": 9.585101572461293e-06, "loss": 1.268595576286316, "step": 2480 }, { "epoch": 0.4518066806225539, "grad_norm": 20.25, "learning_rate": 9.584421738712953e-06, "loss": 1.4530681371688843, "step": 2482 }, { "epoch": 0.45217074724674616, "grad_norm": 9.25, "learning_rate": 9.583741378965246e-06, "loss": 1.4298181533813477, "step": 2484 }, { "epoch": 0.45253481387093836, "grad_norm": 7.03125, "learning_rate": 9.583060493318015e-06, "loss": 1.1290262937545776, "step": 2486 }, { "epoch": 0.4528988804951306, "grad_norm": 8.8125, "learning_rate": 9.582379081871178e-06, "loss": 1.4337834119796753, "step": 2488 }, { "epoch": 0.45326294711932286, "grad_norm": 11.625, "learning_rate": 9.58169714472473e-06, "loss": 1.0604631900787354, "step": 2490 }, { "epoch": 0.45362701374351505, "grad_norm": 10.9375, "learning_rate": 9.581014681978742e-06, "loss": 1.4299339056015015, "step": 2492 }, { "epoch": 0.4539910803677073, "grad_norm": 9.5, "learning_rate": 9.58033169373336e-06, "loss": 1.0158357620239258, "step": 2494 }, { "epoch": 0.4543551469918995, "grad_norm": 15.9375, "learning_rate": 9.579648180088814e-06, "loss": 0.505800724029541, "step": 2496 }, { "epoch": 0.45471921361609174, "grad_norm": 11.25, "learning_rate": 9.578964141145404e-06, "loss": 1.334416389465332, "step": 2498 }, { "epoch": 0.455083280240284, "grad_norm": 15.75, "learning_rate": 9.57827957700351e-06, "loss": 1.4583282470703125, "step": 2500 }, { "epoch": 0.4554473468644762, "grad_norm": 96.0, "learning_rate": 9.577594487763589e-06, "loss": 1.6680463552474976, "step": 2502 }, { "epoch": 0.45581141348866844, "grad_norm": 32.0, "learning_rate": 9.576908873526176e-06, "loss": 1.1395843029022217, "step": 2504 }, { "epoch": 0.45617548011286063, "grad_norm": 9.9375, "learning_rate": 9.576222734391882e-06, "loss": 1.6111465692520142, "step": 2506 }, { "epoch": 0.4565395467370529, "grad_norm": 12.9375, "learning_rate": 9.575536070461393e-06, "loss": 2.0324113368988037, "step": 2508 }, { "epoch": 0.45690361336124513, "grad_norm": 22.875, "learning_rate": 9.57484888183548e-06, "loss": 1.335809350013733, "step": 2510 }, { "epoch": 0.4572676799854373, "grad_norm": 22.5, "learning_rate": 9.57416116861498e-06, "loss": 1.6101138591766357, "step": 2512 }, { "epoch": 0.4576317466096296, "grad_norm": 15.4375, "learning_rate": 9.57347293090081e-06, "loss": 1.3389840126037598, "step": 2514 }, { "epoch": 0.45799581323382177, "grad_norm": 14.0625, "learning_rate": 9.572784168793972e-06, "loss": 1.2342911958694458, "step": 2516 }, { "epoch": 0.458359879858014, "grad_norm": 4.5, "learning_rate": 9.572094882395537e-06, "loss": 1.1682186126708984, "step": 2518 }, { "epoch": 0.45872394648220627, "grad_norm": 7.09375, "learning_rate": 9.571405071806652e-06, "loss": 1.5737831592559814, "step": 2520 }, { "epoch": 0.45908801310639846, "grad_norm": 14.875, "learning_rate": 9.57071473712855e-06, "loss": 1.350905179977417, "step": 2522 }, { "epoch": 0.4594520797305907, "grad_norm": 6.875, "learning_rate": 9.57002387846253e-06, "loss": 1.218022346496582, "step": 2524 }, { "epoch": 0.4598161463547829, "grad_norm": 13.0, "learning_rate": 9.569332495909972e-06, "loss": 1.3421099185943604, "step": 2526 }, { "epoch": 0.46018021297897516, "grad_norm": 19.75, "learning_rate": 9.568640589572336e-06, "loss": 1.123950481414795, "step": 2528 }, { "epoch": 0.4605442796031674, "grad_norm": 13.5, "learning_rate": 9.567948159551158e-06, "loss": 1.0482577085494995, "step": 2530 }, { "epoch": 0.4609083462273596, "grad_norm": 21.25, "learning_rate": 9.567255205948046e-06, "loss": 1.5531996488571167, "step": 2532 }, { "epoch": 0.46127241285155185, "grad_norm": 14.25, "learning_rate": 9.566561728864688e-06, "loss": 1.5404894351959229, "step": 2534 }, { "epoch": 0.46163647947574404, "grad_norm": 8.0, "learning_rate": 9.565867728402851e-06, "loss": 1.4660441875457764, "step": 2536 }, { "epoch": 0.4620005460999363, "grad_norm": 5.4375, "learning_rate": 9.565173204664375e-06, "loss": 1.4605087041854858, "step": 2538 }, { "epoch": 0.46236461272412854, "grad_norm": 7.1875, "learning_rate": 9.564478157751182e-06, "loss": 1.2721070051193237, "step": 2540 }, { "epoch": 0.46272867934832074, "grad_norm": 12.125, "learning_rate": 9.563782587765263e-06, "loss": 1.4563473463058472, "step": 2542 }, { "epoch": 0.463092745972513, "grad_norm": 25.875, "learning_rate": 9.563086494808694e-06, "loss": 1.4034043550491333, "step": 2544 }, { "epoch": 0.4634568125967052, "grad_norm": 13.9375, "learning_rate": 9.56238987898362e-06, "loss": 1.4014735221862793, "step": 2546 }, { "epoch": 0.46382087922089743, "grad_norm": 8.75, "learning_rate": 9.561692740392268e-06, "loss": 1.389622449874878, "step": 2548 }, { "epoch": 0.4641849458450896, "grad_norm": 12.125, "learning_rate": 9.560995079136942e-06, "loss": 1.4004762172698975, "step": 2550 }, { "epoch": 0.4645490124692819, "grad_norm": 5.34375, "learning_rate": 9.56029689532002e-06, "loss": 1.3371957540512085, "step": 2552 }, { "epoch": 0.4649130790934741, "grad_norm": 13.875, "learning_rate": 9.559598189043958e-06, "loss": 1.3006490468978882, "step": 2554 }, { "epoch": 0.4652771457176663, "grad_norm": 21.0, "learning_rate": 9.558898960411284e-06, "loss": 1.2520625591278076, "step": 2556 }, { "epoch": 0.46564121234185857, "grad_norm": 30.25, "learning_rate": 9.558199209524613e-06, "loss": 2.064906120300293, "step": 2558 }, { "epoch": 0.46600527896605076, "grad_norm": 10.6875, "learning_rate": 9.557498936486627e-06, "loss": 1.0734522342681885, "step": 2560 }, { "epoch": 0.466369345590243, "grad_norm": 7.6875, "learning_rate": 9.55679814140009e-06, "loss": 1.417957067489624, "step": 2562 }, { "epoch": 0.46673341221443526, "grad_norm": 11.0, "learning_rate": 9.55609682436784e-06, "loss": 1.2966688871383667, "step": 2564 }, { "epoch": 0.46709747883862746, "grad_norm": 6.96875, "learning_rate": 9.555394985492794e-06, "loss": 1.1198680400848389, "step": 2566 }, { "epoch": 0.4674615454628197, "grad_norm": 19.25, "learning_rate": 9.55469262487794e-06, "loss": 1.3935253620147705, "step": 2568 }, { "epoch": 0.4678256120870119, "grad_norm": 14.6875, "learning_rate": 9.55398974262635e-06, "loss": 1.3144853115081787, "step": 2570 }, { "epoch": 0.46818967871120415, "grad_norm": 11.4375, "learning_rate": 9.55328633884117e-06, "loss": 1.0120824575424194, "step": 2572 }, { "epoch": 0.4685537453353964, "grad_norm": 13.0625, "learning_rate": 9.552582413625619e-06, "loss": 1.8596464395523071, "step": 2574 }, { "epoch": 0.4689178119595886, "grad_norm": 16.5, "learning_rate": 9.551877967082996e-06, "loss": 1.6003751754760742, "step": 2576 }, { "epoch": 0.46928187858378084, "grad_norm": 8.375, "learning_rate": 9.551172999316675e-06, "loss": 1.1527396440505981, "step": 2578 }, { "epoch": 0.46964594520797304, "grad_norm": 11.75, "learning_rate": 9.55046751043011e-06, "loss": 1.185123324394226, "step": 2580 }, { "epoch": 0.4700100118321653, "grad_norm": 11.125, "learning_rate": 9.549761500526827e-06, "loss": 1.2402656078338623, "step": 2582 }, { "epoch": 0.47037407845635754, "grad_norm": 20.125, "learning_rate": 9.549054969710427e-06, "loss": 1.3258576393127441, "step": 2584 }, { "epoch": 0.47073814508054973, "grad_norm": 15.6875, "learning_rate": 9.548347918084595e-06, "loss": 1.0669920444488525, "step": 2586 }, { "epoch": 0.471102211704742, "grad_norm": 20.875, "learning_rate": 9.547640345753087e-06, "loss": 1.544162631034851, "step": 2588 }, { "epoch": 0.4714662783289342, "grad_norm": 6.53125, "learning_rate": 9.546932252819732e-06, "loss": 1.3497596979141235, "step": 2590 }, { "epoch": 0.4718303449531264, "grad_norm": 14.3125, "learning_rate": 9.546223639388448e-06, "loss": 1.206697940826416, "step": 2592 }, { "epoch": 0.4721944115773187, "grad_norm": 3.703125, "learning_rate": 9.545514505563214e-06, "loss": 1.1008825302124023, "step": 2594 }, { "epoch": 0.47255847820151087, "grad_norm": 6.15625, "learning_rate": 9.544804851448094e-06, "loss": 1.3963027000427246, "step": 2596 }, { "epoch": 0.4729225448257031, "grad_norm": 36.5, "learning_rate": 9.54409467714723e-06, "loss": 1.3875467777252197, "step": 2598 }, { "epoch": 0.4732866114498953, "grad_norm": 7.1875, "learning_rate": 9.543383982764833e-06, "loss": 1.1912996768951416, "step": 2600 }, { "epoch": 0.47365067807408756, "grad_norm": 3.046875, "learning_rate": 9.542672768405199e-06, "loss": 1.1987833976745605, "step": 2602 }, { "epoch": 0.4740147446982798, "grad_norm": 2.703125, "learning_rate": 9.541961034172692e-06, "loss": 0.8739246726036072, "step": 2604 }, { "epoch": 0.474378811322472, "grad_norm": 9.125, "learning_rate": 9.541248780171757e-06, "loss": 0.3598101735115051, "step": 2606 }, { "epoch": 0.47474287794666425, "grad_norm": 8.25, "learning_rate": 9.540536006506917e-06, "loss": 0.901496171951294, "step": 2608 }, { "epoch": 0.47510694457085645, "grad_norm": 6.25, "learning_rate": 9.539822713282765e-06, "loss": 1.2627109289169312, "step": 2610 }, { "epoch": 0.4754710111950487, "grad_norm": 9.5625, "learning_rate": 9.539108900603975e-06, "loss": 1.4826383590698242, "step": 2612 }, { "epoch": 0.47583507781924095, "grad_norm": 21.25, "learning_rate": 9.538394568575298e-06, "loss": 1.5900036096572876, "step": 2614 }, { "epoch": 0.47619914444343314, "grad_norm": 4.84375, "learning_rate": 9.537679717301558e-06, "loss": 1.3371520042419434, "step": 2616 }, { "epoch": 0.4765632110676254, "grad_norm": 3.1875, "learning_rate": 9.536964346887656e-06, "loss": 0.876775324344635, "step": 2618 }, { "epoch": 0.4769272776918176, "grad_norm": 14.125, "learning_rate": 9.536248457438568e-06, "loss": 1.3802015781402588, "step": 2620 }, { "epoch": 0.47729134431600984, "grad_norm": 12.8125, "learning_rate": 9.535532049059353e-06, "loss": 1.3966376781463623, "step": 2622 }, { "epoch": 0.47765541094020203, "grad_norm": 15.75, "learning_rate": 9.534815121855137e-06, "loss": 1.4105513095855713, "step": 2624 }, { "epoch": 0.4780194775643943, "grad_norm": 24.5, "learning_rate": 9.534097675931127e-06, "loss": 1.8112739324569702, "step": 2626 }, { "epoch": 0.47838354418858653, "grad_norm": 10.625, "learning_rate": 9.533379711392605e-06, "loss": 1.3103275299072266, "step": 2628 }, { "epoch": 0.4787476108127787, "grad_norm": 12.0, "learning_rate": 9.53266122834493e-06, "loss": 1.1173889636993408, "step": 2630 }, { "epoch": 0.479111677436971, "grad_norm": 7.0, "learning_rate": 9.531942226893537e-06, "loss": 1.347985863685608, "step": 2632 }, { "epoch": 0.47947574406116317, "grad_norm": 7.65625, "learning_rate": 9.531222707143936e-06, "loss": 1.4570860862731934, "step": 2634 }, { "epoch": 0.4798398106853554, "grad_norm": 2.671875, "learning_rate": 9.530502669201716e-06, "loss": 0.9980091452598572, "step": 2636 }, { "epoch": 0.48020387730954767, "grad_norm": 3.25, "learning_rate": 9.529782113172532e-06, "loss": 0.9306013584136963, "step": 2638 }, { "epoch": 0.48056794393373986, "grad_norm": 7.15625, "learning_rate": 9.529061039162131e-06, "loss": 1.2191468477249146, "step": 2640 }, { "epoch": 0.4809320105579321, "grad_norm": 6.34375, "learning_rate": 9.528339447276325e-06, "loss": 1.4673439264297485, "step": 2642 }, { "epoch": 0.4812960771821243, "grad_norm": 8.0625, "learning_rate": 9.527617337621002e-06, "loss": 1.4516894817352295, "step": 2644 }, { "epoch": 0.48166014380631655, "grad_norm": 5.53125, "learning_rate": 9.526894710302133e-06, "loss": 1.3467499017715454, "step": 2646 }, { "epoch": 0.4820242104305088, "grad_norm": 8.4375, "learning_rate": 9.526171565425757e-06, "loss": 1.574650526046753, "step": 2648 }, { "epoch": 0.482388277054701, "grad_norm": 19.75, "learning_rate": 9.525447903097996e-06, "loss": 1.150609016418457, "step": 2650 }, { "epoch": 0.48275234367889325, "grad_norm": 8.125, "learning_rate": 9.52472372342504e-06, "loss": 1.2525157928466797, "step": 2652 }, { "epoch": 0.48311641030308544, "grad_norm": 10.5, "learning_rate": 9.523999026513164e-06, "loss": 1.4114595651626587, "step": 2654 }, { "epoch": 0.4834804769272777, "grad_norm": 10.3125, "learning_rate": 9.523273812468713e-06, "loss": 1.3584535121917725, "step": 2656 }, { "epoch": 0.48384454355146994, "grad_norm": 22.0, "learning_rate": 9.522548081398106e-06, "loss": 1.293147325515747, "step": 2658 }, { "epoch": 0.48420861017566214, "grad_norm": 21.25, "learning_rate": 9.521821833407845e-06, "loss": 1.4591537714004517, "step": 2660 }, { "epoch": 0.4845726767998544, "grad_norm": 7.75, "learning_rate": 9.521095068604504e-06, "loss": 1.287800908088684, "step": 2662 }, { "epoch": 0.4849367434240466, "grad_norm": 10.4375, "learning_rate": 9.520367787094728e-06, "loss": 1.5418848991394043, "step": 2664 }, { "epoch": 0.48530081004823883, "grad_norm": 12.4375, "learning_rate": 9.51963998898525e-06, "loss": 1.5308781862258911, "step": 2666 }, { "epoch": 0.4856648766724311, "grad_norm": 9.25, "learning_rate": 9.518911674382865e-06, "loss": 1.5935556888580322, "step": 2668 }, { "epoch": 0.4860289432966233, "grad_norm": 17.25, "learning_rate": 9.518182843394455e-06, "loss": 1.4845272302627563, "step": 2670 }, { "epoch": 0.4863930099208155, "grad_norm": 22.5, "learning_rate": 9.517453496126967e-06, "loss": 1.4654523134231567, "step": 2672 }, { "epoch": 0.4867570765450077, "grad_norm": 9.1875, "learning_rate": 9.516723632687434e-06, "loss": 1.3871456384658813, "step": 2674 }, { "epoch": 0.48712114316919997, "grad_norm": 9.875, "learning_rate": 9.515993253182962e-06, "loss": 1.3925297260284424, "step": 2676 }, { "epoch": 0.4874852097933922, "grad_norm": 10.8125, "learning_rate": 9.51526235772073e-06, "loss": 1.2590036392211914, "step": 2678 }, { "epoch": 0.4878492764175844, "grad_norm": 22.625, "learning_rate": 9.514530946407992e-06, "loss": 1.259558916091919, "step": 2680 }, { "epoch": 0.48821334304177666, "grad_norm": 10.625, "learning_rate": 9.51379901935208e-06, "loss": 0.9127806425094604, "step": 2682 }, { "epoch": 0.48857740966596885, "grad_norm": 11.4375, "learning_rate": 9.513066576660404e-06, "loss": 1.3500789403915405, "step": 2684 }, { "epoch": 0.4889414762901611, "grad_norm": 13.0625, "learning_rate": 9.512333618440441e-06, "loss": 1.4418582916259766, "step": 2686 }, { "epoch": 0.48930554291435335, "grad_norm": 15.125, "learning_rate": 9.511600144799758e-06, "loss": 1.7348606586456299, "step": 2688 }, { "epoch": 0.48966960953854555, "grad_norm": 14.625, "learning_rate": 9.510866155845984e-06, "loss": 1.5145047903060913, "step": 2690 }, { "epoch": 0.4900336761627378, "grad_norm": 21.5, "learning_rate": 9.510131651686826e-06, "loss": 1.2467162609100342, "step": 2692 }, { "epoch": 0.49039774278693, "grad_norm": 20.5, "learning_rate": 9.509396632430079e-06, "loss": 0.8618804216384888, "step": 2694 }, { "epoch": 0.49076180941112224, "grad_norm": 5.5625, "learning_rate": 9.508661098183596e-06, "loss": 1.282824158668518, "step": 2696 }, { "epoch": 0.49112587603531443, "grad_norm": 5.4375, "learning_rate": 9.507925049055316e-06, "loss": 1.109830379486084, "step": 2698 }, { "epoch": 0.4914899426595067, "grad_norm": 8.4375, "learning_rate": 9.507188485153252e-06, "loss": 1.4412422180175781, "step": 2700 }, { "epoch": 0.49185400928369893, "grad_norm": 7.1875, "learning_rate": 9.50645140658549e-06, "loss": 1.339979648590088, "step": 2702 }, { "epoch": 0.49221807590789113, "grad_norm": 6.59375, "learning_rate": 9.505713813460195e-06, "loss": 1.2000776529312134, "step": 2704 }, { "epoch": 0.4925821425320834, "grad_norm": 11.6875, "learning_rate": 9.504975705885606e-06, "loss": 1.5046942234039307, "step": 2706 }, { "epoch": 0.49294620915627557, "grad_norm": 9.5625, "learning_rate": 9.504237083970038e-06, "loss": 1.4285600185394287, "step": 2708 }, { "epoch": 0.4933102757804678, "grad_norm": 6.375, "learning_rate": 9.503497947821879e-06, "loss": 1.1578221321105957, "step": 2710 }, { "epoch": 0.49367434240466007, "grad_norm": 15.75, "learning_rate": 9.502758297549593e-06, "loss": 1.4977189302444458, "step": 2712 }, { "epoch": 0.49403840902885227, "grad_norm": 23.125, "learning_rate": 9.50201813326172e-06, "loss": 1.241639256477356, "step": 2714 }, { "epoch": 0.4944024756530445, "grad_norm": 10.625, "learning_rate": 9.501277455066884e-06, "loss": 0.7195055484771729, "step": 2716 }, { "epoch": 0.4947665422772367, "grad_norm": 21.125, "learning_rate": 9.500536263073768e-06, "loss": 1.4854540824890137, "step": 2718 }, { "epoch": 0.49513060890142896, "grad_norm": 10.4375, "learning_rate": 9.49979455739114e-06, "loss": 1.5270678997039795, "step": 2720 }, { "epoch": 0.4954946755256212, "grad_norm": 7.09375, "learning_rate": 9.499052338127845e-06, "loss": 1.1615458726882935, "step": 2722 }, { "epoch": 0.4958587421498134, "grad_norm": 10.125, "learning_rate": 9.4983096053928e-06, "loss": 1.2840888500213623, "step": 2724 }, { "epoch": 0.49622280877400565, "grad_norm": 7.5625, "learning_rate": 9.497566359295e-06, "loss": 1.2300231456756592, "step": 2726 }, { "epoch": 0.49658687539819785, "grad_norm": 118.5, "learning_rate": 9.49682259994351e-06, "loss": 1.7407028675079346, "step": 2728 }, { "epoch": 0.4969509420223901, "grad_norm": 18.875, "learning_rate": 9.496078327447476e-06, "loss": 1.362412929534912, "step": 2730 }, { "epoch": 0.49731500864658235, "grad_norm": 8.375, "learning_rate": 9.495333541916114e-06, "loss": 1.2506790161132812, "step": 2732 }, { "epoch": 0.49767907527077454, "grad_norm": 8.0625, "learning_rate": 9.49458824345872e-06, "loss": 1.0845894813537598, "step": 2734 }, { "epoch": 0.4980431418949668, "grad_norm": 6.5, "learning_rate": 9.493842432184664e-06, "loss": 1.4774537086486816, "step": 2736 }, { "epoch": 0.498407208519159, "grad_norm": 9.375, "learning_rate": 9.49309610820339e-06, "loss": 1.234175443649292, "step": 2738 }, { "epoch": 0.49877127514335123, "grad_norm": 15.0, "learning_rate": 9.49234927162442e-06, "loss": 1.5128930807113647, "step": 2740 }, { "epoch": 0.4991353417675435, "grad_norm": 7.6875, "learning_rate": 9.491601922557346e-06, "loss": 1.6760063171386719, "step": 2742 }, { "epoch": 0.4994994083917357, "grad_norm": 28.125, "learning_rate": 9.490854061111838e-06, "loss": 1.2086949348449707, "step": 2744 }, { "epoch": 0.4998634750159279, "grad_norm": 9.5, "learning_rate": 9.490105687397648e-06, "loss": 1.7749273777008057, "step": 2746 }, { "epoch": 0.5002275416401202, "grad_norm": 14.6875, "learning_rate": 9.489356801524592e-06, "loss": 1.8374942541122437, "step": 2748 }, { "epoch": 0.5005916082643124, "grad_norm": 7.0, "learning_rate": 9.488607403602563e-06, "loss": 1.4177943468093872, "step": 2750 }, { "epoch": 0.5009556748885046, "grad_norm": 19.375, "learning_rate": 9.48785749374154e-06, "loss": 1.1259864568710327, "step": 2752 }, { "epoch": 0.5013197415126969, "grad_norm": 44.25, "learning_rate": 9.487107072051562e-06, "loss": 1.4717812538146973, "step": 2754 }, { "epoch": 0.5016838081368891, "grad_norm": 14.8125, "learning_rate": 9.486356138642753e-06, "loss": 1.581669569015503, "step": 2756 }, { "epoch": 0.5020478747610813, "grad_norm": 20.25, "learning_rate": 9.485604693625311e-06, "loss": 1.6590213775634766, "step": 2758 }, { "epoch": 0.5024119413852735, "grad_norm": 12.5, "learning_rate": 9.484852737109504e-06, "loss": 1.4304105043411255, "step": 2760 }, { "epoch": 0.5027760080094658, "grad_norm": 22.375, "learning_rate": 9.484100269205685e-06, "loss": 1.0655628442764282, "step": 2762 }, { "epoch": 0.503140074633658, "grad_norm": 17.75, "learning_rate": 9.483347290024267e-06, "loss": 1.2159664630889893, "step": 2764 }, { "epoch": 0.5035041412578501, "grad_norm": 7.4375, "learning_rate": 9.482593799675754e-06, "loss": 1.2526659965515137, "step": 2766 }, { "epoch": 0.5038682078820425, "grad_norm": 13.375, "learning_rate": 9.481839798270714e-06, "loss": 1.2676928043365479, "step": 2768 }, { "epoch": 0.5042322745062346, "grad_norm": 6.90625, "learning_rate": 9.481085285919794e-06, "loss": 1.1000837087631226, "step": 2770 }, { "epoch": 0.5045963411304268, "grad_norm": 9.625, "learning_rate": 9.480330262733715e-06, "loss": 1.4362484216690063, "step": 2772 }, { "epoch": 0.5049604077546191, "grad_norm": 11.1875, "learning_rate": 9.479574728823276e-06, "loss": 1.7629673480987549, "step": 2774 }, { "epoch": 0.5053244743788113, "grad_norm": 17.5, "learning_rate": 9.478818684299345e-06, "loss": 1.3608262538909912, "step": 2776 }, { "epoch": 0.5056885410030035, "grad_norm": 6.5625, "learning_rate": 9.478062129272872e-06, "loss": 0.9855809807777405, "step": 2778 }, { "epoch": 0.5060526076271957, "grad_norm": 8.0, "learning_rate": 9.477305063854877e-06, "loss": 1.3332241773605347, "step": 2780 }, { "epoch": 0.506416674251388, "grad_norm": 33.0, "learning_rate": 9.476547488156453e-06, "loss": 1.4193193912506104, "step": 2782 }, { "epoch": 0.5067807408755802, "grad_norm": 7.3125, "learning_rate": 9.475789402288778e-06, "loss": 1.3930517435073853, "step": 2784 }, { "epoch": 0.5071448074997724, "grad_norm": 4.25, "learning_rate": 9.475030806363093e-06, "loss": 1.0984879732131958, "step": 2786 }, { "epoch": 0.5075088741239647, "grad_norm": 13.75, "learning_rate": 9.47427170049072e-06, "loss": 1.243513584136963, "step": 2788 }, { "epoch": 0.5078729407481569, "grad_norm": 18.625, "learning_rate": 9.473512084783054e-06, "loss": 1.5150600671768188, "step": 2790 }, { "epoch": 0.5082370073723491, "grad_norm": 9.75, "learning_rate": 9.472751959351569e-06, "loss": 1.5471346378326416, "step": 2792 }, { "epoch": 0.5086010739965414, "grad_norm": 5.46875, "learning_rate": 9.471991324307808e-06, "loss": 1.2661476135253906, "step": 2794 }, { "epoch": 0.5089651406207336, "grad_norm": 10.1875, "learning_rate": 9.471230179763389e-06, "loss": 1.4615060091018677, "step": 2796 }, { "epoch": 0.5093292072449258, "grad_norm": 11.8125, "learning_rate": 9.470468525830008e-06, "loss": 1.3388614654541016, "step": 2798 }, { "epoch": 0.509693273869118, "grad_norm": 8.875, "learning_rate": 9.469706362619438e-06, "loss": 1.2015575170516968, "step": 2800 }, { "epoch": 0.5100573404933103, "grad_norm": 16.75, "learning_rate": 9.468943690243518e-06, "loss": 0.8708786964416504, "step": 2802 }, { "epoch": 0.5104214071175025, "grad_norm": 11.75, "learning_rate": 9.468180508814173e-06, "loss": 1.2993409633636475, "step": 2804 }, { "epoch": 0.5107854737416947, "grad_norm": 16.5, "learning_rate": 9.46741681844339e-06, "loss": 1.4893848896026611, "step": 2806 }, { "epoch": 0.511149540365887, "grad_norm": 22.5, "learning_rate": 9.466652619243244e-06, "loss": 1.7756671905517578, "step": 2808 }, { "epoch": 0.5115136069900792, "grad_norm": 8.1875, "learning_rate": 9.465887911325875e-06, "loss": 1.0276234149932861, "step": 2810 }, { "epoch": 0.5118776736142714, "grad_norm": 6.09375, "learning_rate": 9.465122694803502e-06, "loss": 1.35276460647583, "step": 2812 }, { "epoch": 0.5122417402384637, "grad_norm": 8.75, "learning_rate": 9.464356969788413e-06, "loss": 0.9041693210601807, "step": 2814 }, { "epoch": 0.5126058068626559, "grad_norm": 24.625, "learning_rate": 9.46359073639298e-06, "loss": 0.9895302653312683, "step": 2816 }, { "epoch": 0.5129698734868481, "grad_norm": 12.5625, "learning_rate": 9.462823994729643e-06, "loss": 1.7848765850067139, "step": 2818 }, { "epoch": 0.5133339401110403, "grad_norm": 14.75, "learning_rate": 9.46205674491092e-06, "loss": 1.70426607131958, "step": 2820 }, { "epoch": 0.5136980067352326, "grad_norm": 17.875, "learning_rate": 9.4612889870494e-06, "loss": 2.081897258758545, "step": 2822 }, { "epoch": 0.5140620733594248, "grad_norm": 16.875, "learning_rate": 9.460520721257747e-06, "loss": 2.092416763305664, "step": 2824 }, { "epoch": 0.514426139983617, "grad_norm": 29.625, "learning_rate": 9.459751947648701e-06, "loss": 1.7710515260696411, "step": 2826 }, { "epoch": 0.5147902066078093, "grad_norm": 64.0, "learning_rate": 9.458982666335081e-06, "loss": 1.5571202039718628, "step": 2828 }, { "epoch": 0.5151542732320015, "grad_norm": 22.5, "learning_rate": 9.458212877429771e-06, "loss": 0.5372373461723328, "step": 2830 }, { "epoch": 0.5155183398561937, "grad_norm": 190.0, "learning_rate": 9.457442581045737e-06, "loss": 1.3746743202209473, "step": 2832 }, { "epoch": 0.5158824064803859, "grad_norm": 27.75, "learning_rate": 9.456671777296016e-06, "loss": 1.4118127822875977, "step": 2834 }, { "epoch": 0.5162464731045782, "grad_norm": 10.5625, "learning_rate": 9.45590046629372e-06, "loss": 1.48917818069458, "step": 2836 }, { "epoch": 0.5166105397287704, "grad_norm": 19.5, "learning_rate": 9.455128648152037e-06, "loss": 1.4884352684020996, "step": 2838 }, { "epoch": 0.5169746063529626, "grad_norm": 32.5, "learning_rate": 9.454356322984225e-06, "loss": 0.9315375685691833, "step": 2840 }, { "epoch": 0.5173386729771549, "grad_norm": 24.375, "learning_rate": 9.453583490903624e-06, "loss": 1.5715090036392212, "step": 2842 }, { "epoch": 0.517702739601347, "grad_norm": 7.4375, "learning_rate": 9.452810152023641e-06, "loss": 1.3380179405212402, "step": 2844 }, { "epoch": 0.5180668062255392, "grad_norm": 5.96875, "learning_rate": 9.45203630645776e-06, "loss": 1.4265273809432983, "step": 2846 }, { "epoch": 0.5184308728497315, "grad_norm": 10.5, "learning_rate": 9.451261954319543e-06, "loss": 0.9977113008499146, "step": 2848 }, { "epoch": 0.5187949394739237, "grad_norm": 21.0, "learning_rate": 9.45048709572262e-06, "loss": 1.4064290523529053, "step": 2850 }, { "epoch": 0.5191590060981159, "grad_norm": 10.0625, "learning_rate": 9.4497117307807e-06, "loss": 1.428276777267456, "step": 2852 }, { "epoch": 0.5195230727223081, "grad_norm": 7.4375, "learning_rate": 9.448935859607564e-06, "loss": 1.0143812894821167, "step": 2854 }, { "epoch": 0.5198871393465004, "grad_norm": 49.0, "learning_rate": 9.448159482317067e-06, "loss": 1.3358533382415771, "step": 2856 }, { "epoch": 0.5202512059706926, "grad_norm": 20.0, "learning_rate": 9.44738259902314e-06, "loss": 1.1163548231124878, "step": 2858 }, { "epoch": 0.5206152725948848, "grad_norm": 36.5, "learning_rate": 9.44660520983979e-06, "loss": 1.5802149772644043, "step": 2860 }, { "epoch": 0.5209793392190771, "grad_norm": 11.1875, "learning_rate": 9.44582731488109e-06, "loss": 1.4620944261550903, "step": 2862 }, { "epoch": 0.5213434058432693, "grad_norm": 17.375, "learning_rate": 9.445048914261198e-06, "loss": 1.5336461067199707, "step": 2864 }, { "epoch": 0.5217074724674615, "grad_norm": 27.625, "learning_rate": 9.44427000809434e-06, "loss": 1.6409046649932861, "step": 2866 }, { "epoch": 0.5220715390916538, "grad_norm": 33.0, "learning_rate": 9.443490596494816e-06, "loss": 1.742478847503662, "step": 2868 }, { "epoch": 0.522435605715846, "grad_norm": 20.75, "learning_rate": 9.442710679577003e-06, "loss": 1.4603480100631714, "step": 2870 }, { "epoch": 0.5227996723400382, "grad_norm": 13.1875, "learning_rate": 9.441930257455348e-06, "loss": 1.4711697101593018, "step": 2872 }, { "epoch": 0.5231637389642304, "grad_norm": 17.125, "learning_rate": 9.44114933024438e-06, "loss": 1.5037627220153809, "step": 2874 }, { "epoch": 0.5235278055884227, "grad_norm": 33.75, "learning_rate": 9.440367898058688e-06, "loss": 1.4397495985031128, "step": 2876 }, { "epoch": 0.5238918722126149, "grad_norm": 17.125, "learning_rate": 9.439585961012954e-06, "loss": 1.0517516136169434, "step": 2878 }, { "epoch": 0.5242559388368071, "grad_norm": 12.875, "learning_rate": 9.438803519221917e-06, "loss": 1.8930423259735107, "step": 2880 }, { "epoch": 0.5246200054609994, "grad_norm": 17.125, "learning_rate": 9.438020572800401e-06, "loss": 1.791634202003479, "step": 2882 }, { "epoch": 0.5249840720851916, "grad_norm": 9.3125, "learning_rate": 9.4372371218633e-06, "loss": 1.128345012664795, "step": 2884 }, { "epoch": 0.5253481387093838, "grad_norm": 14.4375, "learning_rate": 9.436453166525581e-06, "loss": 1.4580602645874023, "step": 2886 }, { "epoch": 0.5257122053335761, "grad_norm": 47.25, "learning_rate": 9.435668706902286e-06, "loss": 1.5464787483215332, "step": 2888 }, { "epoch": 0.5260762719577683, "grad_norm": 7.03125, "learning_rate": 9.434883743108532e-06, "loss": 1.2771549224853516, "step": 2890 }, { "epoch": 0.5264403385819605, "grad_norm": 29.5, "learning_rate": 9.434098275259507e-06, "loss": 1.0110841989517212, "step": 2892 }, { "epoch": 0.5268044052061527, "grad_norm": 218.0, "learning_rate": 9.433312303470481e-06, "loss": 0.9715045094490051, "step": 2894 }, { "epoch": 0.527168471830345, "grad_norm": 6.78125, "learning_rate": 9.432525827856787e-06, "loss": 1.327849268913269, "step": 2896 }, { "epoch": 0.5275325384545372, "grad_norm": 5.78125, "learning_rate": 9.431738848533838e-06, "loss": 1.2787411212921143, "step": 2898 }, { "epoch": 0.5278966050787294, "grad_norm": 11.375, "learning_rate": 9.43095136561712e-06, "loss": 1.470754861831665, "step": 2900 }, { "epoch": 0.5282606717029217, "grad_norm": 12.8125, "learning_rate": 9.430163379222194e-06, "loss": 1.4690332412719727, "step": 2902 }, { "epoch": 0.5286247383271139, "grad_norm": 17.375, "learning_rate": 9.429374889464696e-06, "loss": 1.7040634155273438, "step": 2904 }, { "epoch": 0.5289888049513061, "grad_norm": 35.0, "learning_rate": 9.428585896460327e-06, "loss": 1.6173440217971802, "step": 2906 }, { "epoch": 0.5293528715754983, "grad_norm": 18.875, "learning_rate": 9.427796400324873e-06, "loss": 1.873227834701538, "step": 2908 }, { "epoch": 0.5297169381996906, "grad_norm": 14.0, "learning_rate": 9.42700640117419e-06, "loss": 1.515747308731079, "step": 2910 }, { "epoch": 0.5300810048238828, "grad_norm": 10.5625, "learning_rate": 9.426215899124207e-06, "loss": 1.3949079513549805, "step": 2912 }, { "epoch": 0.530445071448075, "grad_norm": 8.1875, "learning_rate": 9.425424894290925e-06, "loss": 1.3828617334365845, "step": 2914 }, { "epoch": 0.5308091380722673, "grad_norm": 14.3125, "learning_rate": 9.424633386790422e-06, "loss": 1.37305748462677, "step": 2916 }, { "epoch": 0.5311732046964595, "grad_norm": 10.4375, "learning_rate": 9.423841376738849e-06, "loss": 1.4214001893997192, "step": 2918 }, { "epoch": 0.5315372713206516, "grad_norm": 16.75, "learning_rate": 9.423048864252428e-06, "loss": 1.463394284248352, "step": 2920 }, { "epoch": 0.531901337944844, "grad_norm": 15.375, "learning_rate": 9.422255849447459e-06, "loss": 1.5125001668930054, "step": 2922 }, { "epoch": 0.5322654045690361, "grad_norm": 11.9375, "learning_rate": 9.421462332440314e-06, "loss": 1.5132102966308594, "step": 2924 }, { "epoch": 0.5326294711932283, "grad_norm": 3.390625, "learning_rate": 9.420668313347439e-06, "loss": 0.818705677986145, "step": 2926 }, { "epoch": 0.5329935378174205, "grad_norm": 59.25, "learning_rate": 9.41987379228535e-06, "loss": 0.3280879259109497, "step": 2928 }, { "epoch": 0.5333576044416128, "grad_norm": 48.25, "learning_rate": 9.419078769370642e-06, "loss": 0.5533462166786194, "step": 2930 }, { "epoch": 0.533721671065805, "grad_norm": 39.5, "learning_rate": 9.41828324471998e-06, "loss": 0.6874912977218628, "step": 2932 }, { "epoch": 0.5340857376899972, "grad_norm": 11.3125, "learning_rate": 9.417487218450106e-06, "loss": 1.105255365371704, "step": 2934 }, { "epoch": 0.5344498043141895, "grad_norm": 17.875, "learning_rate": 9.416690690677833e-06, "loss": 1.3310904502868652, "step": 2936 }, { "epoch": 0.5348138709383817, "grad_norm": 19.0, "learning_rate": 9.415893661520047e-06, "loss": 1.4222571849822998, "step": 2938 }, { "epoch": 0.5351779375625739, "grad_norm": 10.875, "learning_rate": 9.415096131093708e-06, "loss": 1.4867838621139526, "step": 2940 }, { "epoch": 0.5355420041867662, "grad_norm": 8.9375, "learning_rate": 9.414298099515853e-06, "loss": 1.5833359956741333, "step": 2942 }, { "epoch": 0.5359060708109584, "grad_norm": 9.3125, "learning_rate": 9.41349956690359e-06, "loss": 1.4017274379730225, "step": 2944 }, { "epoch": 0.5362701374351506, "grad_norm": 10.375, "learning_rate": 9.412700533374098e-06, "loss": 1.6258537769317627, "step": 2946 }, { "epoch": 0.5366342040593428, "grad_norm": 10.8125, "learning_rate": 9.411900999044635e-06, "loss": 1.232919692993164, "step": 2948 }, { "epoch": 0.5369982706835351, "grad_norm": 20.5, "learning_rate": 9.411100964032524e-06, "loss": 1.5530389547348022, "step": 2950 }, { "epoch": 0.5373623373077273, "grad_norm": 9.0625, "learning_rate": 9.410300428455174e-06, "loss": 1.3744899034500122, "step": 2952 }, { "epoch": 0.5377264039319195, "grad_norm": 24.0, "learning_rate": 9.409499392430057e-06, "loss": 1.5849263668060303, "step": 2954 }, { "epoch": 0.5380904705561118, "grad_norm": 55.25, "learning_rate": 9.40869785607472e-06, "loss": 1.63058602809906, "step": 2956 }, { "epoch": 0.538454537180304, "grad_norm": 9.125, "learning_rate": 9.407895819506787e-06, "loss": 1.4717212915420532, "step": 2958 }, { "epoch": 0.5388186038044962, "grad_norm": 5.40625, "learning_rate": 9.407093282843953e-06, "loss": 1.30521821975708, "step": 2960 }, { "epoch": 0.5391826704286885, "grad_norm": 62.5, "learning_rate": 9.406290246203988e-06, "loss": 1.3819918632507324, "step": 2962 }, { "epoch": 0.5395467370528807, "grad_norm": 12.625, "learning_rate": 9.405486709704734e-06, "loss": 1.3954023122787476, "step": 2964 }, { "epoch": 0.5399108036770729, "grad_norm": 6.34375, "learning_rate": 9.404682673464108e-06, "loss": 1.1037077903747559, "step": 2966 }, { "epoch": 0.5402748703012651, "grad_norm": 60.25, "learning_rate": 9.403878137600095e-06, "loss": 1.2842849493026733, "step": 2968 }, { "epoch": 0.5406389369254574, "grad_norm": 22.375, "learning_rate": 9.403073102230762e-06, "loss": 1.7027831077575684, "step": 2970 }, { "epoch": 0.5410030035496496, "grad_norm": 8.1875, "learning_rate": 9.402267567474242e-06, "loss": 1.579690933227539, "step": 2972 }, { "epoch": 0.5413670701738418, "grad_norm": 48.0, "learning_rate": 9.401461533448744e-06, "loss": 1.7669086456298828, "step": 2974 }, { "epoch": 0.5417311367980341, "grad_norm": 6.78125, "learning_rate": 9.400655000272551e-06, "loss": 1.4734055995941162, "step": 2976 }, { "epoch": 0.5420952034222263, "grad_norm": 13.375, "learning_rate": 9.39984796806402e-06, "loss": 1.4527418613433838, "step": 2978 }, { "epoch": 0.5424592700464185, "grad_norm": 9.6875, "learning_rate": 9.399040436941577e-06, "loss": 1.5013947486877441, "step": 2980 }, { "epoch": 0.5428233366706107, "grad_norm": 14.75, "learning_rate": 9.398232407023724e-06, "loss": 1.5700006484985352, "step": 2982 }, { "epoch": 0.543187403294803, "grad_norm": 32.5, "learning_rate": 9.397423878429037e-06, "loss": 2.04573655128479, "step": 2984 }, { "epoch": 0.5435514699189952, "grad_norm": 45.0, "learning_rate": 9.396614851276166e-06, "loss": 1.7349227666854858, "step": 2986 }, { "epoch": 0.5439155365431874, "grad_norm": 15.3125, "learning_rate": 9.39580532568383e-06, "loss": 1.1179993152618408, "step": 2988 }, { "epoch": 0.5442796031673797, "grad_norm": 13.0625, "learning_rate": 9.394995301770826e-06, "loss": 1.492805004119873, "step": 2990 }, { "epoch": 0.5446436697915719, "grad_norm": 12.0, "learning_rate": 9.39418477965602e-06, "loss": 1.3820233345031738, "step": 2992 }, { "epoch": 0.545007736415764, "grad_norm": 44.5, "learning_rate": 9.393373759458351e-06, "loss": 1.1499322652816772, "step": 2994 }, { "epoch": 0.5453718030399564, "grad_norm": 7.03125, "learning_rate": 9.392562241296837e-06, "loss": 0.344651997089386, "step": 2996 }, { "epoch": 0.5457358696641486, "grad_norm": 7.0, "learning_rate": 9.391750225290561e-06, "loss": 1.1447712182998657, "step": 2998 }, { "epoch": 0.5460999362883407, "grad_norm": 10.375, "learning_rate": 9.390937711558685e-06, "loss": 1.3334729671478271, "step": 3000 }, { "epoch": 0.5464640029125329, "grad_norm": 14.125, "learning_rate": 9.390124700220442e-06, "loss": 1.356000304222107, "step": 3002 }, { "epoch": 0.5468280695367252, "grad_norm": 10.125, "learning_rate": 9.389311191395141e-06, "loss": 1.5714797973632812, "step": 3004 }, { "epoch": 0.5471921361609174, "grad_norm": 12.875, "learning_rate": 9.388497185202155e-06, "loss": 1.2656877040863037, "step": 3006 }, { "epoch": 0.5475562027851096, "grad_norm": 11.125, "learning_rate": 9.387682681760941e-06, "loss": 0.8870418071746826, "step": 3008 }, { "epoch": 0.5479202694093019, "grad_norm": 5.46875, "learning_rate": 9.386867681191023e-06, "loss": 1.3088362216949463, "step": 3010 }, { "epoch": 0.5482843360334941, "grad_norm": 14.9375, "learning_rate": 9.386052183611998e-06, "loss": 1.3964647054672241, "step": 3012 }, { "epoch": 0.5486484026576863, "grad_norm": 5.34375, "learning_rate": 9.385236189143538e-06, "loss": 1.2509205341339111, "step": 3014 }, { "epoch": 0.5490124692818786, "grad_norm": 24.75, "learning_rate": 9.384419697905385e-06, "loss": 1.3296830654144287, "step": 3016 }, { "epoch": 0.5493765359060708, "grad_norm": 12.0625, "learning_rate": 9.383602710017358e-06, "loss": 1.932960033416748, "step": 3018 }, { "epoch": 0.549740602530263, "grad_norm": 7.21875, "learning_rate": 9.382785225599346e-06, "loss": 1.3641269207000732, "step": 3020 }, { "epoch": 0.5501046691544552, "grad_norm": 9.1875, "learning_rate": 9.381967244771311e-06, "loss": 1.4412264823913574, "step": 3022 }, { "epoch": 0.5504687357786475, "grad_norm": 7.8125, "learning_rate": 9.38114876765329e-06, "loss": 1.327149748802185, "step": 3024 }, { "epoch": 0.5508328024028397, "grad_norm": 13.75, "learning_rate": 9.380329794365389e-06, "loss": 1.6417878866195679, "step": 3026 }, { "epoch": 0.5511968690270319, "grad_norm": 10.0, "learning_rate": 9.37951032502779e-06, "loss": 1.334957480430603, "step": 3028 }, { "epoch": 0.5515609356512242, "grad_norm": 3.875, "learning_rate": 9.378690359760747e-06, "loss": 1.0583679676055908, "step": 3030 }, { "epoch": 0.5519250022754164, "grad_norm": 8.375, "learning_rate": 9.377869898684587e-06, "loss": 1.087319016456604, "step": 3032 }, { "epoch": 0.5522890688996086, "grad_norm": 25.375, "learning_rate": 9.377048941919706e-06, "loss": 1.6336027383804321, "step": 3034 }, { "epoch": 0.5526531355238009, "grad_norm": 25.375, "learning_rate": 9.37622748958658e-06, "loss": 1.5905839204788208, "step": 3036 }, { "epoch": 0.5530172021479931, "grad_norm": 9.5625, "learning_rate": 9.375405541805753e-06, "loss": 1.389833688735962, "step": 3038 }, { "epoch": 0.5533812687721853, "grad_norm": 7.78125, "learning_rate": 9.374583098697843e-06, "loss": 1.4018820524215698, "step": 3040 }, { "epoch": 0.5537453353963775, "grad_norm": 12.875, "learning_rate": 9.373760160383538e-06, "loss": 1.2264093160629272, "step": 3042 }, { "epoch": 0.5541094020205698, "grad_norm": 13.8125, "learning_rate": 9.372936726983604e-06, "loss": 0.9025511741638184, "step": 3044 }, { "epoch": 0.554473468644762, "grad_norm": 20.25, "learning_rate": 9.372112798618872e-06, "loss": 1.039534568786621, "step": 3046 }, { "epoch": 0.5548375352689542, "grad_norm": 16.125, "learning_rate": 9.371288375410254e-06, "loss": 1.5039215087890625, "step": 3048 }, { "epoch": 0.5552016018931465, "grad_norm": 11.8125, "learning_rate": 9.370463457478729e-06, "loss": 1.6696616411209106, "step": 3050 }, { "epoch": 0.5555656685173387, "grad_norm": 12.0, "learning_rate": 9.369638044945354e-06, "loss": 1.4033293724060059, "step": 3052 }, { "epoch": 0.5559297351415309, "grad_norm": 23.25, "learning_rate": 9.368812137931247e-06, "loss": 1.0766596794128418, "step": 3054 }, { "epoch": 0.5562938017657231, "grad_norm": 9.25, "learning_rate": 9.367985736557614e-06, "loss": 0.966900646686554, "step": 3056 }, { "epoch": 0.5566578683899154, "grad_norm": 20.875, "learning_rate": 9.367158840945722e-06, "loss": 0.7809659242630005, "step": 3058 }, { "epoch": 0.5570219350141076, "grad_norm": 8.375, "learning_rate": 9.36633145121692e-06, "loss": 0.9630832076072693, "step": 3060 }, { "epoch": 0.5573860016382998, "grad_norm": 28.25, "learning_rate": 9.365503567492615e-06, "loss": 1.0313334465026855, "step": 3062 }, { "epoch": 0.5577500682624921, "grad_norm": 24.5, "learning_rate": 9.364675189894304e-06, "loss": 1.5308561325073242, "step": 3064 }, { "epoch": 0.5581141348866843, "grad_norm": 23.0, "learning_rate": 9.36384631854354e-06, "loss": 1.5186749696731567, "step": 3066 }, { "epoch": 0.5584782015108765, "grad_norm": 11.875, "learning_rate": 9.363016953561967e-06, "loss": 1.5328071117401123, "step": 3068 }, { "epoch": 0.5588422681350688, "grad_norm": 16.0, "learning_rate": 9.362187095071282e-06, "loss": 1.3448283672332764, "step": 3070 }, { "epoch": 0.559206334759261, "grad_norm": 117.5, "learning_rate": 9.361356743193269e-06, "loss": 1.5575233697891235, "step": 3072 }, { "epoch": 0.5595704013834532, "grad_norm": 8.25, "learning_rate": 9.360525898049772e-06, "loss": 1.38517165184021, "step": 3074 }, { "epoch": 0.5599344680076453, "grad_norm": 32.5, "learning_rate": 9.359694559762722e-06, "loss": 2.0679996013641357, "step": 3076 }, { "epoch": 0.5602985346318377, "grad_norm": 8.875, "learning_rate": 9.35886272845411e-06, "loss": 1.4488943815231323, "step": 3078 }, { "epoch": 0.5606626012560298, "grad_norm": 10.6875, "learning_rate": 9.358030404246006e-06, "loss": 1.427444577217102, "step": 3080 }, { "epoch": 0.561026667880222, "grad_norm": 9.375, "learning_rate": 9.357197587260549e-06, "loss": 1.4008798599243164, "step": 3082 }, { "epoch": 0.5613907345044143, "grad_norm": 7.5, "learning_rate": 9.356364277619952e-06, "loss": 1.3714756965637207, "step": 3084 }, { "epoch": 0.5617548011286065, "grad_norm": 14.25, "learning_rate": 9.355530475446494e-06, "loss": 0.8201741576194763, "step": 3086 }, { "epoch": 0.5621188677527987, "grad_norm": 14.3125, "learning_rate": 9.354696180862543e-06, "loss": 0.45517778396606445, "step": 3088 }, { "epoch": 0.562482934376991, "grad_norm": 15.4375, "learning_rate": 9.353861393990522e-06, "loss": 1.5810420513153076, "step": 3090 }, { "epoch": 0.5628470010011832, "grad_norm": 80.5, "learning_rate": 9.353026114952935e-06, "loss": 1.2946951389312744, "step": 3092 }, { "epoch": 0.5632110676253754, "grad_norm": 6.0625, "learning_rate": 9.352190343872352e-06, "loss": 1.4217023849487305, "step": 3094 }, { "epoch": 0.5635751342495676, "grad_norm": 61.5, "learning_rate": 9.35135408087142e-06, "loss": 1.041006326675415, "step": 3096 }, { "epoch": 0.5639392008737599, "grad_norm": 8.75, "learning_rate": 9.350517326072861e-06, "loss": 1.5310918092727661, "step": 3098 }, { "epoch": 0.5643032674979521, "grad_norm": 4.9375, "learning_rate": 9.349680079599462e-06, "loss": 1.3874033689498901, "step": 3100 }, { "epoch": 0.5646673341221443, "grad_norm": 11.625, "learning_rate": 9.348842341574085e-06, "loss": 1.1787211894989014, "step": 3102 }, { "epoch": 0.5650314007463366, "grad_norm": 10.5625, "learning_rate": 9.348004112119666e-06, "loss": 1.3066539764404297, "step": 3104 }, { "epoch": 0.5653954673705288, "grad_norm": 15.5, "learning_rate": 9.347165391359214e-06, "loss": 1.4900389909744263, "step": 3106 }, { "epoch": 0.565759533994721, "grad_norm": 20.125, "learning_rate": 9.346326179415805e-06, "loss": 1.2628798484802246, "step": 3108 }, { "epoch": 0.5661236006189133, "grad_norm": 17.375, "learning_rate": 9.34548647641259e-06, "loss": 1.6182929277420044, "step": 3110 }, { "epoch": 0.5664876672431055, "grad_norm": 24.375, "learning_rate": 9.344646282472794e-06, "loss": 1.743363618850708, "step": 3112 }, { "epoch": 0.5668517338672977, "grad_norm": 11.1875, "learning_rate": 9.343805597719711e-06, "loss": 1.09340500831604, "step": 3114 }, { "epoch": 0.5672158004914899, "grad_norm": 13.5625, "learning_rate": 9.342964422276705e-06, "loss": 1.409019112586975, "step": 3116 }, { "epoch": 0.5675798671156822, "grad_norm": 7.21875, "learning_rate": 9.34212275626722e-06, "loss": 1.4036802053451538, "step": 3118 }, { "epoch": 0.5679439337398744, "grad_norm": 4.1875, "learning_rate": 9.341280599814764e-06, "loss": 1.0053377151489258, "step": 3120 }, { "epoch": 0.5683080003640666, "grad_norm": 4.84375, "learning_rate": 9.340437953042923e-06, "loss": 1.2106422185897827, "step": 3122 }, { "epoch": 0.5686720669882589, "grad_norm": 12.75, "learning_rate": 9.339594816075348e-06, "loss": 1.238559365272522, "step": 3124 }, { "epoch": 0.5690361336124511, "grad_norm": 14.8125, "learning_rate": 9.338751189035769e-06, "loss": 1.2562676668167114, "step": 3126 }, { "epoch": 0.5694002002366433, "grad_norm": 8.4375, "learning_rate": 9.337907072047982e-06, "loss": 1.0469386577606201, "step": 3128 }, { "epoch": 0.5697642668608355, "grad_norm": 13.6875, "learning_rate": 9.337062465235862e-06, "loss": 1.5876796245574951, "step": 3130 }, { "epoch": 0.5701283334850278, "grad_norm": 18.625, "learning_rate": 9.336217368723346e-06, "loss": 0.9844861626625061, "step": 3132 }, { "epoch": 0.57049240010922, "grad_norm": 14.6875, "learning_rate": 9.335371782634455e-06, "loss": 1.2141327857971191, "step": 3134 }, { "epoch": 0.5708564667334122, "grad_norm": 17.375, "learning_rate": 9.334525707093269e-06, "loss": 1.1805936098098755, "step": 3136 }, { "epoch": 0.5712205333576045, "grad_norm": 14.375, "learning_rate": 9.33367914222395e-06, "loss": 1.9679069519042969, "step": 3138 }, { "epoch": 0.5715845999817967, "grad_norm": 12.3125, "learning_rate": 9.33283208815073e-06, "loss": 1.9321767091751099, "step": 3140 }, { "epoch": 0.5719486666059889, "grad_norm": 16.125, "learning_rate": 9.331984544997904e-06, "loss": 1.0903077125549316, "step": 3142 }, { "epoch": 0.5723127332301812, "grad_norm": 6.03125, "learning_rate": 9.331136512889852e-06, "loss": 1.1446666717529297, "step": 3144 }, { "epoch": 0.5726767998543734, "grad_norm": 103.0, "learning_rate": 9.330287991951015e-06, "loss": 1.2337350845336914, "step": 3146 }, { "epoch": 0.5730408664785656, "grad_norm": 11.375, "learning_rate": 9.329438982305911e-06, "loss": 1.6238542795181274, "step": 3148 }, { "epoch": 0.5734049331027578, "grad_norm": 8.875, "learning_rate": 9.328589484079134e-06, "loss": 1.4405121803283691, "step": 3150 }, { "epoch": 0.5737689997269501, "grad_norm": 2.796875, "learning_rate": 9.327739497395333e-06, "loss": 1.1899447441101074, "step": 3152 }, { "epoch": 0.5741330663511423, "grad_norm": 11.625, "learning_rate": 9.326889022379253e-06, "loss": 0.8332608342170715, "step": 3154 }, { "epoch": 0.5744971329753344, "grad_norm": 13.5625, "learning_rate": 9.326038059155689e-06, "loss": 1.0307068824768066, "step": 3156 }, { "epoch": 0.5748611995995268, "grad_norm": 8.625, "learning_rate": 9.325186607849518e-06, "loss": 1.2585155963897705, "step": 3158 }, { "epoch": 0.575225266223719, "grad_norm": 12.0, "learning_rate": 9.32433466858569e-06, "loss": 1.5540335178375244, "step": 3160 }, { "epoch": 0.5755893328479111, "grad_norm": 27.375, "learning_rate": 9.323482241489221e-06, "loss": 1.4413390159606934, "step": 3162 }, { "epoch": 0.5759533994721034, "grad_norm": 73.5, "learning_rate": 9.322629326685202e-06, "loss": 0.5993403196334839, "step": 3164 }, { "epoch": 0.5763174660962956, "grad_norm": 11.375, "learning_rate": 9.321775924298794e-06, "loss": 1.529767394065857, "step": 3166 }, { "epoch": 0.5766815327204878, "grad_norm": 6.5, "learning_rate": 9.320922034455233e-06, "loss": 1.3970431089401245, "step": 3168 }, { "epoch": 0.57704559934468, "grad_norm": 4.875, "learning_rate": 9.320067657279819e-06, "loss": 1.2680381536483765, "step": 3170 }, { "epoch": 0.5774096659688723, "grad_norm": 16.75, "learning_rate": 9.319212792897933e-06, "loss": 1.3864164352416992, "step": 3172 }, { "epoch": 0.5777737325930645, "grad_norm": 17.625, "learning_rate": 9.318357441435021e-06, "loss": 1.337921380996704, "step": 3174 }, { "epoch": 0.5781377992172567, "grad_norm": 23.75, "learning_rate": 9.317501603016604e-06, "loss": 1.7786891460418701, "step": 3176 }, { "epoch": 0.578501865841449, "grad_norm": 12.375, "learning_rate": 9.31664527776827e-06, "loss": 1.1613082885742188, "step": 3178 }, { "epoch": 0.5788659324656412, "grad_norm": 11.6875, "learning_rate": 9.315788465815683e-06, "loss": 1.5284299850463867, "step": 3180 }, { "epoch": 0.5792299990898334, "grad_norm": 7.375, "learning_rate": 9.314931167284575e-06, "loss": 1.23740553855896, "step": 3182 }, { "epoch": 0.5795940657140257, "grad_norm": 3.96875, "learning_rate": 9.314073382300754e-06, "loss": 1.274847388267517, "step": 3184 }, { "epoch": 0.5799581323382179, "grad_norm": 6.125, "learning_rate": 9.313215110990097e-06, "loss": 1.0943043231964111, "step": 3186 }, { "epoch": 0.5803221989624101, "grad_norm": 22.0, "learning_rate": 9.312356353478547e-06, "loss": 1.4497488737106323, "step": 3188 }, { "epoch": 0.5806862655866023, "grad_norm": 4.4375, "learning_rate": 9.311497109892127e-06, "loss": 1.0539054870605469, "step": 3190 }, { "epoch": 0.5810503322107946, "grad_norm": 63.25, "learning_rate": 9.310637380356924e-06, "loss": 1.0131137371063232, "step": 3192 }, { "epoch": 0.5814143988349868, "grad_norm": 14.0625, "learning_rate": 9.309777164999103e-06, "loss": 0.5613528490066528, "step": 3194 }, { "epoch": 0.581778465459179, "grad_norm": 10.375, "learning_rate": 9.3089164639449e-06, "loss": 1.598435878753662, "step": 3196 }, { "epoch": 0.5821425320833713, "grad_norm": 7.21875, "learning_rate": 9.308055277320611e-06, "loss": 1.3231985569000244, "step": 3198 }, { "epoch": 0.5825065987075635, "grad_norm": 14.375, "learning_rate": 9.30719360525262e-06, "loss": 1.137812852859497, "step": 3200 }, { "epoch": 0.5828706653317557, "grad_norm": 11.4375, "learning_rate": 9.306331447867369e-06, "loss": 1.5650297403335571, "step": 3202 }, { "epoch": 0.583234731955948, "grad_norm": 7.78125, "learning_rate": 9.305468805291377e-06, "loss": 1.8017995357513428, "step": 3204 }, { "epoch": 0.5835987985801402, "grad_norm": 4.28125, "learning_rate": 9.304605677651234e-06, "loss": 1.0382485389709473, "step": 3206 }, { "epoch": 0.5839628652043324, "grad_norm": 10.375, "learning_rate": 9.3037420650736e-06, "loss": 1.7024339437484741, "step": 3208 }, { "epoch": 0.5843269318285246, "grad_norm": 19.625, "learning_rate": 9.302877967685209e-06, "loss": 1.4393378496170044, "step": 3210 }, { "epoch": 0.5846909984527169, "grad_norm": 15.0625, "learning_rate": 9.302013385612858e-06, "loss": 1.771433711051941, "step": 3212 }, { "epoch": 0.5850550650769091, "grad_norm": 6.90625, "learning_rate": 9.301148318983425e-06, "loss": 1.4224319458007812, "step": 3214 }, { "epoch": 0.5854191317011013, "grad_norm": 11.625, "learning_rate": 9.300282767923858e-06, "loss": 1.3252739906311035, "step": 3216 }, { "epoch": 0.5857831983252936, "grad_norm": 12.125, "learning_rate": 9.299416732561169e-06, "loss": 1.5036948919296265, "step": 3218 }, { "epoch": 0.5861472649494858, "grad_norm": 8.125, "learning_rate": 9.298550213022443e-06, "loss": 1.2930983304977417, "step": 3220 }, { "epoch": 0.586511331573678, "grad_norm": 13.125, "learning_rate": 9.297683209434842e-06, "loss": 1.3540523052215576, "step": 3222 }, { "epoch": 0.5868753981978702, "grad_norm": 24.25, "learning_rate": 9.296815721925596e-06, "loss": 1.4716181755065918, "step": 3224 }, { "epoch": 0.5872394648220625, "grad_norm": 7.84375, "learning_rate": 9.295947750622003e-06, "loss": 1.4196969270706177, "step": 3226 }, { "epoch": 0.5876035314462547, "grad_norm": 26.375, "learning_rate": 9.295079295651432e-06, "loss": 1.579772710800171, "step": 3228 }, { "epoch": 0.5879675980704469, "grad_norm": 34.25, "learning_rate": 9.294210357141333e-06, "loss": 1.7943522930145264, "step": 3230 }, { "epoch": 0.5883316646946392, "grad_norm": 28.0, "learning_rate": 9.29334093521921e-06, "loss": 1.5552234649658203, "step": 3232 }, { "epoch": 0.5886957313188314, "grad_norm": 7.34375, "learning_rate": 9.292471030012656e-06, "loss": 1.3278955221176147, "step": 3234 }, { "epoch": 0.5890597979430235, "grad_norm": 20.375, "learning_rate": 9.291600641649319e-06, "loss": 1.1258277893066406, "step": 3236 }, { "epoch": 0.5894238645672158, "grad_norm": 39.75, "learning_rate": 9.290729770256925e-06, "loss": 1.332985281944275, "step": 3238 }, { "epoch": 0.589787931191408, "grad_norm": 5.25, "learning_rate": 9.289858415963278e-06, "loss": 1.2252001762390137, "step": 3240 }, { "epoch": 0.5901519978156002, "grad_norm": 9.0625, "learning_rate": 9.288986578896237e-06, "loss": 1.2293524742126465, "step": 3242 }, { "epoch": 0.5905160644397924, "grad_norm": 12.0625, "learning_rate": 9.288114259183747e-06, "loss": 0.7665857076644897, "step": 3244 }, { "epoch": 0.5908801310639847, "grad_norm": 6.4375, "learning_rate": 9.287241456953814e-06, "loss": 1.017699956893921, "step": 3246 }, { "epoch": 0.5912441976881769, "grad_norm": 9.6875, "learning_rate": 9.28636817233452e-06, "loss": 1.5378084182739258, "step": 3248 }, { "epoch": 0.5916082643123691, "grad_norm": 9.9375, "learning_rate": 9.285494405454016e-06, "loss": 1.4946824312210083, "step": 3250 }, { "epoch": 0.5919723309365614, "grad_norm": 29.875, "learning_rate": 9.284620156440523e-06, "loss": 1.6368951797485352, "step": 3252 }, { "epoch": 0.5923363975607536, "grad_norm": 15.1875, "learning_rate": 9.283745425422332e-06, "loss": 1.6273603439331055, "step": 3254 }, { "epoch": 0.5927004641849458, "grad_norm": 23.875, "learning_rate": 9.282870212527809e-06, "loss": 1.7658079862594604, "step": 3256 }, { "epoch": 0.5930645308091381, "grad_norm": 12.625, "learning_rate": 9.281994517885384e-06, "loss": 1.7158167362213135, "step": 3258 }, { "epoch": 0.5934285974333303, "grad_norm": 14.8125, "learning_rate": 9.281118341623567e-06, "loss": 1.436009168624878, "step": 3260 }, { "epoch": 0.5937926640575225, "grad_norm": 7.21875, "learning_rate": 9.28024168387093e-06, "loss": 1.3976292610168457, "step": 3262 }, { "epoch": 0.5941567306817147, "grad_norm": 22.125, "learning_rate": 9.27936454475612e-06, "loss": 1.0721571445465088, "step": 3264 }, { "epoch": 0.594520797305907, "grad_norm": 11.625, "learning_rate": 9.278486924407853e-06, "loss": 0.6458092331886292, "step": 3266 }, { "epoch": 0.5948848639300992, "grad_norm": 8.3125, "learning_rate": 9.277608822954914e-06, "loss": 1.586045742034912, "step": 3268 }, { "epoch": 0.5952489305542914, "grad_norm": 4.96875, "learning_rate": 9.276730240526167e-06, "loss": 1.1255725622177124, "step": 3270 }, { "epoch": 0.5956129971784837, "grad_norm": 14.1875, "learning_rate": 9.275851177250533e-06, "loss": 1.4628227949142456, "step": 3272 }, { "epoch": 0.5959770638026759, "grad_norm": 28.375, "learning_rate": 9.274971633257014e-06, "loss": 1.8455495834350586, "step": 3274 }, { "epoch": 0.5963411304268681, "grad_norm": 4.6875, "learning_rate": 9.274091608674685e-06, "loss": 0.8988832235336304, "step": 3276 }, { "epoch": 0.5967051970510604, "grad_norm": 7.8125, "learning_rate": 9.273211103632678e-06, "loss": 1.1780006885528564, "step": 3278 }, { "epoch": 0.5970692636752526, "grad_norm": 12.125, "learning_rate": 9.272330118260207e-06, "loss": 1.5535714626312256, "step": 3280 }, { "epoch": 0.5974333302994448, "grad_norm": 15.875, "learning_rate": 9.271448652686552e-06, "loss": 1.5006792545318604, "step": 3282 }, { "epoch": 0.597797396923637, "grad_norm": 16.375, "learning_rate": 9.270566707041067e-06, "loss": 0.8949829339981079, "step": 3284 }, { "epoch": 0.5981614635478293, "grad_norm": 22.25, "learning_rate": 9.269684281453172e-06, "loss": 1.794499158859253, "step": 3286 }, { "epoch": 0.5985255301720215, "grad_norm": 11.25, "learning_rate": 9.268801376052358e-06, "loss": 1.3735519647598267, "step": 3288 }, { "epoch": 0.5988895967962137, "grad_norm": 21.5, "learning_rate": 9.26791799096819e-06, "loss": 1.250535249710083, "step": 3290 }, { "epoch": 0.599253663420406, "grad_norm": 28.75, "learning_rate": 9.267034126330301e-06, "loss": 1.287517786026001, "step": 3292 }, { "epoch": 0.5996177300445982, "grad_norm": 53.25, "learning_rate": 9.266149782268395e-06, "loss": 1.5526132583618164, "step": 3294 }, { "epoch": 0.5999817966687904, "grad_norm": 8.625, "learning_rate": 9.265264958912243e-06, "loss": 1.298661231994629, "step": 3296 }, { "epoch": 0.6003458632929826, "grad_norm": 17.875, "learning_rate": 9.264379656391694e-06, "loss": 1.7273969650268555, "step": 3298 }, { "epoch": 0.6007099299171749, "grad_norm": 11.6875, "learning_rate": 9.263493874836656e-06, "loss": 1.4521604776382446, "step": 3300 }, { "epoch": 0.6010739965413671, "grad_norm": 4.75, "learning_rate": 9.26260761437712e-06, "loss": 1.0998653173446655, "step": 3302 }, { "epoch": 0.6014380631655593, "grad_norm": 9.75, "learning_rate": 9.26172087514314e-06, "loss": 1.437475323677063, "step": 3304 }, { "epoch": 0.6018021297897516, "grad_norm": 6.5, "learning_rate": 9.260833657264836e-06, "loss": 1.114046335220337, "step": 3306 }, { "epoch": 0.6021661964139438, "grad_norm": 8.6875, "learning_rate": 9.259945960872409e-06, "loss": 1.4759052991867065, "step": 3308 }, { "epoch": 0.602530263038136, "grad_norm": 8.875, "learning_rate": 9.259057786096126e-06, "loss": 1.436745047569275, "step": 3310 }, { "epoch": 0.6028943296623283, "grad_norm": 20.25, "learning_rate": 9.258169133066322e-06, "loss": 1.6244862079620361, "step": 3312 }, { "epoch": 0.6032583962865204, "grad_norm": 10.0, "learning_rate": 9.257280001913397e-06, "loss": 1.7222747802734375, "step": 3314 }, { "epoch": 0.6036224629107126, "grad_norm": 3.4375, "learning_rate": 9.256390392767835e-06, "loss": 0.9695895910263062, "step": 3316 }, { "epoch": 0.6039865295349048, "grad_norm": 3.734375, "learning_rate": 9.255500305760181e-06, "loss": 0.9903299808502197, "step": 3318 }, { "epoch": 0.6043505961590971, "grad_norm": 6.75, "learning_rate": 9.254609741021047e-06, "loss": 1.1582101583480835, "step": 3320 }, { "epoch": 0.6047146627832893, "grad_norm": 11.625, "learning_rate": 9.253718698681127e-06, "loss": 1.4803917407989502, "step": 3322 }, { "epoch": 0.6050787294074815, "grad_norm": 18.375, "learning_rate": 9.252827178871172e-06, "loss": 1.4825176000595093, "step": 3324 }, { "epoch": 0.6054427960316738, "grad_norm": 29.375, "learning_rate": 9.251935181722014e-06, "loss": 1.2989507913589478, "step": 3326 }, { "epoch": 0.605806862655866, "grad_norm": 33.75, "learning_rate": 9.251042707364544e-06, "loss": 1.761730432510376, "step": 3328 }, { "epoch": 0.6061709292800582, "grad_norm": 12.0, "learning_rate": 9.250149755929733e-06, "loss": 1.545709490776062, "step": 3330 }, { "epoch": 0.6065349959042505, "grad_norm": 138.0, "learning_rate": 9.249256327548617e-06, "loss": 1.9335663318634033, "step": 3332 }, { "epoch": 0.6068990625284427, "grad_norm": 9.3125, "learning_rate": 9.248362422352302e-06, "loss": 1.6645519733428955, "step": 3334 }, { "epoch": 0.6072631291526349, "grad_norm": 16.75, "learning_rate": 9.247468040471968e-06, "loss": 1.3717955350875854, "step": 3336 }, { "epoch": 0.6076271957768271, "grad_norm": 18.125, "learning_rate": 9.246573182038858e-06, "loss": 1.4449280500411987, "step": 3338 }, { "epoch": 0.6079912624010194, "grad_norm": 152.0, "learning_rate": 9.24567784718429e-06, "loss": 1.7102694511413574, "step": 3340 }, { "epoch": 0.6083553290252116, "grad_norm": 14.6875, "learning_rate": 9.244782036039655e-06, "loss": 1.5718226432800293, "step": 3342 }, { "epoch": 0.6087193956494038, "grad_norm": 7.625, "learning_rate": 9.243885748736404e-06, "loss": 0.9751600623130798, "step": 3344 }, { "epoch": 0.6090834622735961, "grad_norm": 7.15625, "learning_rate": 9.242988985406065e-06, "loss": 0.9214791059494019, "step": 3346 }, { "epoch": 0.6094475288977883, "grad_norm": 10.0625, "learning_rate": 9.242091746180237e-06, "loss": 0.6493130922317505, "step": 3348 }, { "epoch": 0.6098115955219805, "grad_norm": 11.9375, "learning_rate": 9.241194031190581e-06, "loss": 1.479999303817749, "step": 3350 }, { "epoch": 0.6101756621461728, "grad_norm": 9.8125, "learning_rate": 9.24029584056884e-06, "loss": 1.6433162689208984, "step": 3352 }, { "epoch": 0.610539728770365, "grad_norm": 21.875, "learning_rate": 9.239397174446815e-06, "loss": 1.396240472793579, "step": 3354 }, { "epoch": 0.6109037953945572, "grad_norm": 11.5, "learning_rate": 9.238498032956383e-06, "loss": 1.2425209283828735, "step": 3356 }, { "epoch": 0.6112678620187494, "grad_norm": 9.375, "learning_rate": 9.237598416229487e-06, "loss": 1.254563808441162, "step": 3358 }, { "epoch": 0.6116319286429417, "grad_norm": 15.4375, "learning_rate": 9.236698324398147e-06, "loss": 1.3713421821594238, "step": 3360 }, { "epoch": 0.6119959952671339, "grad_norm": 15.625, "learning_rate": 9.235797757594443e-06, "loss": 1.393410563468933, "step": 3362 }, { "epoch": 0.6123600618913261, "grad_norm": 7.40625, "learning_rate": 9.234896715950534e-06, "loss": 1.4773261547088623, "step": 3364 }, { "epoch": 0.6127241285155184, "grad_norm": 12.375, "learning_rate": 9.23399519959864e-06, "loss": 1.3584930896759033, "step": 3366 }, { "epoch": 0.6130881951397106, "grad_norm": 17.25, "learning_rate": 9.233093208671058e-06, "loss": 1.3461037874221802, "step": 3368 }, { "epoch": 0.6134522617639028, "grad_norm": 10.0625, "learning_rate": 9.23219074330015e-06, "loss": 1.556216835975647, "step": 3370 }, { "epoch": 0.613816328388095, "grad_norm": 12.75, "learning_rate": 9.231287803618347e-06, "loss": 1.533297061920166, "step": 3372 }, { "epoch": 0.6141803950122873, "grad_norm": 17.125, "learning_rate": 9.230384389758155e-06, "loss": 1.4484983682632446, "step": 3374 }, { "epoch": 0.6145444616364795, "grad_norm": 15.1875, "learning_rate": 9.229480501852148e-06, "loss": 1.4798980951309204, "step": 3376 }, { "epoch": 0.6149085282606717, "grad_norm": 6.25, "learning_rate": 9.228576140032963e-06, "loss": 0.9490472674369812, "step": 3378 }, { "epoch": 0.615272594884864, "grad_norm": 13.4375, "learning_rate": 9.227671304433315e-06, "loss": 1.0812265872955322, "step": 3380 }, { "epoch": 0.6156366615090562, "grad_norm": 16.0, "learning_rate": 9.226765995185983e-06, "loss": 1.0332449674606323, "step": 3382 }, { "epoch": 0.6160007281332484, "grad_norm": 29.875, "learning_rate": 9.225860212423816e-06, "loss": 1.5068589448928833, "step": 3384 }, { "epoch": 0.6163647947574407, "grad_norm": 11.625, "learning_rate": 9.224953956279739e-06, "loss": 1.6702684164047241, "step": 3386 }, { "epoch": 0.6167288613816329, "grad_norm": 7.09375, "learning_rate": 9.224047226886737e-06, "loss": 1.2706372737884521, "step": 3388 }, { "epoch": 0.617092928005825, "grad_norm": 18.625, "learning_rate": 9.223140024377872e-06, "loss": 1.6000615358352661, "step": 3390 }, { "epoch": 0.6174569946300172, "grad_norm": 17.375, "learning_rate": 9.222232348886268e-06, "loss": 1.853668212890625, "step": 3392 }, { "epoch": 0.6178210612542095, "grad_norm": 12.3125, "learning_rate": 9.221324200545128e-06, "loss": 1.2229450941085815, "step": 3394 }, { "epoch": 0.6181851278784017, "grad_norm": 9.125, "learning_rate": 9.220415579487716e-06, "loss": 0.6639102101325989, "step": 3396 }, { "epoch": 0.6185491945025939, "grad_norm": 12.625, "learning_rate": 9.219506485847367e-06, "loss": 1.2761417627334595, "step": 3398 }, { "epoch": 0.6189132611267862, "grad_norm": 11.125, "learning_rate": 9.21859691975749e-06, "loss": 1.670899510383606, "step": 3400 }, { "epoch": 0.6192773277509784, "grad_norm": 6.78125, "learning_rate": 9.21768688135156e-06, "loss": 0.8308597207069397, "step": 3402 }, { "epoch": 0.6196413943751706, "grad_norm": 11.25, "learning_rate": 9.216776370763118e-06, "loss": 1.4510284662246704, "step": 3404 }, { "epoch": 0.6200054609993629, "grad_norm": 3.09375, "learning_rate": 9.215865388125782e-06, "loss": 0.9976418614387512, "step": 3406 }, { "epoch": 0.6203695276235551, "grad_norm": 17.125, "learning_rate": 9.214953933573232e-06, "loss": 1.0372593402862549, "step": 3408 }, { "epoch": 0.6207335942477473, "grad_norm": 15.875, "learning_rate": 9.214042007239223e-06, "loss": 1.172553539276123, "step": 3410 }, { "epoch": 0.6210976608719395, "grad_norm": 7.9375, "learning_rate": 9.213129609257574e-06, "loss": 1.231721043586731, "step": 3412 }, { "epoch": 0.6214617274961318, "grad_norm": 7.0, "learning_rate": 9.212216739762174e-06, "loss": 1.1757313013076782, "step": 3414 }, { "epoch": 0.621825794120324, "grad_norm": 262.0, "learning_rate": 9.211303398886988e-06, "loss": 1.4580628871917725, "step": 3416 }, { "epoch": 0.6221898607445162, "grad_norm": 9.25, "learning_rate": 9.210389586766042e-06, "loss": 1.1615405082702637, "step": 3418 }, { "epoch": 0.6225539273687085, "grad_norm": 3.3125, "learning_rate": 9.209475303533435e-06, "loss": 0.8812665939331055, "step": 3420 }, { "epoch": 0.6229179939929007, "grad_norm": 18.375, "learning_rate": 9.208560549323334e-06, "loss": 1.1766237020492554, "step": 3422 }, { "epoch": 0.6232820606170929, "grad_norm": 12.75, "learning_rate": 9.207645324269977e-06, "loss": 1.4328970909118652, "step": 3424 }, { "epoch": 0.6236461272412852, "grad_norm": 5.90625, "learning_rate": 9.206729628507665e-06, "loss": 1.4967670440673828, "step": 3426 }, { "epoch": 0.6240101938654774, "grad_norm": 11.75, "learning_rate": 9.205813462170776e-06, "loss": 1.3680963516235352, "step": 3428 }, { "epoch": 0.6243742604896696, "grad_norm": 20.75, "learning_rate": 9.204896825393754e-06, "loss": 1.1024887561798096, "step": 3430 }, { "epoch": 0.6247383271138618, "grad_norm": 8.8125, "learning_rate": 9.203979718311113e-06, "loss": 0.7795675992965698, "step": 3432 }, { "epoch": 0.6251023937380541, "grad_norm": 7.8125, "learning_rate": 9.203062141057431e-06, "loss": 1.5429625511169434, "step": 3434 }, { "epoch": 0.6254664603622463, "grad_norm": 11.125, "learning_rate": 9.202144093767362e-06, "loss": 1.4142343997955322, "step": 3436 }, { "epoch": 0.6258305269864385, "grad_norm": 15.75, "learning_rate": 9.201225576575623e-06, "loss": 1.8705246448516846, "step": 3438 }, { "epoch": 0.6261945936106308, "grad_norm": 10.375, "learning_rate": 9.200306589617006e-06, "loss": 1.3505696058273315, "step": 3440 }, { "epoch": 0.626558660234823, "grad_norm": 27.5, "learning_rate": 9.199387133026365e-06, "loss": 1.9452415704727173, "step": 3442 }, { "epoch": 0.6269227268590152, "grad_norm": 11.0625, "learning_rate": 9.198467206938628e-06, "loss": 1.3088587522506714, "step": 3444 }, { "epoch": 0.6272867934832074, "grad_norm": 82.5, "learning_rate": 9.197546811488794e-06, "loss": 1.4850128889083862, "step": 3446 }, { "epoch": 0.6276508601073997, "grad_norm": 15.375, "learning_rate": 9.196625946811918e-06, "loss": 1.3140945434570312, "step": 3448 }, { "epoch": 0.6280149267315919, "grad_norm": 13.25, "learning_rate": 9.195704613043143e-06, "loss": 0.8738912343978882, "step": 3450 }, { "epoch": 0.6283789933557841, "grad_norm": 8.875, "learning_rate": 9.194782810317667e-06, "loss": 1.1542036533355713, "step": 3452 }, { "epoch": 0.6287430599799764, "grad_norm": 11.125, "learning_rate": 9.19386053877076e-06, "loss": 1.4024494886398315, "step": 3454 }, { "epoch": 0.6291071266041686, "grad_norm": 13.25, "learning_rate": 9.192937798537764e-06, "loss": 1.2433480024337769, "step": 3456 }, { "epoch": 0.6294711932283608, "grad_norm": 11.9375, "learning_rate": 9.192014589754083e-06, "loss": 1.5170204639434814, "step": 3458 }, { "epoch": 0.6298352598525531, "grad_norm": 9.9375, "learning_rate": 9.191090912555201e-06, "loss": 1.5104825496673584, "step": 3460 }, { "epoch": 0.6301993264767453, "grad_norm": 11.1875, "learning_rate": 9.190166767076658e-06, "loss": 1.1128158569335938, "step": 3462 }, { "epoch": 0.6305633931009375, "grad_norm": 15.625, "learning_rate": 9.18924215345407e-06, "loss": 1.4777809381484985, "step": 3464 }, { "epoch": 0.6309274597251296, "grad_norm": 6.78125, "learning_rate": 9.18831707182312e-06, "loss": 1.2820425033569336, "step": 3466 }, { "epoch": 0.631291526349322, "grad_norm": 5.28125, "learning_rate": 9.187391522319562e-06, "loss": 1.0210084915161133, "step": 3468 }, { "epoch": 0.6316555929735141, "grad_norm": 10.1875, "learning_rate": 9.186465505079216e-06, "loss": 1.4332205057144165, "step": 3470 }, { "epoch": 0.6320196595977063, "grad_norm": 14.0, "learning_rate": 9.18553902023797e-06, "loss": 1.7701436281204224, "step": 3472 }, { "epoch": 0.6323837262218986, "grad_norm": 20.125, "learning_rate": 9.184612067931784e-06, "loss": 1.5954891443252563, "step": 3474 }, { "epoch": 0.6327477928460908, "grad_norm": 12.25, "learning_rate": 9.183684648296683e-06, "loss": 1.7572270631790161, "step": 3476 }, { "epoch": 0.633111859470283, "grad_norm": 10.4375, "learning_rate": 9.182756761468761e-06, "loss": 1.467654824256897, "step": 3478 }, { "epoch": 0.6334759260944753, "grad_norm": 19.0, "learning_rate": 9.181828407584181e-06, "loss": 1.4725662469863892, "step": 3480 }, { "epoch": 0.6338399927186675, "grad_norm": 12.25, "learning_rate": 9.18089958677918e-06, "loss": 1.3248523473739624, "step": 3482 }, { "epoch": 0.6342040593428597, "grad_norm": 4.65625, "learning_rate": 9.179970299190055e-06, "loss": 1.4300415515899658, "step": 3484 }, { "epoch": 0.6345681259670519, "grad_norm": 7.4375, "learning_rate": 9.179040544953176e-06, "loss": 0.9890764355659485, "step": 3486 }, { "epoch": 0.6349321925912442, "grad_norm": 107.0, "learning_rate": 9.178110324204981e-06, "loss": 1.3034687042236328, "step": 3488 }, { "epoch": 0.6352962592154364, "grad_norm": 10.25, "learning_rate": 9.177179637081974e-06, "loss": 0.8682063817977905, "step": 3490 }, { "epoch": 0.6356603258396286, "grad_norm": 10.1875, "learning_rate": 9.176248483720731e-06, "loss": 1.4274332523345947, "step": 3492 }, { "epoch": 0.6360243924638209, "grad_norm": 7.625, "learning_rate": 9.175316864257896e-06, "loss": 1.0677056312561035, "step": 3494 }, { "epoch": 0.6363884590880131, "grad_norm": 7.15625, "learning_rate": 9.17438477883018e-06, "loss": 1.1895233392715454, "step": 3496 }, { "epoch": 0.6367525257122053, "grad_norm": 12.25, "learning_rate": 9.173452227574365e-06, "loss": 1.4755933284759521, "step": 3498 }, { "epoch": 0.6371165923363976, "grad_norm": 42.25, "learning_rate": 9.172519210627293e-06, "loss": 1.356042742729187, "step": 3500 }, { "epoch": 0.6374806589605898, "grad_norm": 13.6875, "learning_rate": 9.171585728125886e-06, "loss": 1.1299794912338257, "step": 3502 }, { "epoch": 0.637844725584782, "grad_norm": 5.9375, "learning_rate": 9.170651780207123e-06, "loss": 1.3135566711425781, "step": 3504 }, { "epoch": 0.6382087922089742, "grad_norm": 10.5, "learning_rate": 9.169717367008064e-06, "loss": 1.4078896045684814, "step": 3506 }, { "epoch": 0.6385728588331665, "grad_norm": 9.75, "learning_rate": 9.168782488665827e-06, "loss": 1.4428621530532837, "step": 3508 }, { "epoch": 0.6389369254573587, "grad_norm": 6.25, "learning_rate": 9.167847145317602e-06, "loss": 1.2242130041122437, "step": 3510 }, { "epoch": 0.6393009920815509, "grad_norm": 9.0625, "learning_rate": 9.166911337100643e-06, "loss": 1.3666901588439941, "step": 3512 }, { "epoch": 0.6396650587057432, "grad_norm": 9.25, "learning_rate": 9.165975064152283e-06, "loss": 1.274733304977417, "step": 3514 }, { "epoch": 0.6400291253299354, "grad_norm": 13.0, "learning_rate": 9.165038326609913e-06, "loss": 1.3937983512878418, "step": 3516 }, { "epoch": 0.6403931919541276, "grad_norm": 13.5, "learning_rate": 9.164101124610993e-06, "loss": 1.6175740957260132, "step": 3518 }, { "epoch": 0.6407572585783198, "grad_norm": 26.125, "learning_rate": 9.163163458293059e-06, "loss": 1.2710429430007935, "step": 3520 }, { "epoch": 0.6411213252025121, "grad_norm": 18.125, "learning_rate": 9.162225327793706e-06, "loss": 1.8064460754394531, "step": 3522 }, { "epoch": 0.6414853918267043, "grad_norm": 9.625, "learning_rate": 9.161286733250601e-06, "loss": 1.56912100315094, "step": 3524 }, { "epoch": 0.6418494584508965, "grad_norm": 7.625, "learning_rate": 9.160347674801477e-06, "loss": 1.2091684341430664, "step": 3526 }, { "epoch": 0.6422135250750888, "grad_norm": 10.1875, "learning_rate": 9.159408152584143e-06, "loss": 1.3263113498687744, "step": 3528 }, { "epoch": 0.642577591699281, "grad_norm": 11.125, "learning_rate": 9.158468166736465e-06, "loss": 1.3968076705932617, "step": 3530 }, { "epoch": 0.6429416583234732, "grad_norm": 6.0625, "learning_rate": 9.157527717396383e-06, "loss": 1.2390064001083374, "step": 3532 }, { "epoch": 0.6433057249476655, "grad_norm": 11.0, "learning_rate": 9.156586804701908e-06, "loss": 1.1571060419082642, "step": 3534 }, { "epoch": 0.6436697915718577, "grad_norm": 18.75, "learning_rate": 9.15564542879111e-06, "loss": 1.441100001335144, "step": 3536 }, { "epoch": 0.6440338581960499, "grad_norm": 13.6875, "learning_rate": 9.154703589802132e-06, "loss": 1.4943218231201172, "step": 3538 }, { "epoch": 0.644397924820242, "grad_norm": 6.875, "learning_rate": 9.153761287873189e-06, "loss": 1.366776466369629, "step": 3540 }, { "epoch": 0.6447619914444344, "grad_norm": 7.59375, "learning_rate": 9.152818523142557e-06, "loss": 1.526563286781311, "step": 3542 }, { "epoch": 0.6451260580686266, "grad_norm": 8.25, "learning_rate": 9.151875295748587e-06, "loss": 1.320901870727539, "step": 3544 }, { "epoch": 0.6454901246928187, "grad_norm": 16.125, "learning_rate": 9.150931605829688e-06, "loss": 1.2511060237884521, "step": 3546 }, { "epoch": 0.645854191317011, "grad_norm": 12.375, "learning_rate": 9.149987453524345e-06, "loss": 1.428252100944519, "step": 3548 }, { "epoch": 0.6462182579412032, "grad_norm": 8.5, "learning_rate": 9.149042838971111e-06, "loss": 1.5096672773361206, "step": 3550 }, { "epoch": 0.6465823245653954, "grad_norm": 20.5, "learning_rate": 9.1480977623086e-06, "loss": 1.6028022766113281, "step": 3552 }, { "epoch": 0.6469463911895877, "grad_norm": 5.21875, "learning_rate": 9.147152223675504e-06, "loss": 1.3338195085525513, "step": 3554 }, { "epoch": 0.6473104578137799, "grad_norm": 9.5, "learning_rate": 9.146206223210572e-06, "loss": 1.3280006647109985, "step": 3556 }, { "epoch": 0.6476745244379721, "grad_norm": 3.234375, "learning_rate": 9.145259761052625e-06, "loss": 1.1066222190856934, "step": 3558 }, { "epoch": 0.6480385910621643, "grad_norm": 25.125, "learning_rate": 9.144312837340557e-06, "loss": 1.0565403699874878, "step": 3560 }, { "epoch": 0.6484026576863566, "grad_norm": 12.4375, "learning_rate": 9.143365452213322e-06, "loss": 0.8064821362495422, "step": 3562 }, { "epoch": 0.6487667243105488, "grad_norm": 5.09375, "learning_rate": 9.142417605809945e-06, "loss": 1.0452728271484375, "step": 3564 }, { "epoch": 0.649130790934741, "grad_norm": 15.6875, "learning_rate": 9.141469298269522e-06, "loss": 1.308931827545166, "step": 3566 }, { "epoch": 0.6494948575589333, "grad_norm": 18.0, "learning_rate": 9.14052052973121e-06, "loss": 1.3293876647949219, "step": 3568 }, { "epoch": 0.6498589241831255, "grad_norm": 8.625, "learning_rate": 9.139571300334238e-06, "loss": 1.2970260381698608, "step": 3570 }, { "epoch": 0.6502229908073177, "grad_norm": 2.625, "learning_rate": 9.138621610217899e-06, "loss": 1.0420702695846558, "step": 3572 }, { "epoch": 0.65058705743151, "grad_norm": 8.375, "learning_rate": 9.13767145952156e-06, "loss": 0.9667816162109375, "step": 3574 }, { "epoch": 0.6509511240557022, "grad_norm": 16.625, "learning_rate": 9.13672084838465e-06, "loss": 1.4660228490829468, "step": 3576 }, { "epoch": 0.6513151906798944, "grad_norm": 30.625, "learning_rate": 9.135769776946666e-06, "loss": 1.392289400100708, "step": 3578 }, { "epoch": 0.6516792573040866, "grad_norm": 15.1875, "learning_rate": 9.134818245347176e-06, "loss": 1.5141706466674805, "step": 3580 }, { "epoch": 0.6520433239282789, "grad_norm": 14.625, "learning_rate": 9.133866253725813e-06, "loss": 1.7205846309661865, "step": 3582 }, { "epoch": 0.6524073905524711, "grad_norm": 10.3125, "learning_rate": 9.132913802222278e-06, "loss": 1.2909421920776367, "step": 3584 }, { "epoch": 0.6527714571766633, "grad_norm": 12.25, "learning_rate": 9.131960890976341e-06, "loss": 1.3672901391983032, "step": 3586 }, { "epoch": 0.6531355238008556, "grad_norm": 28.0, "learning_rate": 9.131007520127836e-06, "loss": 1.5591942071914673, "step": 3588 }, { "epoch": 0.6534995904250478, "grad_norm": 78.0, "learning_rate": 9.130053689816662e-06, "loss": 1.5478873252868652, "step": 3590 }, { "epoch": 0.65386365704924, "grad_norm": 7.46875, "learning_rate": 9.129099400182797e-06, "loss": 1.6789886951446533, "step": 3592 }, { "epoch": 0.6542277236734323, "grad_norm": 11.0625, "learning_rate": 9.128144651366277e-06, "loss": 1.4372708797454834, "step": 3594 }, { "epoch": 0.6545917902976245, "grad_norm": 11.3125, "learning_rate": 9.127189443507205e-06, "loss": 1.0014150142669678, "step": 3596 }, { "epoch": 0.6549558569218167, "grad_norm": 39.0, "learning_rate": 9.126233776745756e-06, "loss": 1.4076837301254272, "step": 3598 }, { "epoch": 0.6553199235460089, "grad_norm": 6.28125, "learning_rate": 9.125277651222168e-06, "loss": 1.1437819004058838, "step": 3600 }, { "epoch": 0.6556839901702012, "grad_norm": 11.25, "learning_rate": 9.124321067076753e-06, "loss": 1.3973109722137451, "step": 3602 }, { "epoch": 0.6560480567943934, "grad_norm": 11.0, "learning_rate": 9.123364024449883e-06, "loss": 1.3617955446243286, "step": 3604 }, { "epoch": 0.6564121234185856, "grad_norm": 23.0, "learning_rate": 9.122406523482e-06, "loss": 1.5899008512496948, "step": 3606 }, { "epoch": 0.6567761900427779, "grad_norm": 19.375, "learning_rate": 9.121448564313612e-06, "loss": 1.8046138286590576, "step": 3608 }, { "epoch": 0.6571402566669701, "grad_norm": 9.1875, "learning_rate": 9.120490147085299e-06, "loss": 1.3494240045547485, "step": 3610 }, { "epoch": 0.6575043232911623, "grad_norm": 7.5625, "learning_rate": 9.119531271937703e-06, "loss": 1.0726373195648193, "step": 3612 }, { "epoch": 0.6578683899153545, "grad_norm": 6.53125, "learning_rate": 9.118571939011535e-06, "loss": 0.8167750239372253, "step": 3614 }, { "epoch": 0.6582324565395468, "grad_norm": 10.1875, "learning_rate": 9.117612148447574e-06, "loss": 1.3734815120697021, "step": 3616 }, { "epoch": 0.658596523163739, "grad_norm": 13.875, "learning_rate": 9.116651900386665e-06, "loss": 1.6158146858215332, "step": 3618 }, { "epoch": 0.6589605897879312, "grad_norm": 21.875, "learning_rate": 9.115691194969719e-06, "loss": 1.5301512479782104, "step": 3620 }, { "epoch": 0.6593246564121235, "grad_norm": 5.1875, "learning_rate": 9.114730032337717e-06, "loss": 0.2065422236919403, "step": 3622 }, { "epoch": 0.6596887230363156, "grad_norm": 18.75, "learning_rate": 9.113768412631705e-06, "loss": 1.3237202167510986, "step": 3624 }, { "epoch": 0.6600527896605078, "grad_norm": 17.625, "learning_rate": 9.112806335992798e-06, "loss": 1.7446810007095337, "step": 3626 }, { "epoch": 0.6604168562847001, "grad_norm": 21.5, "learning_rate": 9.111843802562178e-06, "loss": 1.4303135871887207, "step": 3628 }, { "epoch": 0.6607809229088923, "grad_norm": 11.0, "learning_rate": 9.110880812481089e-06, "loss": 1.577548861503601, "step": 3630 }, { "epoch": 0.6611449895330845, "grad_norm": 11.8125, "learning_rate": 9.10991736589085e-06, "loss": 1.5449628829956055, "step": 3632 }, { "epoch": 0.6615090561572767, "grad_norm": 10.625, "learning_rate": 9.108953462932839e-06, "loss": 1.6511759757995605, "step": 3634 }, { "epoch": 0.661873122781469, "grad_norm": 18.625, "learning_rate": 9.107989103748508e-06, "loss": 1.8719981908798218, "step": 3636 }, { "epoch": 0.6622371894056612, "grad_norm": 18.75, "learning_rate": 9.107024288479371e-06, "loss": 1.4480392932891846, "step": 3638 }, { "epoch": 0.6626012560298534, "grad_norm": 21.375, "learning_rate": 9.106059017267013e-06, "loss": 1.9537347555160522, "step": 3640 }, { "epoch": 0.6629653226540457, "grad_norm": 17.75, "learning_rate": 9.10509329025308e-06, "loss": 1.4034967422485352, "step": 3642 }, { "epoch": 0.6633293892782379, "grad_norm": 20.75, "learning_rate": 9.10412710757929e-06, "loss": 1.2126206159591675, "step": 3644 }, { "epoch": 0.6636934559024301, "grad_norm": 9.625, "learning_rate": 9.10316046938743e-06, "loss": 0.8758245706558228, "step": 3646 }, { "epoch": 0.6640575225266224, "grad_norm": 14.0625, "learning_rate": 9.102193375819344e-06, "loss": 1.525219440460205, "step": 3648 }, { "epoch": 0.6644215891508146, "grad_norm": 18.25, "learning_rate": 9.101225827016956e-06, "loss": 2.0014102458953857, "step": 3650 }, { "epoch": 0.6647856557750068, "grad_norm": 9.25, "learning_rate": 9.100257823122241e-06, "loss": 1.4483447074890137, "step": 3652 }, { "epoch": 0.665149722399199, "grad_norm": 13.125, "learning_rate": 9.099289364277257e-06, "loss": 1.418277382850647, "step": 3654 }, { "epoch": 0.6655137890233913, "grad_norm": 12.9375, "learning_rate": 9.09832045062412e-06, "loss": 1.392439842224121, "step": 3656 }, { "epoch": 0.6658778556475835, "grad_norm": 16.5, "learning_rate": 9.097351082305012e-06, "loss": 1.9772604703903198, "step": 3658 }, { "epoch": 0.6662419222717757, "grad_norm": 11.3125, "learning_rate": 9.096381259462187e-06, "loss": 1.629359483718872, "step": 3660 }, { "epoch": 0.666605988895968, "grad_norm": 3.640625, "learning_rate": 9.095410982237957e-06, "loss": 0.9926884174346924, "step": 3662 }, { "epoch": 0.6669700555201602, "grad_norm": 10.125, "learning_rate": 9.094440250774712e-06, "loss": 1.2300328016281128, "step": 3664 }, { "epoch": 0.6673341221443524, "grad_norm": 9.875, "learning_rate": 9.093469065214903e-06, "loss": 1.2856507301330566, "step": 3666 }, { "epoch": 0.6676981887685447, "grad_norm": 30.0, "learning_rate": 9.092497425701043e-06, "loss": 1.8362234830856323, "step": 3668 }, { "epoch": 0.6680622553927369, "grad_norm": 15.0625, "learning_rate": 9.09152533237572e-06, "loss": 1.8722002506256104, "step": 3670 }, { "epoch": 0.6684263220169291, "grad_norm": 37.75, "learning_rate": 9.09055278538158e-06, "loss": 1.195242166519165, "step": 3672 }, { "epoch": 0.6687903886411213, "grad_norm": 21.0, "learning_rate": 9.089579784861348e-06, "loss": 1.375098466873169, "step": 3674 }, { "epoch": 0.6691544552653136, "grad_norm": 14.0, "learning_rate": 9.088606330957803e-06, "loss": 1.2577656507492065, "step": 3676 }, { "epoch": 0.6695185218895058, "grad_norm": 10.375, "learning_rate": 9.087632423813792e-06, "loss": 1.4363480806350708, "step": 3678 }, { "epoch": 0.669882588513698, "grad_norm": 23.375, "learning_rate": 9.08665806357224e-06, "loss": 1.4257678985595703, "step": 3680 }, { "epoch": 0.6702466551378903, "grad_norm": 6.5, "learning_rate": 9.085683250376124e-06, "loss": 1.5641443729400635, "step": 3682 }, { "epoch": 0.6706107217620825, "grad_norm": 23.0, "learning_rate": 9.084707984368496e-06, "loss": 1.6387417316436768, "step": 3684 }, { "epoch": 0.6709747883862747, "grad_norm": 36.5, "learning_rate": 9.083732265692475e-06, "loss": 2.193417549133301, "step": 3686 }, { "epoch": 0.6713388550104669, "grad_norm": 15.375, "learning_rate": 9.08275609449124e-06, "loss": 1.682459831237793, "step": 3688 }, { "epoch": 0.6717029216346592, "grad_norm": 27.375, "learning_rate": 9.08177947090804e-06, "loss": 1.7272855043411255, "step": 3690 }, { "epoch": 0.6720669882588514, "grad_norm": 42.5, "learning_rate": 9.080802395086194e-06, "loss": 0.8551642894744873, "step": 3692 }, { "epoch": 0.6724310548830436, "grad_norm": 9.8125, "learning_rate": 9.079824867169082e-06, "loss": 1.4094687700271606, "step": 3694 }, { "epoch": 0.6727951215072359, "grad_norm": 10.125, "learning_rate": 9.078846887300153e-06, "loss": 0.6505305171012878, "step": 3696 }, { "epoch": 0.673159188131428, "grad_norm": 18.0, "learning_rate": 9.077868455622918e-06, "loss": 1.4937613010406494, "step": 3698 }, { "epoch": 0.6735232547556202, "grad_norm": 12.125, "learning_rate": 9.076889572280961e-06, "loss": 1.5752208232879639, "step": 3700 }, { "epoch": 0.6738873213798126, "grad_norm": 17.5, "learning_rate": 9.07591023741793e-06, "loss": 1.1342289447784424, "step": 3702 }, { "epoch": 0.6742513880040047, "grad_norm": 12.125, "learning_rate": 9.074930451177538e-06, "loss": 0.5768145322799683, "step": 3704 }, { "epoch": 0.6746154546281969, "grad_norm": 41.0, "learning_rate": 9.073950213703561e-06, "loss": 1.5090428590774536, "step": 3706 }, { "epoch": 0.6749795212523891, "grad_norm": 12.25, "learning_rate": 9.072969525139849e-06, "loss": 1.757763385772705, "step": 3708 }, { "epoch": 0.6753435878765814, "grad_norm": 6.8125, "learning_rate": 9.071988385630316e-06, "loss": 1.5267927646636963, "step": 3710 }, { "epoch": 0.6757076545007736, "grad_norm": 16.75, "learning_rate": 9.071006795318933e-06, "loss": 1.0630428791046143, "step": 3712 }, { "epoch": 0.6760717211249658, "grad_norm": 11.875, "learning_rate": 9.07002475434975e-06, "loss": 1.157039761543274, "step": 3714 }, { "epoch": 0.6764357877491581, "grad_norm": 18.0, "learning_rate": 9.069042262866876e-06, "loss": 1.0162842273712158, "step": 3716 }, { "epoch": 0.6767998543733503, "grad_norm": 16.25, "learning_rate": 9.068059321014489e-06, "loss": 1.2372663021087646, "step": 3718 }, { "epoch": 0.6771639209975425, "grad_norm": 8.6875, "learning_rate": 9.067075928936829e-06, "loss": 1.5702767372131348, "step": 3720 }, { "epoch": 0.6775279876217348, "grad_norm": 16.875, "learning_rate": 9.066092086778205e-06, "loss": 2.2045841217041016, "step": 3722 }, { "epoch": 0.677892054245927, "grad_norm": 4.5625, "learning_rate": 9.065107794682994e-06, "loss": 1.0491530895233154, "step": 3724 }, { "epoch": 0.6782561208701192, "grad_norm": 6.25, "learning_rate": 9.064123052795636e-06, "loss": 1.4416320323944092, "step": 3726 }, { "epoch": 0.6786201874943114, "grad_norm": 12.5, "learning_rate": 9.063137861260639e-06, "loss": 1.3921982049942017, "step": 3728 }, { "epoch": 0.6789842541185037, "grad_norm": 14.125, "learning_rate": 9.062152220222572e-06, "loss": 1.5467690229415894, "step": 3730 }, { "epoch": 0.6793483207426959, "grad_norm": 8.0, "learning_rate": 9.06116612982608e-06, "loss": 1.4096386432647705, "step": 3732 }, { "epoch": 0.6797123873668881, "grad_norm": 6.40625, "learning_rate": 9.060179590215862e-06, "loss": 1.107706904411316, "step": 3734 }, { "epoch": 0.6800764539910804, "grad_norm": 8.8125, "learning_rate": 9.059192601536691e-06, "loss": 1.1766926050186157, "step": 3736 }, { "epoch": 0.6804405206152726, "grad_norm": 4.65625, "learning_rate": 9.058205163933404e-06, "loss": 0.695428192615509, "step": 3738 }, { "epoch": 0.6808045872394648, "grad_norm": 20.5, "learning_rate": 9.057217277550903e-06, "loss": 1.3012020587921143, "step": 3740 }, { "epoch": 0.6811686538636571, "grad_norm": 22.625, "learning_rate": 9.056228942534158e-06, "loss": 0.7615830302238464, "step": 3742 }, { "epoch": 0.6815327204878493, "grad_norm": 17.125, "learning_rate": 9.055240159028198e-06, "loss": 1.4642246961593628, "step": 3744 }, { "epoch": 0.6818967871120415, "grad_norm": 7.34375, "learning_rate": 9.05425092717813e-06, "loss": 1.075600266456604, "step": 3746 }, { "epoch": 0.6822608537362337, "grad_norm": 16.625, "learning_rate": 9.053261247129113e-06, "loss": 1.5341835021972656, "step": 3748 }, { "epoch": 0.682624920360426, "grad_norm": 12.125, "learning_rate": 9.052271119026383e-06, "loss": 1.0074820518493652, "step": 3750 }, { "epoch": 0.6829889869846182, "grad_norm": 11.9375, "learning_rate": 9.051280543015238e-06, "loss": 1.3445367813110352, "step": 3752 }, { "epoch": 0.6833530536088104, "grad_norm": 10.625, "learning_rate": 9.050289519241036e-06, "loss": 1.3881233930587769, "step": 3754 }, { "epoch": 0.6837171202330027, "grad_norm": 6.9375, "learning_rate": 9.04929804784921e-06, "loss": 1.3323017358779907, "step": 3756 }, { "epoch": 0.6840811868571949, "grad_norm": 10.6875, "learning_rate": 9.048306128985253e-06, "loss": 1.4439759254455566, "step": 3758 }, { "epoch": 0.6844452534813871, "grad_norm": 50.25, "learning_rate": 9.047313762794727e-06, "loss": 1.2597166299819946, "step": 3760 }, { "epoch": 0.6848093201055793, "grad_norm": 6.4375, "learning_rate": 9.046320949423254e-06, "loss": 1.0838193893432617, "step": 3762 }, { "epoch": 0.6851733867297716, "grad_norm": 11.6875, "learning_rate": 9.045327689016527e-06, "loss": 1.339174747467041, "step": 3764 }, { "epoch": 0.6855374533539638, "grad_norm": 11.125, "learning_rate": 9.044333981720306e-06, "loss": 1.1985255479812622, "step": 3766 }, { "epoch": 0.685901519978156, "grad_norm": 15.75, "learning_rate": 9.043339827680408e-06, "loss": 1.1497808694839478, "step": 3768 }, { "epoch": 0.6862655866023483, "grad_norm": 11.3125, "learning_rate": 9.042345227042726e-06, "loss": 1.723909616470337, "step": 3770 }, { "epoch": 0.6866296532265405, "grad_norm": 9.8125, "learning_rate": 9.04135017995321e-06, "loss": 1.6765867471694946, "step": 3772 }, { "epoch": 0.6869937198507327, "grad_norm": 6.625, "learning_rate": 9.040354686557881e-06, "loss": 1.2211887836456299, "step": 3774 }, { "epoch": 0.687357786474925, "grad_norm": 7.9375, "learning_rate": 9.039358747002824e-06, "loss": 0.9901754856109619, "step": 3776 }, { "epoch": 0.6877218530991172, "grad_norm": 30.75, "learning_rate": 9.038362361434186e-06, "loss": 1.4585082530975342, "step": 3778 }, { "epoch": 0.6880859197233093, "grad_norm": 14.9375, "learning_rate": 9.037365529998185e-06, "loss": 1.7494745254516602, "step": 3780 }, { "epoch": 0.6884499863475015, "grad_norm": 12.625, "learning_rate": 9.036368252841106e-06, "loss": 1.340724229812622, "step": 3782 }, { "epoch": 0.6888140529716938, "grad_norm": 7.03125, "learning_rate": 9.035370530109288e-06, "loss": 1.5212626457214355, "step": 3784 }, { "epoch": 0.689178119595886, "grad_norm": 8.1875, "learning_rate": 9.034372361949146e-06, "loss": 1.4433444738388062, "step": 3786 }, { "epoch": 0.6895421862200782, "grad_norm": 12.875, "learning_rate": 9.033373748507157e-06, "loss": 1.622555136680603, "step": 3788 }, { "epoch": 0.6899062528442705, "grad_norm": 10.0625, "learning_rate": 9.032374689929864e-06, "loss": 1.35758376121521, "step": 3790 }, { "epoch": 0.6902703194684627, "grad_norm": 6.625, "learning_rate": 9.031375186363875e-06, "loss": 1.3290655612945557, "step": 3792 }, { "epoch": 0.6906343860926549, "grad_norm": 7.09375, "learning_rate": 9.030375237955862e-06, "loss": 1.209246277809143, "step": 3794 }, { "epoch": 0.6909984527168472, "grad_norm": 17.125, "learning_rate": 9.029374844852565e-06, "loss": 1.3822636604309082, "step": 3796 }, { "epoch": 0.6913625193410394, "grad_norm": 4.84375, "learning_rate": 9.028374007200785e-06, "loss": 1.1453665494918823, "step": 3798 }, { "epoch": 0.6917265859652316, "grad_norm": 11.0625, "learning_rate": 9.027372725147392e-06, "loss": 1.2516539096832275, "step": 3800 }, { "epoch": 0.6920906525894238, "grad_norm": 6.15625, "learning_rate": 9.026370998839322e-06, "loss": 1.0178422927856445, "step": 3802 }, { "epoch": 0.6924547192136161, "grad_norm": 20.375, "learning_rate": 9.025368828423573e-06, "loss": 1.292572259902954, "step": 3804 }, { "epoch": 0.6928187858378083, "grad_norm": 23.25, "learning_rate": 9.024366214047206e-06, "loss": 1.7667193412780762, "step": 3806 }, { "epoch": 0.6931828524620005, "grad_norm": 23.875, "learning_rate": 9.023363155857357e-06, "loss": 1.1921138763427734, "step": 3808 }, { "epoch": 0.6935469190861928, "grad_norm": 19.875, "learning_rate": 9.022359654001216e-06, "loss": 0.8805833458900452, "step": 3810 }, { "epoch": 0.693910985710385, "grad_norm": 13.25, "learning_rate": 9.021355708626046e-06, "loss": 1.5749866962432861, "step": 3812 }, { "epoch": 0.6942750523345772, "grad_norm": 4.3125, "learning_rate": 9.020351319879169e-06, "loss": 1.334184169769287, "step": 3814 }, { "epoch": 0.6946391189587695, "grad_norm": 6.8125, "learning_rate": 9.019346487907977e-06, "loss": 1.4905041456222534, "step": 3816 }, { "epoch": 0.6950031855829617, "grad_norm": 22.0, "learning_rate": 9.018341212859922e-06, "loss": 1.3692952394485474, "step": 3818 }, { "epoch": 0.6953672522071539, "grad_norm": 16.25, "learning_rate": 9.017335494882528e-06, "loss": 1.5577635765075684, "step": 3820 }, { "epoch": 0.6957313188313461, "grad_norm": 20.375, "learning_rate": 9.016329334123377e-06, "loss": 1.3878321647644043, "step": 3822 }, { "epoch": 0.6960953854555384, "grad_norm": 22.25, "learning_rate": 9.015322730730123e-06, "loss": 1.6337788105010986, "step": 3824 }, { "epoch": 0.6964594520797306, "grad_norm": 13.5, "learning_rate": 9.014315684850477e-06, "loss": 1.3739981651306152, "step": 3826 }, { "epoch": 0.6968235187039228, "grad_norm": 9.875, "learning_rate": 9.013308196632218e-06, "loss": 1.1458913087844849, "step": 3828 }, { "epoch": 0.6971875853281151, "grad_norm": 44.0, "learning_rate": 9.012300266223196e-06, "loss": 0.8456411361694336, "step": 3830 }, { "epoch": 0.6975516519523073, "grad_norm": 11.625, "learning_rate": 9.011291893771317e-06, "loss": 1.2588180303573608, "step": 3832 }, { "epoch": 0.6979157185764995, "grad_norm": 8.9375, "learning_rate": 9.010283079424556e-06, "loss": 1.327724814414978, "step": 3834 }, { "epoch": 0.6982797852006917, "grad_norm": 12.625, "learning_rate": 9.009273823330951e-06, "loss": 1.160435438156128, "step": 3836 }, { "epoch": 0.698643851824884, "grad_norm": 23.875, "learning_rate": 9.008264125638611e-06, "loss": 0.8293675184249878, "step": 3838 }, { "epoch": 0.6990079184490762, "grad_norm": 193.0, "learning_rate": 9.007253986495701e-06, "loss": 0.7560205459594727, "step": 3840 }, { "epoch": 0.6993719850732684, "grad_norm": 6.96875, "learning_rate": 9.006243406050454e-06, "loss": 1.1688603162765503, "step": 3842 }, { "epoch": 0.6997360516974607, "grad_norm": 12.0, "learning_rate": 9.005232384451172e-06, "loss": 1.6441826820373535, "step": 3844 }, { "epoch": 0.7001001183216529, "grad_norm": 10.6875, "learning_rate": 9.004220921846217e-06, "loss": 1.4990862607955933, "step": 3846 }, { "epoch": 0.7004641849458451, "grad_norm": 8.8125, "learning_rate": 9.003209018384017e-06, "loss": 1.2281641960144043, "step": 3848 }, { "epoch": 0.7008282515700374, "grad_norm": 9.0, "learning_rate": 9.002196674213065e-06, "loss": 1.1953091621398926, "step": 3850 }, { "epoch": 0.7011923181942296, "grad_norm": 7.375, "learning_rate": 9.001183889481915e-06, "loss": 1.1360362768173218, "step": 3852 }, { "epoch": 0.7015563848184218, "grad_norm": 6.1875, "learning_rate": 9.000170664339191e-06, "loss": 1.31300950050354, "step": 3854 }, { "epoch": 0.701920451442614, "grad_norm": 18.125, "learning_rate": 8.999156998933585e-06, "loss": 1.570727825164795, "step": 3856 }, { "epoch": 0.7022845180668063, "grad_norm": 17.5, "learning_rate": 8.998142893413842e-06, "loss": 1.5525283813476562, "step": 3858 }, { "epoch": 0.7026485846909984, "grad_norm": 25.75, "learning_rate": 8.99712834792878e-06, "loss": 1.5696039199829102, "step": 3860 }, { "epoch": 0.7030126513151906, "grad_norm": 7.9375, "learning_rate": 8.996113362627279e-06, "loss": 1.2184580564498901, "step": 3862 }, { "epoch": 0.703376717939383, "grad_norm": 7.0625, "learning_rate": 8.995097937658286e-06, "loss": 1.4596872329711914, "step": 3864 }, { "epoch": 0.7037407845635751, "grad_norm": 12.8125, "learning_rate": 8.994082073170807e-06, "loss": 1.5328449010849, "step": 3866 }, { "epoch": 0.7041048511877673, "grad_norm": 9.75, "learning_rate": 8.993065769313915e-06, "loss": 1.5280065536499023, "step": 3868 }, { "epoch": 0.7044689178119596, "grad_norm": 17.125, "learning_rate": 8.992049026236756e-06, "loss": 1.651501178741455, "step": 3870 }, { "epoch": 0.7048329844361518, "grad_norm": 5.40625, "learning_rate": 8.991031844088528e-06, "loss": 1.0046759843826294, "step": 3872 }, { "epoch": 0.705197051060344, "grad_norm": 11.3125, "learning_rate": 8.990014223018495e-06, "loss": 1.5836578607559204, "step": 3874 }, { "epoch": 0.7055611176845362, "grad_norm": 16.0, "learning_rate": 8.988996163175994e-06, "loss": 1.4356216192245483, "step": 3876 }, { "epoch": 0.7059251843087285, "grad_norm": 8.25, "learning_rate": 8.98797766471042e-06, "loss": 1.3767637014389038, "step": 3878 }, { "epoch": 0.7062892509329207, "grad_norm": 14.875, "learning_rate": 8.98695872777123e-06, "loss": 1.3272817134857178, "step": 3880 }, { "epoch": 0.7066533175571129, "grad_norm": 7.875, "learning_rate": 8.985939352507955e-06, "loss": 1.3519479036331177, "step": 3882 }, { "epoch": 0.7070173841813052, "grad_norm": 10.875, "learning_rate": 8.98491953907018e-06, "loss": 1.2547365427017212, "step": 3884 }, { "epoch": 0.7073814508054974, "grad_norm": 11.125, "learning_rate": 8.98389928760756e-06, "loss": 1.3547316789627075, "step": 3886 }, { "epoch": 0.7077455174296896, "grad_norm": 17.125, "learning_rate": 8.982878598269811e-06, "loss": 1.4341374635696411, "step": 3888 }, { "epoch": 0.7081095840538819, "grad_norm": 13.4375, "learning_rate": 8.981857471206716e-06, "loss": 1.963982343673706, "step": 3890 }, { "epoch": 0.7084736506780741, "grad_norm": 15.3125, "learning_rate": 8.980835906568125e-06, "loss": 1.3414711952209473, "step": 3892 }, { "epoch": 0.7088377173022663, "grad_norm": 15.5625, "learning_rate": 8.97981390450394e-06, "loss": 1.1575798988342285, "step": 3894 }, { "epoch": 0.7092017839264585, "grad_norm": 9.75, "learning_rate": 8.978791465164145e-06, "loss": 1.2165459394454956, "step": 3896 }, { "epoch": 0.7095658505506508, "grad_norm": 13.0, "learning_rate": 8.977768588698772e-06, "loss": 1.3495299816131592, "step": 3898 }, { "epoch": 0.709929917174843, "grad_norm": 22.875, "learning_rate": 8.976745275257925e-06, "loss": 1.4521515369415283, "step": 3900 }, { "epoch": 0.7102939837990352, "grad_norm": 15.5625, "learning_rate": 8.975721524991777e-06, "loss": 1.0500729084014893, "step": 3902 }, { "epoch": 0.7106580504232275, "grad_norm": 9.6875, "learning_rate": 8.97469733805055e-06, "loss": 0.7728732824325562, "step": 3904 }, { "epoch": 0.7110221170474197, "grad_norm": 14.25, "learning_rate": 8.973672714584547e-06, "loss": 1.131068468093872, "step": 3906 }, { "epoch": 0.7113861836716119, "grad_norm": 10.9375, "learning_rate": 8.972647654744125e-06, "loss": 1.590749740600586, "step": 3908 }, { "epoch": 0.7117502502958041, "grad_norm": 8.3125, "learning_rate": 8.971622158679704e-06, "loss": 1.2511940002441406, "step": 3910 }, { "epoch": 0.7121143169199964, "grad_norm": 8.1875, "learning_rate": 8.970596226541775e-06, "loss": 1.4950616359710693, "step": 3912 }, { "epoch": 0.7124783835441886, "grad_norm": 11.5625, "learning_rate": 8.96956985848089e-06, "loss": 1.2371429204940796, "step": 3914 }, { "epoch": 0.7128424501683808, "grad_norm": 16.375, "learning_rate": 8.968543054647662e-06, "loss": 1.5337557792663574, "step": 3916 }, { "epoch": 0.7132065167925731, "grad_norm": 10.5625, "learning_rate": 8.967515815192772e-06, "loss": 1.0237574577331543, "step": 3918 }, { "epoch": 0.7135705834167653, "grad_norm": 11.875, "learning_rate": 8.96648814026696e-06, "loss": 1.3167476654052734, "step": 3920 }, { "epoch": 0.7139346500409575, "grad_norm": 40.5, "learning_rate": 8.965460030021038e-06, "loss": 0.9302914142608643, "step": 3922 }, { "epoch": 0.7142987166651498, "grad_norm": 16.25, "learning_rate": 8.964431484605874e-06, "loss": 1.4194064140319824, "step": 3924 }, { "epoch": 0.714662783289342, "grad_norm": 12.0, "learning_rate": 8.963402504172403e-06, "loss": 1.2571460008621216, "step": 3926 }, { "epoch": 0.7150268499135342, "grad_norm": 10.5, "learning_rate": 8.962373088871624e-06, "loss": 1.4063042402267456, "step": 3928 }, { "epoch": 0.7153909165377264, "grad_norm": 14.5625, "learning_rate": 8.9613432388546e-06, "loss": 1.3174469470977783, "step": 3930 }, { "epoch": 0.7157549831619187, "grad_norm": 19.0, "learning_rate": 8.960312954272457e-06, "loss": 0.9649209976196289, "step": 3932 }, { "epoch": 0.7161190497861109, "grad_norm": 13.4375, "learning_rate": 8.959282235276386e-06, "loss": 1.6821236610412598, "step": 3934 }, { "epoch": 0.716483116410303, "grad_norm": 12.5, "learning_rate": 8.958251082017637e-06, "loss": 1.40818190574646, "step": 3936 }, { "epoch": 0.7168471830344953, "grad_norm": 55.75, "learning_rate": 8.957219494647534e-06, "loss": 1.3784319162368774, "step": 3938 }, { "epoch": 0.7172112496586875, "grad_norm": 5.15625, "learning_rate": 8.95618747331745e-06, "loss": 0.9126887321472168, "step": 3940 }, { "epoch": 0.7175753162828797, "grad_norm": 26.125, "learning_rate": 8.955155018178839e-06, "loss": 0.997277557849884, "step": 3942 }, { "epoch": 0.717939382907072, "grad_norm": 16.875, "learning_rate": 8.954122129383205e-06, "loss": 0.6627995371818542, "step": 3944 }, { "epoch": 0.7183034495312642, "grad_norm": 10.0, "learning_rate": 8.95308880708212e-06, "loss": 1.61794114112854, "step": 3946 }, { "epoch": 0.7186675161554564, "grad_norm": 13.0, "learning_rate": 8.95205505142722e-06, "loss": 1.6856334209442139, "step": 3948 }, { "epoch": 0.7190315827796486, "grad_norm": 10.25, "learning_rate": 8.951020862570204e-06, "loss": 1.308815836906433, "step": 3950 }, { "epoch": 0.7193956494038409, "grad_norm": 8.1875, "learning_rate": 8.949986240662835e-06, "loss": 1.3072153329849243, "step": 3952 }, { "epoch": 0.7197597160280331, "grad_norm": 10.0625, "learning_rate": 8.948951185856943e-06, "loss": 1.3817613124847412, "step": 3954 }, { "epoch": 0.7201237826522253, "grad_norm": 14.5, "learning_rate": 8.947915698304415e-06, "loss": 1.75496506690979, "step": 3956 }, { "epoch": 0.7204878492764176, "grad_norm": 4.46875, "learning_rate": 8.946879778157203e-06, "loss": 1.1558823585510254, "step": 3958 }, { "epoch": 0.7208519159006098, "grad_norm": 9.0625, "learning_rate": 8.94584342556733e-06, "loss": 1.1663873195648193, "step": 3960 }, { "epoch": 0.721215982524802, "grad_norm": 19.75, "learning_rate": 8.944806640686869e-06, "loss": 1.1661570072174072, "step": 3962 }, { "epoch": 0.7215800491489943, "grad_norm": 14.9375, "learning_rate": 8.94376942366797e-06, "loss": 1.5698572397232056, "step": 3964 }, { "epoch": 0.7219441157731865, "grad_norm": 17.875, "learning_rate": 8.942731774662837e-06, "loss": 1.1468448638916016, "step": 3966 }, { "epoch": 0.7223081823973787, "grad_norm": 18.0, "learning_rate": 8.94169369382374e-06, "loss": 1.856109619140625, "step": 3968 }, { "epoch": 0.7226722490215709, "grad_norm": 5.46875, "learning_rate": 8.940655181303019e-06, "loss": 1.5610462427139282, "step": 3970 }, { "epoch": 0.7230363156457632, "grad_norm": 23.125, "learning_rate": 8.939616237253068e-06, "loss": 1.1868939399719238, "step": 3972 }, { "epoch": 0.7234003822699554, "grad_norm": 12.0625, "learning_rate": 8.938576861826344e-06, "loss": 0.9291361570358276, "step": 3974 }, { "epoch": 0.7237644488941476, "grad_norm": 5.375, "learning_rate": 8.937537055175375e-06, "loss": 1.2235300540924072, "step": 3976 }, { "epoch": 0.7241285155183399, "grad_norm": 16.75, "learning_rate": 8.936496817452752e-06, "loss": 1.2607587575912476, "step": 3978 }, { "epoch": 0.7244925821425321, "grad_norm": 16.25, "learning_rate": 8.935456148811116e-06, "loss": 1.295597791671753, "step": 3980 }, { "epoch": 0.7248566487667243, "grad_norm": 14.3125, "learning_rate": 8.93441504940319e-06, "loss": 1.4838569164276123, "step": 3982 }, { "epoch": 0.7252207153909166, "grad_norm": 64.5, "learning_rate": 8.933373519381748e-06, "loss": 1.0721181631088257, "step": 3984 }, { "epoch": 0.7255847820151088, "grad_norm": 15.75, "learning_rate": 8.932331558899627e-06, "loss": 1.4383556842803955, "step": 3986 }, { "epoch": 0.725948848639301, "grad_norm": 7.125, "learning_rate": 8.931289168109737e-06, "loss": 1.3305082321166992, "step": 3988 }, { "epoch": 0.7263129152634932, "grad_norm": 12.1875, "learning_rate": 8.930246347165038e-06, "loss": 1.8584890365600586, "step": 3990 }, { "epoch": 0.7266769818876855, "grad_norm": 11.625, "learning_rate": 8.929203096218561e-06, "loss": 1.5628858804702759, "step": 3992 }, { "epoch": 0.7270410485118777, "grad_norm": 22.0, "learning_rate": 8.928159415423406e-06, "loss": 1.3630568981170654, "step": 3994 }, { "epoch": 0.7274051151360699, "grad_norm": 30.5, "learning_rate": 8.92711530493272e-06, "loss": 1.333431363105774, "step": 3996 }, { "epoch": 0.7277691817602622, "grad_norm": 22.625, "learning_rate": 8.926070764899729e-06, "loss": 1.5804121494293213, "step": 3998 }, { "epoch": 0.7281332483844544, "grad_norm": 16.875, "learning_rate": 8.92502579547771e-06, "loss": 1.5849170684814453, "step": 4000 }, { "epoch": 0.7284973150086466, "grad_norm": 10.125, "learning_rate": 8.923980396820006e-06, "loss": 1.6163780689239502, "step": 4002 }, { "epoch": 0.7288613816328388, "grad_norm": 14.9375, "learning_rate": 8.922934569080033e-06, "loss": 1.7660216093063354, "step": 4004 }, { "epoch": 0.7292254482570311, "grad_norm": 13.4375, "learning_rate": 8.921888312411256e-06, "loss": 1.4897310733795166, "step": 4006 }, { "epoch": 0.7295895148812233, "grad_norm": 7.46875, "learning_rate": 8.92084162696721e-06, "loss": 1.14540433883667, "step": 4008 }, { "epoch": 0.7299535815054154, "grad_norm": 8.5, "learning_rate": 8.919794512901495e-06, "loss": 1.2850167751312256, "step": 4010 }, { "epoch": 0.7303176481296078, "grad_norm": 13.8125, "learning_rate": 8.918746970367764e-06, "loss": 1.3529331684112549, "step": 4012 }, { "epoch": 0.7306817147538, "grad_norm": 16.625, "learning_rate": 8.917698999519746e-06, "loss": 1.7041479349136353, "step": 4014 }, { "epoch": 0.7310457813779921, "grad_norm": 15.8125, "learning_rate": 8.916650600511225e-06, "loss": 1.6238069534301758, "step": 4016 }, { "epoch": 0.7314098480021844, "grad_norm": 24.125, "learning_rate": 8.915601773496048e-06, "loss": 0.6529870629310608, "step": 4018 }, { "epoch": 0.7317739146263766, "grad_norm": 43.75, "learning_rate": 8.914552518628126e-06, "loss": 0.9866514205932617, "step": 4020 }, { "epoch": 0.7321379812505688, "grad_norm": 20.75, "learning_rate": 8.913502836061434e-06, "loss": 1.5501985549926758, "step": 4022 }, { "epoch": 0.732502047874761, "grad_norm": 12.125, "learning_rate": 8.912452725950008e-06, "loss": 1.9918923377990723, "step": 4024 }, { "epoch": 0.7328661144989533, "grad_norm": 19.625, "learning_rate": 8.911402188447946e-06, "loss": 1.2583119869232178, "step": 4026 }, { "epoch": 0.7332301811231455, "grad_norm": 9.0, "learning_rate": 8.910351223709416e-06, "loss": 1.0755724906921387, "step": 4028 }, { "epoch": 0.7335942477473377, "grad_norm": 18.5, "learning_rate": 8.909299831888634e-06, "loss": 1.4027138948440552, "step": 4030 }, { "epoch": 0.73395831437153, "grad_norm": 11.375, "learning_rate": 8.908248013139895e-06, "loss": 0.8530186414718628, "step": 4032 }, { "epoch": 0.7343223809957222, "grad_norm": 8.25, "learning_rate": 8.907195767617545e-06, "loss": 1.2342077493667603, "step": 4034 }, { "epoch": 0.7346864476199144, "grad_norm": 5.03125, "learning_rate": 8.906143095475999e-06, "loss": 0.518145740032196, "step": 4036 }, { "epoch": 0.7350505142441067, "grad_norm": 11.4375, "learning_rate": 8.905089996869729e-06, "loss": 1.4473772048950195, "step": 4038 }, { "epoch": 0.7354145808682989, "grad_norm": 16.0, "learning_rate": 8.904036471953277e-06, "loss": 1.4251716136932373, "step": 4040 }, { "epoch": 0.7357786474924911, "grad_norm": 8.875, "learning_rate": 8.90298252088124e-06, "loss": 1.3303751945495605, "step": 4042 }, { "epoch": 0.7361427141166833, "grad_norm": 9.5625, "learning_rate": 8.901928143808285e-06, "loss": 1.4375848770141602, "step": 4044 }, { "epoch": 0.7365067807408756, "grad_norm": 32.75, "learning_rate": 8.900873340889136e-06, "loss": 1.4494879245758057, "step": 4046 }, { "epoch": 0.7368708473650678, "grad_norm": 24.25, "learning_rate": 8.899818112278578e-06, "loss": 1.4735255241394043, "step": 4048 }, { "epoch": 0.73723491398926, "grad_norm": 20.625, "learning_rate": 8.898762458131467e-06, "loss": 1.40456223487854, "step": 4050 }, { "epoch": 0.7375989806134523, "grad_norm": 7.53125, "learning_rate": 8.897706378602708e-06, "loss": 1.2281516790390015, "step": 4052 }, { "epoch": 0.7379630472376445, "grad_norm": 15.375, "learning_rate": 8.896649873847286e-06, "loss": 1.433131456375122, "step": 4054 }, { "epoch": 0.7383271138618367, "grad_norm": 27.75, "learning_rate": 8.895592944020232e-06, "loss": 1.968809962272644, "step": 4056 }, { "epoch": 0.738691180486029, "grad_norm": 24.375, "learning_rate": 8.894535589276649e-06, "loss": 1.0219695568084717, "step": 4058 }, { "epoch": 0.7390552471102212, "grad_norm": 15.75, "learning_rate": 8.893477809771698e-06, "loss": 1.2095305919647217, "step": 4060 }, { "epoch": 0.7394193137344134, "grad_norm": 12.125, "learning_rate": 8.892419605660606e-06, "loss": 1.8842158317565918, "step": 4062 }, { "epoch": 0.7397833803586056, "grad_norm": 10.9375, "learning_rate": 8.891360977098658e-06, "loss": 1.4887069463729858, "step": 4064 }, { "epoch": 0.7401474469827979, "grad_norm": 7.03125, "learning_rate": 8.890301924241203e-06, "loss": 1.483014464378357, "step": 4066 }, { "epoch": 0.7405115136069901, "grad_norm": 17.0, "learning_rate": 8.889242447243655e-06, "loss": 1.4747929573059082, "step": 4068 }, { "epoch": 0.7408755802311823, "grad_norm": 7.8125, "learning_rate": 8.888182546261488e-06, "loss": 1.4071441888809204, "step": 4070 }, { "epoch": 0.7412396468553746, "grad_norm": 19.125, "learning_rate": 8.887122221450235e-06, "loss": 1.1633793115615845, "step": 4072 }, { "epoch": 0.7416037134795668, "grad_norm": 16.0, "learning_rate": 8.8860614729655e-06, "loss": 1.3279290199279785, "step": 4074 }, { "epoch": 0.741967780103759, "grad_norm": 13.9375, "learning_rate": 8.885000300962936e-06, "loss": 1.4762736558914185, "step": 4076 }, { "epoch": 0.7423318467279512, "grad_norm": 12.1875, "learning_rate": 8.883938705598271e-06, "loss": 1.5852270126342773, "step": 4078 }, { "epoch": 0.7426959133521435, "grad_norm": 13.3125, "learning_rate": 8.882876687027289e-06, "loss": 1.5529594421386719, "step": 4080 }, { "epoch": 0.7430599799763357, "grad_norm": 29.75, "learning_rate": 8.881814245405838e-06, "loss": 1.4649889469146729, "step": 4082 }, { "epoch": 0.7434240466005279, "grad_norm": 5.6875, "learning_rate": 8.880751380889822e-06, "loss": 0.5284292697906494, "step": 4084 }, { "epoch": 0.7437881132247202, "grad_norm": 13.75, "learning_rate": 8.879688093635218e-06, "loss": 1.3982738256454468, "step": 4086 }, { "epoch": 0.7441521798489124, "grad_norm": 13.9375, "learning_rate": 8.878624383798056e-06, "loss": 1.694324254989624, "step": 4088 }, { "epoch": 0.7445162464731045, "grad_norm": 5.0, "learning_rate": 8.877560251534431e-06, "loss": 1.3337924480438232, "step": 4090 }, { "epoch": 0.7448803130972969, "grad_norm": 16.25, "learning_rate": 8.876495697000502e-06, "loss": 1.4946918487548828, "step": 4092 }, { "epoch": 0.745244379721489, "grad_norm": 9.5625, "learning_rate": 8.875430720352487e-06, "loss": 1.9913033246994019, "step": 4094 }, { "epoch": 0.7456084463456812, "grad_norm": 20.375, "learning_rate": 8.874365321746668e-06, "loss": 1.2863436937332153, "step": 4096 }, { "epoch": 0.7459725129698734, "grad_norm": 14.6875, "learning_rate": 8.873299501339383e-06, "loss": 1.560056209564209, "step": 4098 }, { "epoch": 0.7463365795940657, "grad_norm": 9.1875, "learning_rate": 8.872233259287044e-06, "loss": 1.3569194078445435, "step": 4100 }, { "epoch": 0.7467006462182579, "grad_norm": 12.375, "learning_rate": 8.871166595746113e-06, "loss": 1.429707407951355, "step": 4102 }, { "epoch": 0.7470647128424501, "grad_norm": 16.0, "learning_rate": 8.87009951087312e-06, "loss": 1.4371904134750366, "step": 4104 }, { "epoch": 0.7474287794666424, "grad_norm": 8.5625, "learning_rate": 8.869032004824656e-06, "loss": 1.375626564025879, "step": 4106 }, { "epoch": 0.7477928460908346, "grad_norm": 4.6875, "learning_rate": 8.867964077757372e-06, "loss": 1.330753207206726, "step": 4108 }, { "epoch": 0.7481569127150268, "grad_norm": 5.9375, "learning_rate": 8.866895729827983e-06, "loss": 0.913055956363678, "step": 4110 }, { "epoch": 0.7485209793392191, "grad_norm": 36.0, "learning_rate": 8.865826961193263e-06, "loss": 1.1803151369094849, "step": 4112 }, { "epoch": 0.7488850459634113, "grad_norm": 10.1875, "learning_rate": 8.86475777201005e-06, "loss": 0.8327276110649109, "step": 4114 }, { "epoch": 0.7492491125876035, "grad_norm": 11.125, "learning_rate": 8.863688162435244e-06, "loss": 1.4408912658691406, "step": 4116 }, { "epoch": 0.7496131792117957, "grad_norm": 23.0, "learning_rate": 8.862618132625806e-06, "loss": 1.602658987045288, "step": 4118 }, { "epoch": 0.749977245835988, "grad_norm": 82.0, "learning_rate": 8.86154768273876e-06, "loss": 1.4650168418884277, "step": 4120 }, { "epoch": 0.7503413124601802, "grad_norm": 8.1875, "learning_rate": 8.860476812931188e-06, "loss": 1.435798168182373, "step": 4122 }, { "epoch": 0.7507053790843724, "grad_norm": 9.3125, "learning_rate": 8.859405523360234e-06, "loss": 1.2188894748687744, "step": 4124 }, { "epoch": 0.7510694457085647, "grad_norm": 17.25, "learning_rate": 8.858333814183109e-06, "loss": 1.763692021369934, "step": 4126 }, { "epoch": 0.7514335123327569, "grad_norm": 13.75, "learning_rate": 8.857261685557079e-06, "loss": 1.8446099758148193, "step": 4128 }, { "epoch": 0.7517975789569491, "grad_norm": 23.875, "learning_rate": 8.85618913763948e-06, "loss": 1.2607200145721436, "step": 4130 }, { "epoch": 0.7521616455811414, "grad_norm": 14.0, "learning_rate": 8.855116170587697e-06, "loss": 0.876882016658783, "step": 4132 }, { "epoch": 0.7525257122053336, "grad_norm": 6.0625, "learning_rate": 8.854042784559187e-06, "loss": 1.0581748485565186, "step": 4134 }, { "epoch": 0.7528897788295258, "grad_norm": 24.0, "learning_rate": 8.852968979711465e-06, "loss": 1.3951890468597412, "step": 4136 }, { "epoch": 0.753253845453718, "grad_norm": 8.125, "learning_rate": 8.851894756202109e-06, "loss": 1.4047353267669678, "step": 4138 }, { "epoch": 0.7536179120779103, "grad_norm": 9.0, "learning_rate": 8.850820114188754e-06, "loss": 1.1785528659820557, "step": 4140 }, { "epoch": 0.7539819787021025, "grad_norm": 21.875, "learning_rate": 8.8497450538291e-06, "loss": 1.7320830821990967, "step": 4142 }, { "epoch": 0.7543460453262947, "grad_norm": 9.6875, "learning_rate": 8.848669575280907e-06, "loss": 1.4284143447875977, "step": 4144 }, { "epoch": 0.754710111950487, "grad_norm": 9.5, "learning_rate": 8.847593678702002e-06, "loss": 1.2777068614959717, "step": 4146 }, { "epoch": 0.7550741785746792, "grad_norm": 18.375, "learning_rate": 8.846517364250265e-06, "loss": 1.6407257318496704, "step": 4148 }, { "epoch": 0.7554382451988714, "grad_norm": 8.375, "learning_rate": 8.845440632083637e-06, "loss": 0.8985050320625305, "step": 4150 }, { "epoch": 0.7558023118230636, "grad_norm": 39.25, "learning_rate": 8.84436348236013e-06, "loss": 1.2232482433319092, "step": 4152 }, { "epoch": 0.7561663784472559, "grad_norm": 19.25, "learning_rate": 8.84328591523781e-06, "loss": 1.7907624244689941, "step": 4154 }, { "epoch": 0.7565304450714481, "grad_norm": 7.125, "learning_rate": 8.842207930874802e-06, "loss": 1.2141859531402588, "step": 4156 }, { "epoch": 0.7568945116956403, "grad_norm": 6.90625, "learning_rate": 8.841129529429299e-06, "loss": 1.3649156093597412, "step": 4158 }, { "epoch": 0.7572585783198326, "grad_norm": 19.25, "learning_rate": 8.840050711059556e-06, "loss": 1.3847813606262207, "step": 4160 }, { "epoch": 0.7576226449440248, "grad_norm": 13.125, "learning_rate": 8.838971475923876e-06, "loss": 1.7742314338684082, "step": 4162 }, { "epoch": 0.757986711568217, "grad_norm": 10.9375, "learning_rate": 8.83789182418064e-06, "loss": 1.4739961624145508, "step": 4164 }, { "epoch": 0.7583507781924093, "grad_norm": 20.375, "learning_rate": 8.83681175598828e-06, "loss": 1.5541844367980957, "step": 4166 }, { "epoch": 0.7587148448166015, "grad_norm": 326.0, "learning_rate": 8.835731271505289e-06, "loss": 1.3313920497894287, "step": 4168 }, { "epoch": 0.7590789114407936, "grad_norm": 6.8125, "learning_rate": 8.834650370890227e-06, "loss": 1.3126857280731201, "step": 4170 }, { "epoch": 0.7594429780649858, "grad_norm": 16.125, "learning_rate": 8.833569054301712e-06, "loss": 1.2136203050613403, "step": 4172 }, { "epoch": 0.7598070446891781, "grad_norm": 8.5625, "learning_rate": 8.83248732189842e-06, "loss": 1.5499008893966675, "step": 4174 }, { "epoch": 0.7601711113133703, "grad_norm": 17.75, "learning_rate": 8.831405173839094e-06, "loss": 1.3748478889465332, "step": 4176 }, { "epoch": 0.7605351779375625, "grad_norm": 9.375, "learning_rate": 8.830322610282533e-06, "loss": 1.2371443510055542, "step": 4178 }, { "epoch": 0.7608992445617548, "grad_norm": 16.625, "learning_rate": 8.829239631387598e-06, "loss": 1.2147772312164307, "step": 4180 }, { "epoch": 0.761263311185947, "grad_norm": 15.1875, "learning_rate": 8.828156237313215e-06, "loss": 1.9534438848495483, "step": 4182 }, { "epoch": 0.7616273778101392, "grad_norm": 15.75, "learning_rate": 8.827072428218366e-06, "loss": 1.5573246479034424, "step": 4184 }, { "epoch": 0.7619914444343315, "grad_norm": 12.75, "learning_rate": 8.825988204262092e-06, "loss": 1.4831327199935913, "step": 4186 }, { "epoch": 0.7623555110585237, "grad_norm": 8.9375, "learning_rate": 8.824903565603507e-06, "loss": 1.2221503257751465, "step": 4188 }, { "epoch": 0.7627195776827159, "grad_norm": 7.46875, "learning_rate": 8.823818512401768e-06, "loss": 1.3167275190353394, "step": 4190 }, { "epoch": 0.7630836443069081, "grad_norm": 8.125, "learning_rate": 8.822733044816108e-06, "loss": 1.0497537851333618, "step": 4192 }, { "epoch": 0.7634477109311004, "grad_norm": 10.625, "learning_rate": 8.821647163005811e-06, "loss": 1.4484376907348633, "step": 4194 }, { "epoch": 0.7638117775552926, "grad_norm": 11.1875, "learning_rate": 8.82056086713023e-06, "loss": 1.0506078004837036, "step": 4196 }, { "epoch": 0.7641758441794848, "grad_norm": 12.0625, "learning_rate": 8.819474157348774e-06, "loss": 1.6282072067260742, "step": 4198 }, { "epoch": 0.7645399108036771, "grad_norm": 10.875, "learning_rate": 8.818387033820907e-06, "loss": 1.5184917449951172, "step": 4200 }, { "epoch": 0.7649039774278693, "grad_norm": 10.6875, "learning_rate": 8.817299496706166e-06, "loss": 1.6753480434417725, "step": 4202 }, { "epoch": 0.7652680440520615, "grad_norm": 12.25, "learning_rate": 8.816211546164145e-06, "loss": 1.4577313661575317, "step": 4204 }, { "epoch": 0.7656321106762538, "grad_norm": 12.1875, "learning_rate": 8.81512318235449e-06, "loss": 1.4209911823272705, "step": 4206 }, { "epoch": 0.765996177300446, "grad_norm": 11.125, "learning_rate": 8.814034405436918e-06, "loss": 1.5116889476776123, "step": 4208 }, { "epoch": 0.7663602439246382, "grad_norm": 10.875, "learning_rate": 8.812945215571198e-06, "loss": 1.1695141792297363, "step": 4210 }, { "epoch": 0.7667243105488304, "grad_norm": 11.75, "learning_rate": 8.811855612917172e-06, "loss": 1.3789341449737549, "step": 4212 }, { "epoch": 0.7670883771730227, "grad_norm": 10.125, "learning_rate": 8.810765597634728e-06, "loss": 1.172945261001587, "step": 4214 }, { "epoch": 0.7674524437972149, "grad_norm": 25.5, "learning_rate": 8.809675169883823e-06, "loss": 1.0605055093765259, "step": 4216 }, { "epoch": 0.7678165104214071, "grad_norm": 11.375, "learning_rate": 8.808584329824474e-06, "loss": 1.2311534881591797, "step": 4218 }, { "epoch": 0.7681805770455994, "grad_norm": 8.4375, "learning_rate": 8.807493077616757e-06, "loss": 1.150730013847351, "step": 4220 }, { "epoch": 0.7685446436697916, "grad_norm": 10.5625, "learning_rate": 8.806401413420809e-06, "loss": 1.649304986000061, "step": 4222 }, { "epoch": 0.7689087102939838, "grad_norm": 28.875, "learning_rate": 8.805309337396826e-06, "loss": 1.9404007196426392, "step": 4224 }, { "epoch": 0.769272776918176, "grad_norm": 5.8125, "learning_rate": 8.804216849705067e-06, "loss": 1.3319504261016846, "step": 4226 }, { "epoch": 0.7696368435423683, "grad_norm": 19.375, "learning_rate": 8.803123950505852e-06, "loss": 1.5371342897415161, "step": 4228 }, { "epoch": 0.7700009101665605, "grad_norm": 19.25, "learning_rate": 8.802030639959553e-06, "loss": 1.4116675853729248, "step": 4230 }, { "epoch": 0.7703649767907527, "grad_norm": 15.0625, "learning_rate": 8.800936918226616e-06, "loss": 1.0641881227493286, "step": 4232 }, { "epoch": 0.770729043414945, "grad_norm": 19.0, "learning_rate": 8.799842785467538e-06, "loss": 0.5184696316719055, "step": 4234 }, { "epoch": 0.7710931100391372, "grad_norm": 14.75, "learning_rate": 8.798748241842878e-06, "loss": 1.5277142524719238, "step": 4236 }, { "epoch": 0.7714571766633294, "grad_norm": 9.0, "learning_rate": 8.797653287513256e-06, "loss": 1.4668095111846924, "step": 4238 }, { "epoch": 0.7718212432875217, "grad_norm": 5.6875, "learning_rate": 8.796557922639347e-06, "loss": 1.599367380142212, "step": 4240 }, { "epoch": 0.7721853099117139, "grad_norm": 6.90625, "learning_rate": 8.795462147381902e-06, "loss": 1.2175581455230713, "step": 4242 }, { "epoch": 0.772549376535906, "grad_norm": 12.125, "learning_rate": 8.794365961901714e-06, "loss": 1.287764310836792, "step": 4244 }, { "epoch": 0.7729134431600982, "grad_norm": 8.3125, "learning_rate": 8.793269366359645e-06, "loss": 1.232977271080017, "step": 4246 }, { "epoch": 0.7732775097842906, "grad_norm": 18.875, "learning_rate": 8.792172360916618e-06, "loss": 1.4290766716003418, "step": 4248 }, { "epoch": 0.7736415764084827, "grad_norm": 20.625, "learning_rate": 8.79107494573361e-06, "loss": 1.7165418863296509, "step": 4250 }, { "epoch": 0.7740056430326749, "grad_norm": 8.625, "learning_rate": 8.78997712097167e-06, "loss": 1.4682202339172363, "step": 4252 }, { "epoch": 0.7743697096568672, "grad_norm": 28.625, "learning_rate": 8.788878886791889e-06, "loss": 1.5918464660644531, "step": 4254 }, { "epoch": 0.7747337762810594, "grad_norm": 10.6875, "learning_rate": 8.787780243355437e-06, "loss": 1.5947998762130737, "step": 4256 }, { "epoch": 0.7750978429052516, "grad_norm": 28.5, "learning_rate": 8.786681190823531e-06, "loss": 1.198481798171997, "step": 4258 }, { "epoch": 0.7754619095294439, "grad_norm": 7.0625, "learning_rate": 8.785581729357456e-06, "loss": 1.3257670402526855, "step": 4260 }, { "epoch": 0.7758259761536361, "grad_norm": 101.0, "learning_rate": 8.784481859118547e-06, "loss": 1.5759248733520508, "step": 4262 }, { "epoch": 0.7761900427778283, "grad_norm": 12.5625, "learning_rate": 8.78338158026821e-06, "loss": 1.9172375202178955, "step": 4264 }, { "epoch": 0.7765541094020205, "grad_norm": 7.5, "learning_rate": 8.782280892967909e-06, "loss": 1.3287369012832642, "step": 4266 }, { "epoch": 0.7769181760262128, "grad_norm": 7.65625, "learning_rate": 8.781179797379162e-06, "loss": 1.1690568923950195, "step": 4268 }, { "epoch": 0.777282242650405, "grad_norm": 12.25, "learning_rate": 8.78007829366355e-06, "loss": 1.3491458892822266, "step": 4270 }, { "epoch": 0.7776463092745972, "grad_norm": 9.375, "learning_rate": 8.778976381982716e-06, "loss": 1.4549459218978882, "step": 4272 }, { "epoch": 0.7780103758987895, "grad_norm": 9.875, "learning_rate": 8.77787406249836e-06, "loss": 1.206111192703247, "step": 4274 }, { "epoch": 0.7783744425229817, "grad_norm": 8.0625, "learning_rate": 8.77677133537224e-06, "loss": 0.9205513000488281, "step": 4276 }, { "epoch": 0.7787385091471739, "grad_norm": 10.375, "learning_rate": 8.775668200766186e-06, "loss": 1.348332405090332, "step": 4278 }, { "epoch": 0.7791025757713662, "grad_norm": 6.5, "learning_rate": 8.774564658842066e-06, "loss": 1.286337971687317, "step": 4280 }, { "epoch": 0.7794666423955584, "grad_norm": 37.5, "learning_rate": 8.773460709761831e-06, "loss": 1.2027983665466309, "step": 4282 }, { "epoch": 0.7798307090197506, "grad_norm": 56.75, "learning_rate": 8.772356353687474e-06, "loss": 1.7031192779541016, "step": 4284 }, { "epoch": 0.7801947756439428, "grad_norm": 13.25, "learning_rate": 8.771251590781059e-06, "loss": 1.289367437362671, "step": 4286 }, { "epoch": 0.7805588422681351, "grad_norm": 14.125, "learning_rate": 8.770146421204704e-06, "loss": 1.0499001741409302, "step": 4288 }, { "epoch": 0.7809229088923273, "grad_norm": 7.0, "learning_rate": 8.769040845120587e-06, "loss": 1.2087839841842651, "step": 4290 }, { "epoch": 0.7812869755165195, "grad_norm": 12.125, "learning_rate": 8.767934862690948e-06, "loss": 1.2550642490386963, "step": 4292 }, { "epoch": 0.7816510421407118, "grad_norm": 12.875, "learning_rate": 8.766828474078087e-06, "loss": 1.7575970888137817, "step": 4294 }, { "epoch": 0.782015108764904, "grad_norm": 8.5, "learning_rate": 8.765721679444359e-06, "loss": 1.30368173122406, "step": 4296 }, { "epoch": 0.7823791753890962, "grad_norm": 8.1875, "learning_rate": 8.764614478952185e-06, "loss": 1.3250945806503296, "step": 4298 }, { "epoch": 0.7827432420132885, "grad_norm": 9.0625, "learning_rate": 8.763506872764036e-06, "loss": 1.5625274181365967, "step": 4300 }, { "epoch": 0.7831073086374807, "grad_norm": 12.0, "learning_rate": 8.762398861042456e-06, "loss": 1.3666670322418213, "step": 4302 }, { "epoch": 0.7834713752616729, "grad_norm": 13.125, "learning_rate": 8.761290443950037e-06, "loss": 1.488893747329712, "step": 4304 }, { "epoch": 0.7838354418858651, "grad_norm": 25.0, "learning_rate": 8.760181621649438e-06, "loss": 1.437034249305725, "step": 4306 }, { "epoch": 0.7841995085100574, "grad_norm": 26.875, "learning_rate": 8.75907239430337e-06, "loss": 1.2303574085235596, "step": 4308 }, { "epoch": 0.7845635751342496, "grad_norm": 7.90625, "learning_rate": 8.75796276207461e-06, "loss": 1.3229930400848389, "step": 4310 }, { "epoch": 0.7849276417584418, "grad_norm": 8.9375, "learning_rate": 8.756852725125993e-06, "loss": 1.1679694652557373, "step": 4312 }, { "epoch": 0.7852917083826341, "grad_norm": 22.125, "learning_rate": 8.75574228362041e-06, "loss": 1.4020464420318604, "step": 4314 }, { "epoch": 0.7856557750068263, "grad_norm": 2.21875, "learning_rate": 8.754631437720814e-06, "loss": 1.1125102043151855, "step": 4316 }, { "epoch": 0.7860198416310185, "grad_norm": 11.125, "learning_rate": 8.753520187590222e-06, "loss": 1.3107123374938965, "step": 4318 }, { "epoch": 0.7863839082552107, "grad_norm": 10.5625, "learning_rate": 8.752408533391697e-06, "loss": 1.4200538396835327, "step": 4320 }, { "epoch": 0.786747974879403, "grad_norm": 12.75, "learning_rate": 8.751296475288375e-06, "loss": 1.6238305568695068, "step": 4322 }, { "epoch": 0.7871120415035952, "grad_norm": 18.25, "learning_rate": 8.750184013443445e-06, "loss": 1.7460156679153442, "step": 4324 }, { "epoch": 0.7874761081277873, "grad_norm": 19.625, "learning_rate": 8.749071148020159e-06, "loss": 0.9765514135360718, "step": 4326 }, { "epoch": 0.7878401747519796, "grad_norm": 13.9375, "learning_rate": 8.74795787918182e-06, "loss": 1.1897203922271729, "step": 4328 }, { "epoch": 0.7882042413761718, "grad_norm": 9.75, "learning_rate": 8.7468442070918e-06, "loss": 1.2098305225372314, "step": 4330 }, { "epoch": 0.788568308000364, "grad_norm": 9.375, "learning_rate": 8.745730131913525e-06, "loss": 0.8845806121826172, "step": 4332 }, { "epoch": 0.7889323746245563, "grad_norm": 5.8125, "learning_rate": 8.744615653810482e-06, "loss": 0.9948340654373169, "step": 4334 }, { "epoch": 0.7892964412487485, "grad_norm": 4.875, "learning_rate": 8.743500772946215e-06, "loss": 1.226870059967041, "step": 4336 }, { "epoch": 0.7896605078729407, "grad_norm": 18.625, "learning_rate": 8.742385489484325e-06, "loss": 1.5136758089065552, "step": 4338 }, { "epoch": 0.7900245744971329, "grad_norm": 16.875, "learning_rate": 8.741269803588479e-06, "loss": 0.9573846459388733, "step": 4340 }, { "epoch": 0.7903886411213252, "grad_norm": 13.125, "learning_rate": 8.7401537154224e-06, "loss": 1.5381865501403809, "step": 4342 }, { "epoch": 0.7907527077455174, "grad_norm": 31.875, "learning_rate": 8.739037225149867e-06, "loss": 1.2576684951782227, "step": 4344 }, { "epoch": 0.7911167743697096, "grad_norm": 45.0, "learning_rate": 8.737920332934724e-06, "loss": 1.0145173072814941, "step": 4346 }, { "epoch": 0.7914808409939019, "grad_norm": 14.0, "learning_rate": 8.736803038940867e-06, "loss": 1.7330098152160645, "step": 4348 }, { "epoch": 0.7918449076180941, "grad_norm": 6.15625, "learning_rate": 8.735685343332251e-06, "loss": 0.963650643825531, "step": 4350 }, { "epoch": 0.7922089742422863, "grad_norm": 19.125, "learning_rate": 8.734567246272902e-06, "loss": 1.4850554466247559, "step": 4352 }, { "epoch": 0.7925730408664786, "grad_norm": 11.9375, "learning_rate": 8.733448747926892e-06, "loss": 1.444158911705017, "step": 4354 }, { "epoch": 0.7929371074906708, "grad_norm": 8.1875, "learning_rate": 8.732329848458357e-06, "loss": 1.3289103507995605, "step": 4356 }, { "epoch": 0.793301174114863, "grad_norm": 7.96875, "learning_rate": 8.731210548031485e-06, "loss": 1.4776029586791992, "step": 4358 }, { "epoch": 0.7936652407390552, "grad_norm": 8.5625, "learning_rate": 8.730090846810537e-06, "loss": 1.4532073736190796, "step": 4360 }, { "epoch": 0.7940293073632475, "grad_norm": 7.8125, "learning_rate": 8.72897074495982e-06, "loss": 1.2802387475967407, "step": 4362 }, { "epoch": 0.7943933739874397, "grad_norm": 18.375, "learning_rate": 8.727850242643707e-06, "loss": 1.2998063564300537, "step": 4364 }, { "epoch": 0.7947574406116319, "grad_norm": 24.125, "learning_rate": 8.726729340026625e-06, "loss": 1.371389627456665, "step": 4366 }, { "epoch": 0.7951215072358242, "grad_norm": 6.46875, "learning_rate": 8.725608037273063e-06, "loss": 1.3968528509140015, "step": 4368 }, { "epoch": 0.7954855738600164, "grad_norm": 8.125, "learning_rate": 8.724486334547568e-06, "loss": 1.310551404953003, "step": 4370 }, { "epoch": 0.7958496404842086, "grad_norm": 7.6875, "learning_rate": 8.723364232014744e-06, "loss": 1.35434889793396, "step": 4372 }, { "epoch": 0.7962137071084009, "grad_norm": 17.875, "learning_rate": 8.722241729839257e-06, "loss": 1.2660176753997803, "step": 4374 }, { "epoch": 0.7965777737325931, "grad_norm": 12.0, "learning_rate": 8.721118828185828e-06, "loss": 1.4760253429412842, "step": 4376 }, { "epoch": 0.7969418403567853, "grad_norm": 9.5625, "learning_rate": 8.719995527219238e-06, "loss": 1.267617106437683, "step": 4378 }, { "epoch": 0.7973059069809775, "grad_norm": 8.6875, "learning_rate": 8.718871827104327e-06, "loss": 1.4835522174835205, "step": 4380 }, { "epoch": 0.7976699736051698, "grad_norm": 10.5, "learning_rate": 8.717747728005997e-06, "loss": 1.3898547887802124, "step": 4382 }, { "epoch": 0.798034040229362, "grad_norm": 9.125, "learning_rate": 8.716623230089199e-06, "loss": 1.2879462242126465, "step": 4384 }, { "epoch": 0.7983981068535542, "grad_norm": 8.3125, "learning_rate": 8.715498333518953e-06, "loss": 1.351919412612915, "step": 4386 }, { "epoch": 0.7987621734777465, "grad_norm": 11.125, "learning_rate": 8.71437303846033e-06, "loss": 1.5388047695159912, "step": 4388 }, { "epoch": 0.7991262401019387, "grad_norm": 38.75, "learning_rate": 8.713247345078467e-06, "loss": 1.153308629989624, "step": 4390 }, { "epoch": 0.7994903067261309, "grad_norm": 11.5, "learning_rate": 8.712121253538549e-06, "loss": 0.690681517124176, "step": 4392 }, { "epoch": 0.7998543733503231, "grad_norm": 11.5, "learning_rate": 8.71099476400583e-06, "loss": 1.2446155548095703, "step": 4394 }, { "epoch": 0.8002184399745154, "grad_norm": 10.375, "learning_rate": 8.709867876645613e-06, "loss": 1.4488739967346191, "step": 4396 }, { "epoch": 0.8005825065987076, "grad_norm": 8.5625, "learning_rate": 8.70874059162327e-06, "loss": 1.2444548606872559, "step": 4398 }, { "epoch": 0.8009465732228997, "grad_norm": 8.9375, "learning_rate": 8.707612909104222e-06, "loss": 1.2452828884124756, "step": 4400 }, { "epoch": 0.801310639847092, "grad_norm": 36.25, "learning_rate": 8.706484829253954e-06, "loss": 1.4948606491088867, "step": 4402 }, { "epoch": 0.8016747064712842, "grad_norm": 15.125, "learning_rate": 8.705356352238003e-06, "loss": 1.2883861064910889, "step": 4404 }, { "epoch": 0.8020387730954764, "grad_norm": 38.5, "learning_rate": 8.704227478221974e-06, "loss": 1.2535079717636108, "step": 4406 }, { "epoch": 0.8024028397196687, "grad_norm": 10.0625, "learning_rate": 8.70309820737152e-06, "loss": 0.8813364505767822, "step": 4408 }, { "epoch": 0.8027669063438609, "grad_norm": 26.75, "learning_rate": 8.70196853985236e-06, "loss": 1.6842833757400513, "step": 4410 }, { "epoch": 0.8031309729680531, "grad_norm": 9.9375, "learning_rate": 8.700838475830267e-06, "loss": 1.0274977684020996, "step": 4412 }, { "epoch": 0.8034950395922453, "grad_norm": 3.546875, "learning_rate": 8.699708015471071e-06, "loss": 0.6947841644287109, "step": 4414 }, { "epoch": 0.8038591062164376, "grad_norm": 4.0625, "learning_rate": 8.698577158940666e-06, "loss": 1.1942253112792969, "step": 4416 }, { "epoch": 0.8042231728406298, "grad_norm": 12.25, "learning_rate": 8.697445906405e-06, "loss": 1.1912000179290771, "step": 4418 }, { "epoch": 0.804587239464822, "grad_norm": 2.828125, "learning_rate": 8.696314258030078e-06, "loss": 1.1682474613189697, "step": 4420 }, { "epoch": 0.8049513060890143, "grad_norm": 11.3125, "learning_rate": 8.695182213981968e-06, "loss": 1.1248276233673096, "step": 4422 }, { "epoch": 0.8053153727132065, "grad_norm": 6.71875, "learning_rate": 8.694049774426786e-06, "loss": 1.4576165676116943, "step": 4424 }, { "epoch": 0.8056794393373987, "grad_norm": 8.875, "learning_rate": 8.692916939530722e-06, "loss": 1.3074344396591187, "step": 4426 }, { "epoch": 0.806043505961591, "grad_norm": 4.4375, "learning_rate": 8.69178370946001e-06, "loss": 1.23238205909729, "step": 4428 }, { "epoch": 0.8064075725857832, "grad_norm": 18.625, "learning_rate": 8.690650084380946e-06, "loss": 1.8036983013153076, "step": 4430 }, { "epoch": 0.8067716392099754, "grad_norm": 19.375, "learning_rate": 8.689516064459886e-06, "loss": 1.278721809387207, "step": 4432 }, { "epoch": 0.8071357058341676, "grad_norm": 10.0625, "learning_rate": 8.688381649863245e-06, "loss": 1.2096893787384033, "step": 4434 }, { "epoch": 0.8074997724583599, "grad_norm": 7.90625, "learning_rate": 8.68724684075749e-06, "loss": 1.3531429767608643, "step": 4436 }, { "epoch": 0.8078638390825521, "grad_norm": 6.0, "learning_rate": 8.686111637309153e-06, "loss": 1.2938752174377441, "step": 4438 }, { "epoch": 0.8082279057067443, "grad_norm": 3.328125, "learning_rate": 8.68497603968482e-06, "loss": 1.0541632175445557, "step": 4440 }, { "epoch": 0.8085919723309366, "grad_norm": 17.625, "learning_rate": 8.683840048051133e-06, "loss": 1.6006433963775635, "step": 4442 }, { "epoch": 0.8089560389551288, "grad_norm": 22.125, "learning_rate": 8.682703662574796e-06, "loss": 1.6809207201004028, "step": 4444 }, { "epoch": 0.809320105579321, "grad_norm": 14.9375, "learning_rate": 8.681566883422567e-06, "loss": 1.2290083169937134, "step": 4446 }, { "epoch": 0.8096841722035133, "grad_norm": 7.96875, "learning_rate": 8.680429710761269e-06, "loss": 1.5254971981048584, "step": 4448 }, { "epoch": 0.8100482388277055, "grad_norm": 5.59375, "learning_rate": 8.679292144757771e-06, "loss": 0.9327143430709839, "step": 4450 }, { "epoch": 0.8104123054518977, "grad_norm": 19.5, "learning_rate": 8.67815418557901e-06, "loss": 1.0140594244003296, "step": 4452 }, { "epoch": 0.8107763720760899, "grad_norm": 14.4375, "learning_rate": 8.677015833391976e-06, "loss": 0.7433057427406311, "step": 4454 }, { "epoch": 0.8111404387002822, "grad_norm": 4.6875, "learning_rate": 8.675877088363715e-06, "loss": 0.9882104396820068, "step": 4456 }, { "epoch": 0.8115045053244744, "grad_norm": 14.375, "learning_rate": 8.674737950661338e-06, "loss": 1.3083375692367554, "step": 4458 }, { "epoch": 0.8118685719486666, "grad_norm": 34.75, "learning_rate": 8.673598420452006e-06, "loss": 1.9675068855285645, "step": 4460 }, { "epoch": 0.8122326385728589, "grad_norm": 13.4375, "learning_rate": 8.672458497902943e-06, "loss": 1.9735381603240967, "step": 4462 }, { "epoch": 0.8125967051970511, "grad_norm": 11.375, "learning_rate": 8.671318183181422e-06, "loss": 2.0083773136138916, "step": 4464 }, { "epoch": 0.8129607718212433, "grad_norm": 8.5, "learning_rate": 8.670177476454787e-06, "loss": 1.4760448932647705, "step": 4466 }, { "epoch": 0.8133248384454355, "grad_norm": 7.5, "learning_rate": 8.669036377890425e-06, "loss": 1.4627360105514526, "step": 4468 }, { "epoch": 0.8136889050696278, "grad_norm": 16.5, "learning_rate": 8.667894887655794e-06, "loss": 1.3630309104919434, "step": 4470 }, { "epoch": 0.81405297169382, "grad_norm": 14.5, "learning_rate": 8.6667530059184e-06, "loss": 1.4635146856307983, "step": 4472 }, { "epoch": 0.8144170383180122, "grad_norm": 9.9375, "learning_rate": 8.665610732845809e-06, "loss": 1.494768500328064, "step": 4474 }, { "epoch": 0.8147811049422045, "grad_norm": 10.25, "learning_rate": 8.664468068605648e-06, "loss": 1.092423915863037, "step": 4476 }, { "epoch": 0.8151451715663967, "grad_norm": 14.0625, "learning_rate": 8.663325013365591e-06, "loss": 1.326206922531128, "step": 4478 }, { "epoch": 0.8155092381905888, "grad_norm": 6.4375, "learning_rate": 8.662181567293386e-06, "loss": 1.8527684211730957, "step": 4480 }, { "epoch": 0.8158733048147812, "grad_norm": 9.5, "learning_rate": 8.661037730556823e-06, "loss": 1.338208556175232, "step": 4482 }, { "epoch": 0.8162373714389733, "grad_norm": 9.4375, "learning_rate": 8.65989350332376e-06, "loss": 1.6235486268997192, "step": 4484 }, { "epoch": 0.8166014380631655, "grad_norm": 15.6875, "learning_rate": 8.658748885762103e-06, "loss": 1.3146204948425293, "step": 4486 }, { "epoch": 0.8169655046873577, "grad_norm": 28.375, "learning_rate": 8.65760387803982e-06, "loss": 1.09885835647583, "step": 4488 }, { "epoch": 0.81732957131155, "grad_norm": 47.5, "learning_rate": 8.656458480324942e-06, "loss": 0.6355609893798828, "step": 4490 }, { "epoch": 0.8176936379357422, "grad_norm": 15.75, "learning_rate": 8.655312692785545e-06, "loss": 1.4863014221191406, "step": 4492 }, { "epoch": 0.8180577045599344, "grad_norm": 29.75, "learning_rate": 8.654166515589773e-06, "loss": 1.5574688911437988, "step": 4494 }, { "epoch": 0.8184217711841267, "grad_norm": 11.9375, "learning_rate": 8.653019948905819e-06, "loss": 1.5795235633850098, "step": 4496 }, { "epoch": 0.8187858378083189, "grad_norm": 12.5625, "learning_rate": 8.651872992901942e-06, "loss": 1.678736686706543, "step": 4498 }, { "epoch": 0.8191499044325111, "grad_norm": 13.8125, "learning_rate": 8.650725647746449e-06, "loss": 1.5295724868774414, "step": 4500 }, { "epoch": 0.8195139710567034, "grad_norm": 5.1875, "learning_rate": 8.649577913607709e-06, "loss": 1.323065996170044, "step": 4502 }, { "epoch": 0.8198780376808956, "grad_norm": 8.0, "learning_rate": 8.648429790654149e-06, "loss": 1.0482020378112793, "step": 4504 }, { "epoch": 0.8202421043050878, "grad_norm": 13.4375, "learning_rate": 8.647281279054251e-06, "loss": 1.4844077825546265, "step": 4506 }, { "epoch": 0.82060617092928, "grad_norm": 15.625, "learning_rate": 8.646132378976553e-06, "loss": 1.617138147354126, "step": 4508 }, { "epoch": 0.8209702375534723, "grad_norm": 51.75, "learning_rate": 8.644983090589651e-06, "loss": 0.5058410167694092, "step": 4510 }, { "epoch": 0.8213343041776645, "grad_norm": 13.625, "learning_rate": 8.643833414062202e-06, "loss": 1.4589542150497437, "step": 4512 }, { "epoch": 0.8216983708018567, "grad_norm": 6.9375, "learning_rate": 8.642683349562913e-06, "loss": 1.2672033309936523, "step": 4514 }, { "epoch": 0.822062437426049, "grad_norm": 19.25, "learning_rate": 8.641532897260552e-06, "loss": 1.3366519212722778, "step": 4516 }, { "epoch": 0.8224265040502412, "grad_norm": 21.75, "learning_rate": 8.640382057323944e-06, "loss": 1.3999751806259155, "step": 4518 }, { "epoch": 0.8227905706744334, "grad_norm": 90.0, "learning_rate": 8.639230829921968e-06, "loss": 1.2534772157669067, "step": 4520 }, { "epoch": 0.8231546372986257, "grad_norm": 8.25, "learning_rate": 8.638079215223564e-06, "loss": 0.44437363743782043, "step": 4522 }, { "epoch": 0.8235187039228179, "grad_norm": 7.75, "learning_rate": 8.63692721339773e-06, "loss": 1.2233778238296509, "step": 4524 }, { "epoch": 0.8238827705470101, "grad_norm": 15.6875, "learning_rate": 8.63577482461351e-06, "loss": 1.2223553657531738, "step": 4526 }, { "epoch": 0.8242468371712023, "grad_norm": 9.5, "learning_rate": 8.634622049040018e-06, "loss": 1.6178956031799316, "step": 4528 }, { "epoch": 0.8246109037953946, "grad_norm": 14.4375, "learning_rate": 8.633468886846417e-06, "loss": 1.471444845199585, "step": 4530 }, { "epoch": 0.8249749704195868, "grad_norm": 6.9375, "learning_rate": 8.632315338201929e-06, "loss": 1.151768684387207, "step": 4532 }, { "epoch": 0.825339037043779, "grad_norm": 7.96875, "learning_rate": 8.631161403275833e-06, "loss": 1.2900487184524536, "step": 4534 }, { "epoch": 0.8257031036679713, "grad_norm": 8.75, "learning_rate": 8.630007082237466e-06, "loss": 1.1465637683868408, "step": 4536 }, { "epoch": 0.8260671702921635, "grad_norm": 12.75, "learning_rate": 8.628852375256216e-06, "loss": 1.3564552068710327, "step": 4538 }, { "epoch": 0.8264312369163557, "grad_norm": 13.6875, "learning_rate": 8.627697282501535e-06, "loss": 1.4555790424346924, "step": 4540 }, { "epoch": 0.8267953035405479, "grad_norm": 12.625, "learning_rate": 8.626541804142926e-06, "loss": 1.5490326881408691, "step": 4542 }, { "epoch": 0.8271593701647402, "grad_norm": 6.90625, "learning_rate": 8.625385940349953e-06, "loss": 1.2507295608520508, "step": 4544 }, { "epoch": 0.8275234367889324, "grad_norm": 15.75, "learning_rate": 8.624229691292232e-06, "loss": 1.327358365058899, "step": 4546 }, { "epoch": 0.8278875034131246, "grad_norm": 69.5, "learning_rate": 8.62307305713944e-06, "loss": 1.1240493059158325, "step": 4548 }, { "epoch": 0.8282515700373169, "grad_norm": 3.3125, "learning_rate": 8.621916038061304e-06, "loss": 0.9504419565200806, "step": 4550 }, { "epoch": 0.8286156366615091, "grad_norm": 19.0, "learning_rate": 8.620758634227617e-06, "loss": 1.206761121749878, "step": 4552 }, { "epoch": 0.8289797032857013, "grad_norm": 17.875, "learning_rate": 8.619600845808222e-06, "loss": 1.7506375312805176, "step": 4554 }, { "epoch": 0.8293437699098936, "grad_norm": 16.25, "learning_rate": 8.61844267297302e-06, "loss": 1.412170648574829, "step": 4556 }, { "epoch": 0.8297078365340858, "grad_norm": 9.8125, "learning_rate": 8.617284115891967e-06, "loss": 1.3583259582519531, "step": 4558 }, { "epoch": 0.830071903158278, "grad_norm": 14.875, "learning_rate": 8.616125174735074e-06, "loss": 1.4089040756225586, "step": 4560 }, { "epoch": 0.8304359697824701, "grad_norm": 9.875, "learning_rate": 8.614965849672416e-06, "loss": 1.405224084854126, "step": 4562 }, { "epoch": 0.8308000364066624, "grad_norm": 19.5, "learning_rate": 8.613806140874119e-06, "loss": 0.9720988273620605, "step": 4564 }, { "epoch": 0.8311641030308546, "grad_norm": 18.5, "learning_rate": 8.61264604851036e-06, "loss": 1.3065283298492432, "step": 4566 }, { "epoch": 0.8315281696550468, "grad_norm": 10.3125, "learning_rate": 8.611485572751386e-06, "loss": 1.6876684427261353, "step": 4568 }, { "epoch": 0.8318922362792391, "grad_norm": 9.3125, "learning_rate": 8.610324713767484e-06, "loss": 1.3253494501113892, "step": 4570 }, { "epoch": 0.8322563029034313, "grad_norm": 11.4375, "learning_rate": 8.609163471729009e-06, "loss": 1.3207170963287354, "step": 4572 }, { "epoch": 0.8326203695276235, "grad_norm": 6.0625, "learning_rate": 8.608001846806372e-06, "loss": 1.431268334388733, "step": 4574 }, { "epoch": 0.8329844361518158, "grad_norm": 2.21875, "learning_rate": 8.606839839170029e-06, "loss": 1.201312780380249, "step": 4576 }, { "epoch": 0.833348502776008, "grad_norm": 14.375, "learning_rate": 8.605677448990507e-06, "loss": 1.3415477275848389, "step": 4578 }, { "epoch": 0.8337125694002002, "grad_norm": 21.125, "learning_rate": 8.604514676438377e-06, "loss": 1.891241192817688, "step": 4580 }, { "epoch": 0.8340766360243924, "grad_norm": 18.875, "learning_rate": 8.603351521684276e-06, "loss": 1.4462015628814697, "step": 4582 }, { "epoch": 0.8344407026485847, "grad_norm": 29.625, "learning_rate": 8.60218798489889e-06, "loss": 1.554612159729004, "step": 4584 }, { "epoch": 0.8348047692727769, "grad_norm": 10.0, "learning_rate": 8.601024066252959e-06, "loss": 1.2380497455596924, "step": 4586 }, { "epoch": 0.8351688358969691, "grad_norm": 16.875, "learning_rate": 8.599859765917291e-06, "loss": 1.8688032627105713, "step": 4588 }, { "epoch": 0.8355329025211614, "grad_norm": 13.5625, "learning_rate": 8.598695084062735e-06, "loss": 1.8470561504364014, "step": 4590 }, { "epoch": 0.8358969691453536, "grad_norm": 3.25, "learning_rate": 8.59753002086021e-06, "loss": 1.0626814365386963, "step": 4592 }, { "epoch": 0.8362610357695458, "grad_norm": 3.515625, "learning_rate": 8.59636457648068e-06, "loss": 1.0638459920883179, "step": 4594 }, { "epoch": 0.8366251023937381, "grad_norm": 6.15625, "learning_rate": 8.595198751095171e-06, "loss": 0.9944407343864441, "step": 4596 }, { "epoch": 0.8369891690179303, "grad_norm": 20.5, "learning_rate": 8.594032544874764e-06, "loss": 1.381744623184204, "step": 4598 }, { "epoch": 0.8373532356421225, "grad_norm": 45.75, "learning_rate": 8.592865957990592e-06, "loss": 1.9402213096618652, "step": 4600 }, { "epoch": 0.8377173022663147, "grad_norm": 18.25, "learning_rate": 8.591698990613848e-06, "loss": 1.8793973922729492, "step": 4602 }, { "epoch": 0.838081368890507, "grad_norm": 7.03125, "learning_rate": 8.590531642915783e-06, "loss": 1.4722939729690552, "step": 4604 }, { "epoch": 0.8384454355146992, "grad_norm": 10.25, "learning_rate": 8.589363915067697e-06, "loss": 1.2915740013122559, "step": 4606 }, { "epoch": 0.8388095021388914, "grad_norm": 117.5, "learning_rate": 8.588195807240949e-06, "loss": 1.3434512615203857, "step": 4608 }, { "epoch": 0.8391735687630837, "grad_norm": 22.375, "learning_rate": 8.587027319606956e-06, "loss": 1.4871494770050049, "step": 4610 }, { "epoch": 0.8395376353872759, "grad_norm": 8.8125, "learning_rate": 8.585858452337188e-06, "loss": 1.6669485569000244, "step": 4612 }, { "epoch": 0.8399017020114681, "grad_norm": 3.375, "learning_rate": 8.584689205603171e-06, "loss": 1.0657391548156738, "step": 4614 }, { "epoch": 0.8402657686356603, "grad_norm": 9.75, "learning_rate": 8.583519579576489e-06, "loss": 1.0997756719589233, "step": 4616 }, { "epoch": 0.8406298352598526, "grad_norm": 21.625, "learning_rate": 8.58234957442878e-06, "loss": 1.145671010017395, "step": 4618 }, { "epoch": 0.8409939018840448, "grad_norm": 17.125, "learning_rate": 8.581179190331735e-06, "loss": 1.6979058980941772, "step": 4620 }, { "epoch": 0.841357968508237, "grad_norm": 8.125, "learning_rate": 8.580008427457102e-06, "loss": 1.4544659852981567, "step": 4622 }, { "epoch": 0.8417220351324293, "grad_norm": 15.6875, "learning_rate": 8.578837285976691e-06, "loss": 1.3736255168914795, "step": 4624 }, { "epoch": 0.8420861017566215, "grad_norm": 15.1875, "learning_rate": 8.57766576606236e-06, "loss": 1.7360119819641113, "step": 4626 }, { "epoch": 0.8424501683808137, "grad_norm": 13.4375, "learning_rate": 8.576493867886022e-06, "loss": 1.564675211906433, "step": 4628 }, { "epoch": 0.842814235005006, "grad_norm": 16.25, "learning_rate": 8.575321591619652e-06, "loss": 1.6518443822860718, "step": 4630 }, { "epoch": 0.8431783016291982, "grad_norm": 11.3125, "learning_rate": 8.574148937435274e-06, "loss": 1.9839905500411987, "step": 4632 }, { "epoch": 0.8435423682533904, "grad_norm": 19.125, "learning_rate": 8.572975905504972e-06, "loss": 1.3030701875686646, "step": 4634 }, { "epoch": 0.8439064348775825, "grad_norm": 42.25, "learning_rate": 8.571802496000885e-06, "loss": 0.9104695916175842, "step": 4636 }, { "epoch": 0.8442705015017749, "grad_norm": 10.5, "learning_rate": 8.570628709095203e-06, "loss": 1.4311996698379517, "step": 4638 }, { "epoch": 0.844634568125967, "grad_norm": 9.1875, "learning_rate": 8.569454544960174e-06, "loss": 1.2529677152633667, "step": 4640 }, { "epoch": 0.8449986347501592, "grad_norm": 11.3125, "learning_rate": 8.568280003768107e-06, "loss": 1.3084317445755005, "step": 4642 }, { "epoch": 0.8453627013743515, "grad_norm": 7.6875, "learning_rate": 8.567105085691357e-06, "loss": 1.383557915687561, "step": 4644 }, { "epoch": 0.8457267679985437, "grad_norm": 16.75, "learning_rate": 8.565929790902337e-06, "loss": 1.4977948665618896, "step": 4646 }, { "epoch": 0.8460908346227359, "grad_norm": 10.4375, "learning_rate": 8.564754119573519e-06, "loss": 1.5442651510238647, "step": 4648 }, { "epoch": 0.8464549012469282, "grad_norm": 56.5, "learning_rate": 8.563578071877429e-06, "loss": 0.5610999464988708, "step": 4650 }, { "epoch": 0.8468189678711204, "grad_norm": 33.5, "learning_rate": 8.562401647986646e-06, "loss": 1.4163718223571777, "step": 4652 }, { "epoch": 0.8471830344953126, "grad_norm": 8.25, "learning_rate": 8.561224848073808e-06, "loss": 1.2096872329711914, "step": 4654 }, { "epoch": 0.8475471011195048, "grad_norm": 14.0, "learning_rate": 8.560047672311602e-06, "loss": 1.3381762504577637, "step": 4656 }, { "epoch": 0.8479111677436971, "grad_norm": 5.34375, "learning_rate": 8.558870120872774e-06, "loss": 1.2475321292877197, "step": 4658 }, { "epoch": 0.8482752343678893, "grad_norm": 1504.0, "learning_rate": 8.557692193930125e-06, "loss": 1.5093698501586914, "step": 4660 }, { "epoch": 0.8486393009920815, "grad_norm": 10.0625, "learning_rate": 8.556513891656516e-06, "loss": 1.805031180381775, "step": 4662 }, { "epoch": 0.8490033676162738, "grad_norm": 8.3125, "learning_rate": 8.55533521422485e-06, "loss": 0.82941734790802, "step": 4664 }, { "epoch": 0.849367434240466, "grad_norm": 10.0625, "learning_rate": 8.554156161808099e-06, "loss": 0.9767967462539673, "step": 4666 }, { "epoch": 0.8497315008646582, "grad_norm": 14.625, "learning_rate": 8.552976734579281e-06, "loss": 1.5947502851486206, "step": 4668 }, { "epoch": 0.8500955674888505, "grad_norm": 21.75, "learning_rate": 8.551796932711476e-06, "loss": 1.6526554822921753, "step": 4670 }, { "epoch": 0.8504596341130427, "grad_norm": 27.25, "learning_rate": 8.550616756377809e-06, "loss": 1.4393222332000732, "step": 4672 }, { "epoch": 0.8508237007372349, "grad_norm": 11.8125, "learning_rate": 8.549436205751474e-06, "loss": 1.4700366258621216, "step": 4674 }, { "epoch": 0.8511877673614271, "grad_norm": 15.5, "learning_rate": 8.548255281005704e-06, "loss": 1.7356404066085815, "step": 4676 }, { "epoch": 0.8515518339856194, "grad_norm": 19.5, "learning_rate": 8.5470739823138e-06, "loss": 1.289542317390442, "step": 4678 }, { "epoch": 0.8519159006098116, "grad_norm": 8.5625, "learning_rate": 8.545892309849113e-06, "loss": 1.308435320854187, "step": 4680 }, { "epoch": 0.8522799672340038, "grad_norm": 9.875, "learning_rate": 8.544710263785046e-06, "loss": 1.412814736366272, "step": 4682 }, { "epoch": 0.8526440338581961, "grad_norm": 9.25, "learning_rate": 8.543527844295062e-06, "loss": 1.3229460716247559, "step": 4684 }, { "epoch": 0.8530081004823883, "grad_norm": 2.625, "learning_rate": 8.542345051552672e-06, "loss": 1.1141674518585205, "step": 4686 }, { "epoch": 0.8533721671065805, "grad_norm": 12.4375, "learning_rate": 8.54116188573145e-06, "loss": 1.0369725227355957, "step": 4688 }, { "epoch": 0.8537362337307728, "grad_norm": 14.0, "learning_rate": 8.53997834700502e-06, "loss": 1.4032094478607178, "step": 4690 }, { "epoch": 0.854100300354965, "grad_norm": 8.625, "learning_rate": 8.538794435547063e-06, "loss": 1.7089297771453857, "step": 4692 }, { "epoch": 0.8544643669791572, "grad_norm": 16.625, "learning_rate": 8.537610151531308e-06, "loss": 1.9842157363891602, "step": 4694 }, { "epoch": 0.8548284336033494, "grad_norm": 24.25, "learning_rate": 8.536425495131548e-06, "loss": 1.698715329170227, "step": 4696 }, { "epoch": 0.8551925002275417, "grad_norm": 11.6875, "learning_rate": 8.535240466521628e-06, "loss": 1.3599730730056763, "step": 4698 }, { "epoch": 0.8555565668517339, "grad_norm": 9.9375, "learning_rate": 8.534055065875442e-06, "loss": 1.3171544075012207, "step": 4700 }, { "epoch": 0.8559206334759261, "grad_norm": 17.875, "learning_rate": 8.532869293366945e-06, "loss": 1.4855996370315552, "step": 4702 }, { "epoch": 0.8562847001001184, "grad_norm": 7.03125, "learning_rate": 8.531683149170144e-06, "loss": 1.3304357528686523, "step": 4704 }, { "epoch": 0.8566487667243106, "grad_norm": 9.75, "learning_rate": 8.530496633459102e-06, "loss": 1.072676420211792, "step": 4706 }, { "epoch": 0.8570128333485028, "grad_norm": 25.75, "learning_rate": 8.529309746407935e-06, "loss": 1.4439506530761719, "step": 4708 }, { "epoch": 0.857376899972695, "grad_norm": 33.75, "learning_rate": 8.528122488190811e-06, "loss": 1.3170230388641357, "step": 4710 }, { "epoch": 0.8577409665968873, "grad_norm": 31.375, "learning_rate": 8.526934858981957e-06, "loss": 0.8731761574745178, "step": 4712 }, { "epoch": 0.8581050332210794, "grad_norm": 6.03125, "learning_rate": 8.525746858955657e-06, "loss": 0.3805575370788574, "step": 4714 }, { "epoch": 0.8584690998452716, "grad_norm": 28.5, "learning_rate": 8.524558488286239e-06, "loss": 1.6718380451202393, "step": 4716 }, { "epoch": 0.858833166469464, "grad_norm": 10.1875, "learning_rate": 8.523369747148094e-06, "loss": 1.316070318222046, "step": 4718 }, { "epoch": 0.8591972330936561, "grad_norm": 14.0625, "learning_rate": 8.522180635715662e-06, "loss": 1.9480071067810059, "step": 4720 }, { "epoch": 0.8595612997178483, "grad_norm": 17.375, "learning_rate": 8.520991154163448e-06, "loss": 1.8747416734695435, "step": 4722 }, { "epoch": 0.8599253663420406, "grad_norm": 10.6875, "learning_rate": 8.519801302665996e-06, "loss": 1.327930212020874, "step": 4724 }, { "epoch": 0.8602894329662328, "grad_norm": 20.375, "learning_rate": 8.518611081397917e-06, "loss": 1.5011496543884277, "step": 4726 }, { "epoch": 0.860653499590425, "grad_norm": 11.6875, "learning_rate": 8.517420490533865e-06, "loss": 1.3953161239624023, "step": 4728 }, { "epoch": 0.8610175662146172, "grad_norm": 10.8125, "learning_rate": 8.51622953024856e-06, "loss": 1.2196745872497559, "step": 4730 }, { "epoch": 0.8613816328388095, "grad_norm": 9.375, "learning_rate": 8.51503820071677e-06, "loss": 1.3489603996276855, "step": 4732 }, { "epoch": 0.8617456994630017, "grad_norm": 11.3125, "learning_rate": 8.513846502113317e-06, "loss": 1.4766356945037842, "step": 4734 }, { "epoch": 0.8621097660871939, "grad_norm": 10.6875, "learning_rate": 8.512654434613074e-06, "loss": 1.4373071193695068, "step": 4736 }, { "epoch": 0.8624738327113862, "grad_norm": 6.1875, "learning_rate": 8.51146199839098e-06, "loss": 1.2352737188339233, "step": 4738 }, { "epoch": 0.8628378993355784, "grad_norm": 21.75, "learning_rate": 8.510269193622014e-06, "loss": 1.0454188585281372, "step": 4740 }, { "epoch": 0.8632019659597706, "grad_norm": 47.0, "learning_rate": 8.509076020481217e-06, "loss": 0.6144300103187561, "step": 4742 }, { "epoch": 0.8635660325839629, "grad_norm": 9.6875, "learning_rate": 8.507882479143681e-06, "loss": 1.545186996459961, "step": 4744 }, { "epoch": 0.8639300992081551, "grad_norm": 9.75, "learning_rate": 8.506688569784557e-06, "loss": 1.4921015501022339, "step": 4746 }, { "epoch": 0.8642941658323473, "grad_norm": 9.3125, "learning_rate": 8.505494292579041e-06, "loss": 1.3835653066635132, "step": 4748 }, { "epoch": 0.8646582324565395, "grad_norm": 47.5, "learning_rate": 8.504299647702396e-06, "loss": 1.2491490840911865, "step": 4750 }, { "epoch": 0.8650222990807318, "grad_norm": 10.375, "learning_rate": 8.503104635329924e-06, "loss": 1.6203796863555908, "step": 4752 }, { "epoch": 0.865386365704924, "grad_norm": 14.375, "learning_rate": 8.50190925563699e-06, "loss": 1.4374362230300903, "step": 4754 }, { "epoch": 0.8657504323291162, "grad_norm": 18.5, "learning_rate": 8.500713508799014e-06, "loss": 1.4110661745071411, "step": 4756 }, { "epoch": 0.8661144989533085, "grad_norm": 7.46875, "learning_rate": 8.499517394991466e-06, "loss": 1.1272107362747192, "step": 4758 }, { "epoch": 0.8664785655775007, "grad_norm": 9.6875, "learning_rate": 8.498320914389865e-06, "loss": 1.2199032306671143, "step": 4760 }, { "epoch": 0.8668426322016929, "grad_norm": 6.3125, "learning_rate": 8.4971240671698e-06, "loss": 1.4588050842285156, "step": 4762 }, { "epoch": 0.8672066988258852, "grad_norm": 8.5625, "learning_rate": 8.495926853506897e-06, "loss": 1.4851653575897217, "step": 4764 }, { "epoch": 0.8675707654500774, "grad_norm": 34.0, "learning_rate": 8.494729273576842e-06, "loss": 1.281112790107727, "step": 4766 }, { "epoch": 0.8679348320742696, "grad_norm": 12.3125, "learning_rate": 8.493531327555378e-06, "loss": 1.2939958572387695, "step": 4768 }, { "epoch": 0.8682988986984618, "grad_norm": 10.375, "learning_rate": 8.492333015618295e-06, "loss": 1.711450219154358, "step": 4770 }, { "epoch": 0.8686629653226541, "grad_norm": 9.9375, "learning_rate": 8.491134337941442e-06, "loss": 1.1526232957839966, "step": 4772 }, { "epoch": 0.8690270319468463, "grad_norm": 8.4375, "learning_rate": 8.489935294700722e-06, "loss": 1.4241366386413574, "step": 4774 }, { "epoch": 0.8693910985710385, "grad_norm": 6.5625, "learning_rate": 8.48873588607209e-06, "loss": 1.183090090751648, "step": 4776 }, { "epoch": 0.8697551651952308, "grad_norm": 21.5, "learning_rate": 8.487536112231548e-06, "loss": 1.3121047019958496, "step": 4778 }, { "epoch": 0.870119231819423, "grad_norm": 10.4375, "learning_rate": 8.486335973355168e-06, "loss": 1.1925883293151855, "step": 4780 }, { "epoch": 0.8704832984436152, "grad_norm": 11.4375, "learning_rate": 8.485135469619058e-06, "loss": 1.3835647106170654, "step": 4782 }, { "epoch": 0.8708473650678074, "grad_norm": 12.625, "learning_rate": 8.483934601199391e-06, "loss": 1.4478524923324585, "step": 4784 }, { "epoch": 0.8712114316919997, "grad_norm": 10.875, "learning_rate": 8.482733368272385e-06, "loss": 1.408987045288086, "step": 4786 }, { "epoch": 0.8715754983161919, "grad_norm": 9.25, "learning_rate": 8.48153177101432e-06, "loss": 1.3271706104278564, "step": 4788 }, { "epoch": 0.871939564940384, "grad_norm": 14.625, "learning_rate": 8.480329809601521e-06, "loss": 1.3561227321624756, "step": 4790 }, { "epoch": 0.8723036315645764, "grad_norm": 7.875, "learning_rate": 8.47912748421038e-06, "loss": 1.4736913442611694, "step": 4792 }, { "epoch": 0.8726676981887685, "grad_norm": 20.25, "learning_rate": 8.477924795017324e-06, "loss": 1.3741657733917236, "step": 4794 }, { "epoch": 0.8730317648129607, "grad_norm": 17.25, "learning_rate": 8.476721742198848e-06, "loss": 2.122314929962158, "step": 4796 }, { "epoch": 0.873395831437153, "grad_norm": 7.15625, "learning_rate": 8.475518325931493e-06, "loss": 1.1818575859069824, "step": 4798 }, { "epoch": 0.8737598980613452, "grad_norm": 21.25, "learning_rate": 8.474314546391855e-06, "loss": 1.8577296733856201, "step": 4800 }, { "epoch": 0.8741239646855374, "grad_norm": 9.75, "learning_rate": 8.473110403756585e-06, "loss": 1.5991967916488647, "step": 4802 }, { "epoch": 0.8744880313097296, "grad_norm": 6.5, "learning_rate": 8.47190589820239e-06, "loss": 1.3095811605453491, "step": 4804 }, { "epoch": 0.8748520979339219, "grad_norm": 9.375, "learning_rate": 8.470701029906019e-06, "loss": 1.3215951919555664, "step": 4806 }, { "epoch": 0.8752161645581141, "grad_norm": 12.25, "learning_rate": 8.469495799044284e-06, "loss": 1.3220648765563965, "step": 4808 }, { "epoch": 0.8755802311823063, "grad_norm": 17.125, "learning_rate": 8.46829020579405e-06, "loss": 0.9009023904800415, "step": 4810 }, { "epoch": 0.8759442978064986, "grad_norm": 5.875, "learning_rate": 8.467084250332231e-06, "loss": 1.3488471508026123, "step": 4812 }, { "epoch": 0.8763083644306908, "grad_norm": 11.3125, "learning_rate": 8.465877932835796e-06, "loss": 1.3471102714538574, "step": 4814 }, { "epoch": 0.876672431054883, "grad_norm": 38.25, "learning_rate": 8.464671253481766e-06, "loss": 1.6553246974945068, "step": 4816 }, { "epoch": 0.8770364976790753, "grad_norm": 20.5, "learning_rate": 8.463464212447221e-06, "loss": 1.4767603874206543, "step": 4818 }, { "epoch": 0.8774005643032675, "grad_norm": 26.625, "learning_rate": 8.462256809909285e-06, "loss": 1.8642398118972778, "step": 4820 }, { "epoch": 0.8777646309274597, "grad_norm": 17.0, "learning_rate": 8.461049046045143e-06, "loss": 1.7345826625823975, "step": 4822 }, { "epoch": 0.8781286975516519, "grad_norm": 84.5, "learning_rate": 8.459840921032025e-06, "loss": 1.211651086807251, "step": 4824 }, { "epoch": 0.8784927641758442, "grad_norm": 23.25, "learning_rate": 8.458632435047221e-06, "loss": 1.039841651916504, "step": 4826 }, { "epoch": 0.8788568308000364, "grad_norm": 18.125, "learning_rate": 8.45742358826807e-06, "loss": 1.2840505838394165, "step": 4828 }, { "epoch": 0.8792208974242286, "grad_norm": 25.75, "learning_rate": 8.456214380871968e-06, "loss": 1.78365159034729, "step": 4830 }, { "epoch": 0.8795849640484209, "grad_norm": 8.625, "learning_rate": 8.45500481303636e-06, "loss": 0.8666900396347046, "step": 4832 }, { "epoch": 0.8799490306726131, "grad_norm": 24.375, "learning_rate": 8.453794884938745e-06, "loss": 1.4163602590560913, "step": 4834 }, { "epoch": 0.8803130972968053, "grad_norm": 4.3125, "learning_rate": 8.452584596756674e-06, "loss": 0.9222185611724854, "step": 4836 }, { "epoch": 0.8806771639209976, "grad_norm": 13.6875, "learning_rate": 8.451373948667754e-06, "loss": 1.1016875505447388, "step": 4838 }, { "epoch": 0.8810412305451898, "grad_norm": 9.0, "learning_rate": 8.45016294084964e-06, "loss": 1.2642927169799805, "step": 4840 }, { "epoch": 0.881405297169382, "grad_norm": 25.25, "learning_rate": 8.448951573480044e-06, "loss": 1.3859792947769165, "step": 4842 }, { "epoch": 0.8817693637935742, "grad_norm": 10.9375, "learning_rate": 8.447739846736732e-06, "loss": 0.9050328731536865, "step": 4844 }, { "epoch": 0.8821334304177665, "grad_norm": 8.4375, "learning_rate": 8.446527760797514e-06, "loss": 1.337114930152893, "step": 4846 }, { "epoch": 0.8824974970419587, "grad_norm": 9.625, "learning_rate": 8.445315315840263e-06, "loss": 0.8304621577262878, "step": 4848 }, { "epoch": 0.8828615636661509, "grad_norm": 9.875, "learning_rate": 8.4441025120429e-06, "loss": 1.2512221336364746, "step": 4850 }, { "epoch": 0.8832256302903432, "grad_norm": 17.875, "learning_rate": 8.4428893495834e-06, "loss": 1.6167973279953003, "step": 4852 }, { "epoch": 0.8835896969145354, "grad_norm": 13.875, "learning_rate": 8.441675828639785e-06, "loss": 0.732928991317749, "step": 4854 }, { "epoch": 0.8839537635387276, "grad_norm": 10.8125, "learning_rate": 8.44046194939014e-06, "loss": 1.6956959962844849, "step": 4856 }, { "epoch": 0.8843178301629198, "grad_norm": 12.625, "learning_rate": 8.439247712012593e-06, "loss": 1.5149354934692383, "step": 4858 }, { "epoch": 0.8846818967871121, "grad_norm": 28.375, "learning_rate": 8.438033116685329e-06, "loss": 1.8743064403533936, "step": 4860 }, { "epoch": 0.8850459634113043, "grad_norm": 13.6875, "learning_rate": 8.436818163586588e-06, "loss": 1.7524183988571167, "step": 4862 }, { "epoch": 0.8854100300354965, "grad_norm": 4.375, "learning_rate": 8.435602852894656e-06, "loss": 1.2591854333877563, "step": 4864 }, { "epoch": 0.8857740966596888, "grad_norm": 5.875, "learning_rate": 8.434387184787874e-06, "loss": 0.7776451706886292, "step": 4866 }, { "epoch": 0.886138163283881, "grad_norm": 11.125, "learning_rate": 8.43317115944464e-06, "loss": 0.9579576253890991, "step": 4868 }, { "epoch": 0.8865022299080731, "grad_norm": 10.6875, "learning_rate": 8.431954777043398e-06, "loss": 0.9780623912811279, "step": 4870 }, { "epoch": 0.8868662965322655, "grad_norm": 6.0, "learning_rate": 8.430738037762651e-06, "loss": 1.3000987768173218, "step": 4872 }, { "epoch": 0.8872303631564576, "grad_norm": 15.25, "learning_rate": 8.429520941780946e-06, "loss": 1.4808517694473267, "step": 4874 }, { "epoch": 0.8875944297806498, "grad_norm": 11.125, "learning_rate": 8.428303489276888e-06, "loss": 1.4657047986984253, "step": 4876 }, { "epoch": 0.887958496404842, "grad_norm": 7.96875, "learning_rate": 8.427085680429137e-06, "loss": 1.5792397260665894, "step": 4878 }, { "epoch": 0.8883225630290343, "grad_norm": 7.96875, "learning_rate": 8.425867515416396e-06, "loss": 1.1995391845703125, "step": 4880 }, { "epoch": 0.8886866296532265, "grad_norm": 6.78125, "learning_rate": 8.424648994417427e-06, "loss": 1.1995643377304077, "step": 4882 }, { "epoch": 0.8890506962774187, "grad_norm": 5.53125, "learning_rate": 8.423430117611047e-06, "loss": 1.083752155303955, "step": 4884 }, { "epoch": 0.889414762901611, "grad_norm": 3.4375, "learning_rate": 8.42221088517612e-06, "loss": 1.5045748949050903, "step": 4886 }, { "epoch": 0.8897788295258032, "grad_norm": 10.9375, "learning_rate": 8.420991297291556e-06, "loss": 1.0758121013641357, "step": 4888 }, { "epoch": 0.8901428961499954, "grad_norm": 9.75, "learning_rate": 8.419771354136335e-06, "loss": 1.478236198425293, "step": 4890 }, { "epoch": 0.8905069627741877, "grad_norm": 52.25, "learning_rate": 8.418551055889472e-06, "loss": 1.5452797412872314, "step": 4892 }, { "epoch": 0.8908710293983799, "grad_norm": 9.875, "learning_rate": 8.417330402730047e-06, "loss": 1.2707470655441284, "step": 4894 }, { "epoch": 0.8912350960225721, "grad_norm": 8.3125, "learning_rate": 8.416109394837178e-06, "loss": 0.7832297086715698, "step": 4896 }, { "epoch": 0.8915991626467643, "grad_norm": 15.9375, "learning_rate": 8.414888032390049e-06, "loss": 0.6458851099014282, "step": 4898 }, { "epoch": 0.8919632292709566, "grad_norm": 3.515625, "learning_rate": 8.413666315567888e-06, "loss": 0.9119938611984253, "step": 4900 }, { "epoch": 0.8923272958951488, "grad_norm": 9.9375, "learning_rate": 8.412444244549975e-06, "loss": 1.2867916822433472, "step": 4902 }, { "epoch": 0.892691362519341, "grad_norm": 44.75, "learning_rate": 8.411221819515646e-06, "loss": 1.285341739654541, "step": 4904 }, { "epoch": 0.8930554291435333, "grad_norm": 24.75, "learning_rate": 8.40999904064429e-06, "loss": 1.0806200504302979, "step": 4906 }, { "epoch": 0.8934194957677255, "grad_norm": 10.1875, "learning_rate": 8.408775908115339e-06, "loss": 1.166571021080017, "step": 4908 }, { "epoch": 0.8937835623919177, "grad_norm": 14.8125, "learning_rate": 8.407552422108287e-06, "loss": 1.7610280513763428, "step": 4910 }, { "epoch": 0.89414762901611, "grad_norm": 6.46875, "learning_rate": 8.406328582802672e-06, "loss": 1.1678876876831055, "step": 4912 }, { "epoch": 0.8945116956403022, "grad_norm": 10.4375, "learning_rate": 8.405104390378091e-06, "loss": 1.4904696941375732, "step": 4914 }, { "epoch": 0.8948757622644944, "grad_norm": 4.90625, "learning_rate": 8.403879845014187e-06, "loss": 1.3808884620666504, "step": 4916 }, { "epoch": 0.8952398288886866, "grad_norm": 19.125, "learning_rate": 8.402654946890658e-06, "loss": 1.1424446105957031, "step": 4918 }, { "epoch": 0.8956038955128789, "grad_norm": 24.5, "learning_rate": 8.401429696187253e-06, "loss": 1.9523061513900757, "step": 4920 }, { "epoch": 0.8959679621370711, "grad_norm": 46.25, "learning_rate": 8.400204093083773e-06, "loss": 1.4287670850753784, "step": 4922 }, { "epoch": 0.8963320287612633, "grad_norm": 13.625, "learning_rate": 8.398978137760068e-06, "loss": 1.08514404296875, "step": 4924 }, { "epoch": 0.8966960953854556, "grad_norm": 9.4375, "learning_rate": 8.397751830396042e-06, "loss": 1.5198423862457275, "step": 4926 }, { "epoch": 0.8970601620096478, "grad_norm": 11.0, "learning_rate": 8.396525171171654e-06, "loss": 1.3252668380737305, "step": 4928 }, { "epoch": 0.89742422863384, "grad_norm": 12.0625, "learning_rate": 8.395298160266911e-06, "loss": 1.685268759727478, "step": 4930 }, { "epoch": 0.8977882952580322, "grad_norm": 12.3125, "learning_rate": 8.39407079786187e-06, "loss": 1.306122064590454, "step": 4932 }, { "epoch": 0.8981523618822245, "grad_norm": 7.59375, "learning_rate": 8.39284308413664e-06, "loss": 1.524824857711792, "step": 4934 }, { "epoch": 0.8985164285064167, "grad_norm": 6.8125, "learning_rate": 8.391615019271384e-06, "loss": 1.1539874076843262, "step": 4936 }, { "epoch": 0.8988804951306089, "grad_norm": 5.0625, "learning_rate": 8.390386603446316e-06, "loss": 1.2209997177124023, "step": 4938 }, { "epoch": 0.8992445617548012, "grad_norm": 12.3125, "learning_rate": 8.389157836841704e-06, "loss": 1.5792509317398071, "step": 4940 }, { "epoch": 0.8996086283789934, "grad_norm": 15.875, "learning_rate": 8.387928719637862e-06, "loss": 1.7660202980041504, "step": 4942 }, { "epoch": 0.8999726950031856, "grad_norm": 6.1875, "learning_rate": 8.386699252015156e-06, "loss": 1.128103256225586, "step": 4944 }, { "epoch": 0.9003367616273779, "grad_norm": 6.5625, "learning_rate": 8.385469434154008e-06, "loss": 1.2405339479446411, "step": 4946 }, { "epoch": 0.90070082825157, "grad_norm": 9.625, "learning_rate": 8.384239266234887e-06, "loss": 1.3091473579406738, "step": 4948 }, { "epoch": 0.9010648948757622, "grad_norm": 16.5, "learning_rate": 8.383008748438317e-06, "loss": 1.3025203943252563, "step": 4950 }, { "epoch": 0.9014289614999544, "grad_norm": 95.0, "learning_rate": 8.38177788094487e-06, "loss": 1.8716806173324585, "step": 4952 }, { "epoch": 0.9017930281241467, "grad_norm": 12.5, "learning_rate": 8.380546663935171e-06, "loss": 0.988882303237915, "step": 4954 }, { "epoch": 0.9021570947483389, "grad_norm": 7.71875, "learning_rate": 8.379315097589897e-06, "loss": 1.0238853693008423, "step": 4956 }, { "epoch": 0.9025211613725311, "grad_norm": 9.875, "learning_rate": 8.378083182089778e-06, "loss": 1.3713093996047974, "step": 4958 }, { "epoch": 0.9028852279967234, "grad_norm": 13.4375, "learning_rate": 8.376850917615587e-06, "loss": 1.4977836608886719, "step": 4960 }, { "epoch": 0.9032492946209156, "grad_norm": 6.53125, "learning_rate": 8.375618304348156e-06, "loss": 1.1415998935699463, "step": 4962 }, { "epoch": 0.9036133612451078, "grad_norm": 15.5625, "learning_rate": 8.374385342468365e-06, "loss": 1.337787389755249, "step": 4964 }, { "epoch": 0.9039774278693001, "grad_norm": 11.0, "learning_rate": 8.37315203215715e-06, "loss": 1.2822262048721313, "step": 4966 }, { "epoch": 0.9043414944934923, "grad_norm": 15.6875, "learning_rate": 8.371918373595494e-06, "loss": 0.9648714065551758, "step": 4968 }, { "epoch": 0.9047055611176845, "grad_norm": 20.25, "learning_rate": 8.370684366964426e-06, "loss": 1.1467806100845337, "step": 4970 }, { "epoch": 0.9050696277418767, "grad_norm": 15.1875, "learning_rate": 8.369450012445033e-06, "loss": 1.447210431098938, "step": 4972 }, { "epoch": 0.905433694366069, "grad_norm": 13.0625, "learning_rate": 8.368215310218454e-06, "loss": 1.545515537261963, "step": 4974 }, { "epoch": 0.9057977609902612, "grad_norm": 16.25, "learning_rate": 8.36698026046588e-06, "loss": 1.4063040018081665, "step": 4976 }, { "epoch": 0.9061618276144534, "grad_norm": 10.5, "learning_rate": 8.36574486336854e-06, "loss": 1.3798906803131104, "step": 4978 }, { "epoch": 0.9065258942386457, "grad_norm": 9.5, "learning_rate": 8.364509119107734e-06, "loss": 1.3341408967971802, "step": 4980 }, { "epoch": 0.9068899608628379, "grad_norm": 15.3125, "learning_rate": 8.363273027864793e-06, "loss": 1.2749433517456055, "step": 4982 }, { "epoch": 0.9072540274870301, "grad_norm": 29.0, "learning_rate": 8.362036589821114e-06, "loss": 1.9989795684814453, "step": 4984 }, { "epoch": 0.9076180941112224, "grad_norm": 420.0, "learning_rate": 8.360799805158139e-06, "loss": 1.1506237983703613, "step": 4986 }, { "epoch": 0.9079821607354146, "grad_norm": 10.9375, "learning_rate": 8.35956267405736e-06, "loss": 1.7607243061065674, "step": 4988 }, { "epoch": 0.9083462273596068, "grad_norm": 19.0, "learning_rate": 8.358325196700318e-06, "loss": 1.412642002105713, "step": 4990 }, { "epoch": 0.908710293983799, "grad_norm": 16.875, "learning_rate": 8.357087373268613e-06, "loss": 1.9293653964996338, "step": 4992 }, { "epoch": 0.9090743606079913, "grad_norm": 22.0, "learning_rate": 8.355849203943888e-06, "loss": 1.420868992805481, "step": 4994 }, { "epoch": 0.9094384272321835, "grad_norm": 12.0, "learning_rate": 8.354610688907843e-06, "loss": 1.3737428188323975, "step": 4996 }, { "epoch": 0.9098024938563757, "grad_norm": 14.3125, "learning_rate": 8.353371828342218e-06, "loss": 0.9121301174163818, "step": 4998 }, { "epoch": 0.910166560480568, "grad_norm": 6.84375, "learning_rate": 8.352132622428814e-06, "loss": 0.9797143936157227, "step": 5000 }, { "epoch": 0.9105306271047602, "grad_norm": 12.0625, "learning_rate": 8.350893071349484e-06, "loss": 0.5149597525596619, "step": 5002 }, { "epoch": 0.9108946937289524, "grad_norm": 8.0625, "learning_rate": 8.349653175286122e-06, "loss": 1.4142628908157349, "step": 5004 }, { "epoch": 0.9112587603531446, "grad_norm": 26.375, "learning_rate": 8.348412934420675e-06, "loss": 1.8623378276824951, "step": 5006 }, { "epoch": 0.9116228269773369, "grad_norm": 8.5625, "learning_rate": 8.34717234893515e-06, "loss": 1.1840145587921143, "step": 5008 }, { "epoch": 0.9119868936015291, "grad_norm": 97.0, "learning_rate": 8.345931419011594e-06, "loss": 1.4677081108093262, "step": 5010 }, { "epoch": 0.9123509602257213, "grad_norm": 6.84375, "learning_rate": 8.344690144832113e-06, "loss": 1.0516201257705688, "step": 5012 }, { "epoch": 0.9127150268499136, "grad_norm": 9.3125, "learning_rate": 8.343448526578852e-06, "loss": 1.4309465885162354, "step": 5014 }, { "epoch": 0.9130790934741058, "grad_norm": 6.84375, "learning_rate": 8.342206564434017e-06, "loss": 1.171036720275879, "step": 5016 }, { "epoch": 0.913443160098298, "grad_norm": 9.25, "learning_rate": 8.340964258579862e-06, "loss": 1.3203787803649902, "step": 5018 }, { "epoch": 0.9138072267224903, "grad_norm": 7.125, "learning_rate": 8.339721609198688e-06, "loss": 1.3614780902862549, "step": 5020 }, { "epoch": 0.9141712933466825, "grad_norm": 8.3125, "learning_rate": 8.338478616472849e-06, "loss": 1.4372724294662476, "step": 5022 }, { "epoch": 0.9145353599708747, "grad_norm": 10.625, "learning_rate": 8.337235280584752e-06, "loss": 1.2682194709777832, "step": 5024 }, { "epoch": 0.9148994265950668, "grad_norm": 11.4375, "learning_rate": 8.33599160171685e-06, "loss": 1.185659646987915, "step": 5026 }, { "epoch": 0.9152634932192591, "grad_norm": 17.5, "learning_rate": 8.334747580051647e-06, "loss": 1.4867969751358032, "step": 5028 }, { "epoch": 0.9156275598434513, "grad_norm": 20.5, "learning_rate": 8.333503215771696e-06, "loss": 1.5580549240112305, "step": 5030 }, { "epoch": 0.9159916264676435, "grad_norm": 9.0625, "learning_rate": 8.332258509059608e-06, "loss": 1.5288459062576294, "step": 5032 }, { "epoch": 0.9163556930918358, "grad_norm": 26.125, "learning_rate": 8.331013460098034e-06, "loss": 1.522063136100769, "step": 5034 }, { "epoch": 0.916719759716028, "grad_norm": 8.8125, "learning_rate": 8.329768069069684e-06, "loss": 1.3480898141860962, "step": 5036 }, { "epoch": 0.9170838263402202, "grad_norm": 9.9375, "learning_rate": 8.328522336157309e-06, "loss": 1.5097423791885376, "step": 5038 }, { "epoch": 0.9174478929644125, "grad_norm": 11.875, "learning_rate": 8.32727626154372e-06, "loss": 1.3834683895111084, "step": 5040 }, { "epoch": 0.9178119595886047, "grad_norm": 5.625, "learning_rate": 8.326029845411769e-06, "loss": 1.2475342750549316, "step": 5042 }, { "epoch": 0.9181760262127969, "grad_norm": 9.5, "learning_rate": 8.324783087944365e-06, "loss": 0.9466933012008667, "step": 5044 }, { "epoch": 0.9185400928369891, "grad_norm": 13.0625, "learning_rate": 8.323535989324465e-06, "loss": 1.868581771850586, "step": 5046 }, { "epoch": 0.9189041594611814, "grad_norm": 12.1875, "learning_rate": 8.322288549735076e-06, "loss": 1.1781412363052368, "step": 5048 }, { "epoch": 0.9192682260853736, "grad_norm": 3.640625, "learning_rate": 8.321040769359252e-06, "loss": 0.8578981757164001, "step": 5050 }, { "epoch": 0.9196322927095658, "grad_norm": 8.125, "learning_rate": 8.3197926483801e-06, "loss": 1.571542739868164, "step": 5052 }, { "epoch": 0.9199963593337581, "grad_norm": 4.4375, "learning_rate": 8.318544186980782e-06, "loss": 0.9482288360595703, "step": 5054 }, { "epoch": 0.9203604259579503, "grad_norm": 9.6875, "learning_rate": 8.317295385344499e-06, "loss": 1.2681465148925781, "step": 5056 }, { "epoch": 0.9207244925821425, "grad_norm": 6.5625, "learning_rate": 8.31604624365451e-06, "loss": 1.228989839553833, "step": 5058 }, { "epoch": 0.9210885592063348, "grad_norm": 6.75, "learning_rate": 8.31479676209412e-06, "loss": 1.2084938287734985, "step": 5060 }, { "epoch": 0.921452625830527, "grad_norm": 7.34375, "learning_rate": 8.313546940846686e-06, "loss": 1.2084535360336304, "step": 5062 }, { "epoch": 0.9218166924547192, "grad_norm": 4.59375, "learning_rate": 8.312296780095617e-06, "loss": 1.1716161966323853, "step": 5064 }, { "epoch": 0.9221807590789114, "grad_norm": 14.875, "learning_rate": 8.311046280024364e-06, "loss": 1.3594609498977661, "step": 5066 }, { "epoch": 0.9225448257031037, "grad_norm": 34.25, "learning_rate": 8.309795440816435e-06, "loss": 1.4242632389068604, "step": 5068 }, { "epoch": 0.9229088923272959, "grad_norm": 11.0625, "learning_rate": 8.308544262655387e-06, "loss": 1.5514527559280396, "step": 5070 }, { "epoch": 0.9232729589514881, "grad_norm": 12.9375, "learning_rate": 8.307292745724823e-06, "loss": 0.4746064841747284, "step": 5072 }, { "epoch": 0.9236370255756804, "grad_norm": 26.75, "learning_rate": 8.3060408902084e-06, "loss": 1.3490463495254517, "step": 5074 }, { "epoch": 0.9240010921998726, "grad_norm": 20.625, "learning_rate": 8.304788696289824e-06, "loss": 1.0141253471374512, "step": 5076 }, { "epoch": 0.9243651588240648, "grad_norm": 4.625, "learning_rate": 8.303536164152843e-06, "loss": 0.9631630778312683, "step": 5078 }, { "epoch": 0.9247292254482571, "grad_norm": 7.5, "learning_rate": 8.302283293981265e-06, "loss": 1.2772789001464844, "step": 5080 }, { "epoch": 0.9250932920724493, "grad_norm": 10.0625, "learning_rate": 8.301030085958948e-06, "loss": 1.2958730459213257, "step": 5082 }, { "epoch": 0.9254573586966415, "grad_norm": 4.75, "learning_rate": 8.29977654026979e-06, "loss": 1.0446795225143433, "step": 5084 }, { "epoch": 0.9258214253208337, "grad_norm": 16.25, "learning_rate": 8.298522657097746e-06, "loss": 1.114783763885498, "step": 5086 }, { "epoch": 0.926185491945026, "grad_norm": 13.9375, "learning_rate": 8.297268436626812e-06, "loss": 1.363879680633545, "step": 5088 }, { "epoch": 0.9265495585692182, "grad_norm": 9.0, "learning_rate": 8.296013879041049e-06, "loss": 1.2302645444869995, "step": 5090 }, { "epoch": 0.9269136251934104, "grad_norm": 13.5, "learning_rate": 8.294758984524556e-06, "loss": 1.7460567951202393, "step": 5092 }, { "epoch": 0.9272776918176027, "grad_norm": 12.4375, "learning_rate": 8.293503753261478e-06, "loss": 1.9477344751358032, "step": 5094 }, { "epoch": 0.9276417584417949, "grad_norm": 6.34375, "learning_rate": 8.29224818543602e-06, "loss": 1.330409049987793, "step": 5096 }, { "epoch": 0.9280058250659871, "grad_norm": 27.375, "learning_rate": 8.290992281232434e-06, "loss": 1.1722042560577393, "step": 5098 }, { "epoch": 0.9283698916901792, "grad_norm": 3.796875, "learning_rate": 8.289736040835011e-06, "loss": 1.0856956243515015, "step": 5100 }, { "epoch": 0.9287339583143716, "grad_norm": 5.125, "learning_rate": 8.288479464428104e-06, "loss": 1.2190728187561035, "step": 5102 }, { "epoch": 0.9290980249385637, "grad_norm": 16.25, "learning_rate": 8.28722255219611e-06, "loss": 1.3171327114105225, "step": 5104 }, { "epoch": 0.9294620915627559, "grad_norm": 15.75, "learning_rate": 8.285965304323477e-06, "loss": 1.6932451725006104, "step": 5106 }, { "epoch": 0.9298261581869482, "grad_norm": 7.375, "learning_rate": 8.2847077209947e-06, "loss": 1.3218982219696045, "step": 5108 }, { "epoch": 0.9301902248111404, "grad_norm": 3.625, "learning_rate": 8.283449802394323e-06, "loss": 1.330397129058838, "step": 5110 }, { "epoch": 0.9305542914353326, "grad_norm": 7.84375, "learning_rate": 8.28219154870694e-06, "loss": 1.1778228282928467, "step": 5112 }, { "epoch": 0.9309183580595249, "grad_norm": 13.3125, "learning_rate": 8.2809329601172e-06, "loss": 0.8992382287979126, "step": 5114 }, { "epoch": 0.9312824246837171, "grad_norm": 17.75, "learning_rate": 8.27967403680979e-06, "loss": 1.3484028577804565, "step": 5116 }, { "epoch": 0.9316464913079093, "grad_norm": 15.75, "learning_rate": 8.278414778969454e-06, "loss": 1.9711592197418213, "step": 5118 }, { "epoch": 0.9320105579321015, "grad_norm": 5.3125, "learning_rate": 8.277155186780983e-06, "loss": 1.1555635929107666, "step": 5120 }, { "epoch": 0.9323746245562938, "grad_norm": 9.1875, "learning_rate": 8.275895260429217e-06, "loss": 1.209521770477295, "step": 5122 }, { "epoch": 0.932738691180486, "grad_norm": 20.25, "learning_rate": 8.274635000099043e-06, "loss": 1.5935063362121582, "step": 5124 }, { "epoch": 0.9331027578046782, "grad_norm": 8.6875, "learning_rate": 8.273374405975402e-06, "loss": 1.1172841787338257, "step": 5126 }, { "epoch": 0.9334668244288705, "grad_norm": 6.75, "learning_rate": 8.272113478243281e-06, "loss": 1.2846150398254395, "step": 5128 }, { "epoch": 0.9338308910530627, "grad_norm": 6.84375, "learning_rate": 8.270852217087715e-06, "loss": 0.9405727386474609, "step": 5130 }, { "epoch": 0.9341949576772549, "grad_norm": 10.1875, "learning_rate": 8.269590622693788e-06, "loss": 1.3887560367584229, "step": 5132 }, { "epoch": 0.9345590243014472, "grad_norm": 65.5, "learning_rate": 8.268328695246637e-06, "loss": 1.0281797647476196, "step": 5134 }, { "epoch": 0.9349230909256394, "grad_norm": 15.5, "learning_rate": 8.267066434931441e-06, "loss": 1.4884922504425049, "step": 5136 }, { "epoch": 0.9352871575498316, "grad_norm": 32.25, "learning_rate": 8.265803841933432e-06, "loss": 1.8731805086135864, "step": 5138 }, { "epoch": 0.9356512241740238, "grad_norm": 11.25, "learning_rate": 8.264540916437893e-06, "loss": 1.7738471031188965, "step": 5140 }, { "epoch": 0.9360152907982161, "grad_norm": 9.625, "learning_rate": 8.263277658630153e-06, "loss": 1.4231477975845337, "step": 5142 }, { "epoch": 0.9363793574224083, "grad_norm": 22.0, "learning_rate": 8.26201406869559e-06, "loss": 1.5946168899536133, "step": 5144 }, { "epoch": 0.9367434240466005, "grad_norm": 8.3125, "learning_rate": 8.260750146819628e-06, "loss": 1.3846397399902344, "step": 5146 }, { "epoch": 0.9371074906707928, "grad_norm": 6.34375, "learning_rate": 8.259485893187744e-06, "loss": 0.9462764263153076, "step": 5148 }, { "epoch": 0.937471557294985, "grad_norm": 6.3125, "learning_rate": 8.25822130798546e-06, "loss": 0.9857575297355652, "step": 5150 }, { "epoch": 0.9378356239191772, "grad_norm": 7.875, "learning_rate": 8.256956391398352e-06, "loss": 1.2868921756744385, "step": 5152 }, { "epoch": 0.9381996905433695, "grad_norm": 11.9375, "learning_rate": 8.25569114361204e-06, "loss": 1.2789064645767212, "step": 5154 }, { "epoch": 0.9385637571675617, "grad_norm": 15.75, "learning_rate": 8.254425564812196e-06, "loss": 1.760619878768921, "step": 5156 }, { "epoch": 0.9389278237917539, "grad_norm": 7.15625, "learning_rate": 8.253159655184537e-06, "loss": 1.2983062267303467, "step": 5158 }, { "epoch": 0.9392918904159461, "grad_norm": 11.875, "learning_rate": 8.25189341491483e-06, "loss": 1.3249213695526123, "step": 5160 }, { "epoch": 0.9396559570401384, "grad_norm": 17.125, "learning_rate": 8.250626844188886e-06, "loss": 1.342602014541626, "step": 5162 }, { "epoch": 0.9400200236643306, "grad_norm": 9.5625, "learning_rate": 8.249359943192578e-06, "loss": 1.1832472085952759, "step": 5164 }, { "epoch": 0.9403840902885228, "grad_norm": 14.0625, "learning_rate": 8.248092712111813e-06, "loss": 1.0097612142562866, "step": 5166 }, { "epoch": 0.9407481569127151, "grad_norm": 15.875, "learning_rate": 8.246825151132552e-06, "loss": 1.484398365020752, "step": 5168 }, { "epoch": 0.9411122235369073, "grad_norm": 22.75, "learning_rate": 8.245557260440807e-06, "loss": 1.6879181861877441, "step": 5170 }, { "epoch": 0.9414762901610995, "grad_norm": 13.3125, "learning_rate": 8.244289040222633e-06, "loss": 1.4201488494873047, "step": 5172 }, { "epoch": 0.9418403567852917, "grad_norm": 9.0, "learning_rate": 8.24302049066414e-06, "loss": 1.4941177368164062, "step": 5174 }, { "epoch": 0.942204423409484, "grad_norm": 17.5, "learning_rate": 8.241751611951481e-06, "loss": 1.1575415134429932, "step": 5176 }, { "epoch": 0.9425684900336762, "grad_norm": 14.5, "learning_rate": 8.240482404270856e-06, "loss": 1.3664989471435547, "step": 5178 }, { "epoch": 0.9429325566578683, "grad_norm": 10.8125, "learning_rate": 8.239212867808518e-06, "loss": 1.367465615272522, "step": 5180 }, { "epoch": 0.9432966232820607, "grad_norm": 9.875, "learning_rate": 8.237943002750765e-06, "loss": 1.3932271003723145, "step": 5182 }, { "epoch": 0.9436606899062528, "grad_norm": 15.0, "learning_rate": 8.236672809283945e-06, "loss": 1.0750181674957275, "step": 5184 }, { "epoch": 0.944024756530445, "grad_norm": 26.5, "learning_rate": 8.235402287594458e-06, "loss": 0.897424042224884, "step": 5186 }, { "epoch": 0.9443888231546373, "grad_norm": 12.0625, "learning_rate": 8.234131437868745e-06, "loss": 1.3413605690002441, "step": 5188 }, { "epoch": 0.9447528897788295, "grad_norm": 35.75, "learning_rate": 8.232860260293297e-06, "loss": 1.293330430984497, "step": 5190 }, { "epoch": 0.9451169564030217, "grad_norm": 4.75, "learning_rate": 8.231588755054654e-06, "loss": 1.2065048217773438, "step": 5192 }, { "epoch": 0.9454810230272139, "grad_norm": 9.75, "learning_rate": 8.230316922339406e-06, "loss": 1.4518396854400635, "step": 5194 }, { "epoch": 0.9458450896514062, "grad_norm": 14.0625, "learning_rate": 8.229044762334187e-06, "loss": 1.380263090133667, "step": 5196 }, { "epoch": 0.9462091562755984, "grad_norm": 12.5, "learning_rate": 8.22777227522568e-06, "loss": 1.5777246952056885, "step": 5198 }, { "epoch": 0.9465732228997906, "grad_norm": 13.8125, "learning_rate": 8.226499461200623e-06, "loss": 1.9536418914794922, "step": 5200 }, { "epoch": 0.9469372895239829, "grad_norm": 10.5, "learning_rate": 8.225226320445795e-06, "loss": 1.1971180438995361, "step": 5202 }, { "epoch": 0.9473013561481751, "grad_norm": 14.3125, "learning_rate": 8.22395285314802e-06, "loss": 1.3106062412261963, "step": 5204 }, { "epoch": 0.9476654227723673, "grad_norm": 10.6875, "learning_rate": 8.222679059494179e-06, "loss": 1.810975432395935, "step": 5206 }, { "epoch": 0.9480294893965596, "grad_norm": 8.5625, "learning_rate": 8.221404939671192e-06, "loss": 1.3279353380203247, "step": 5208 }, { "epoch": 0.9483935560207518, "grad_norm": 18.5, "learning_rate": 8.220130493866033e-06, "loss": 0.9554933309555054, "step": 5210 }, { "epoch": 0.948757622644944, "grad_norm": 16.125, "learning_rate": 8.218855722265721e-06, "loss": 1.6110066175460815, "step": 5212 }, { "epoch": 0.9491216892691362, "grad_norm": 7.9375, "learning_rate": 8.217580625057324e-06, "loss": 1.5206242799758911, "step": 5214 }, { "epoch": 0.9494857558933285, "grad_norm": 12.5625, "learning_rate": 8.216305202427959e-06, "loss": 1.3869340419769287, "step": 5216 }, { "epoch": 0.9498498225175207, "grad_norm": 25.125, "learning_rate": 8.215029454564788e-06, "loss": 1.1318068504333496, "step": 5218 }, { "epoch": 0.9502138891417129, "grad_norm": 27.625, "learning_rate": 8.213753381655017e-06, "loss": 0.851793110370636, "step": 5220 }, { "epoch": 0.9505779557659052, "grad_norm": 9.8125, "learning_rate": 8.212476983885912e-06, "loss": 1.0434181690216064, "step": 5222 }, { "epoch": 0.9509420223900974, "grad_norm": 3.359375, "learning_rate": 8.211200261444775e-06, "loss": 0.9184368848800659, "step": 5224 }, { "epoch": 0.9513060890142896, "grad_norm": 5.71875, "learning_rate": 8.209923214518962e-06, "loss": 1.1558799743652344, "step": 5226 }, { "epoch": 0.9516701556384819, "grad_norm": 11.1875, "learning_rate": 8.208645843295873e-06, "loss": 1.3522435426712036, "step": 5228 }, { "epoch": 0.9520342222626741, "grad_norm": 7.34375, "learning_rate": 8.207368147962955e-06, "loss": 1.3889973163604736, "step": 5230 }, { "epoch": 0.9523982888868663, "grad_norm": 11.25, "learning_rate": 8.206090128707709e-06, "loss": 1.2717233896255493, "step": 5232 }, { "epoch": 0.9527623555110585, "grad_norm": 69.0, "learning_rate": 8.204811785717677e-06, "loss": 1.466750144958496, "step": 5234 }, { "epoch": 0.9531264221352508, "grad_norm": 12.5625, "learning_rate": 8.203533119180452e-06, "loss": 2.098508596420288, "step": 5236 }, { "epoch": 0.953490488759443, "grad_norm": 9.625, "learning_rate": 8.202254129283669e-06, "loss": 1.3959881067276, "step": 5238 }, { "epoch": 0.9538545553836352, "grad_norm": 9.875, "learning_rate": 8.20097481621502e-06, "loss": 1.5155279636383057, "step": 5240 }, { "epoch": 0.9542186220078275, "grad_norm": 14.5, "learning_rate": 8.199695180162234e-06, "loss": 1.3899306058883667, "step": 5242 }, { "epoch": 0.9545826886320197, "grad_norm": 12.0, "learning_rate": 8.198415221313096e-06, "loss": 1.476435899734497, "step": 5244 }, { "epoch": 0.9549467552562119, "grad_norm": 10.0, "learning_rate": 8.197134939855435e-06, "loss": 1.5116914510726929, "step": 5246 }, { "epoch": 0.9553108218804041, "grad_norm": 24.25, "learning_rate": 8.195854335977124e-06, "loss": 1.25270676612854, "step": 5248 }, { "epoch": 0.9556748885045964, "grad_norm": 7.8125, "learning_rate": 8.19457340986609e-06, "loss": 0.954714298248291, "step": 5250 }, { "epoch": 0.9560389551287886, "grad_norm": 8.25, "learning_rate": 8.1932921617103e-06, "loss": 1.685788869857788, "step": 5252 }, { "epoch": 0.9564030217529808, "grad_norm": 21.125, "learning_rate": 8.192010591697777e-06, "loss": 1.696622371673584, "step": 5254 }, { "epoch": 0.9567670883771731, "grad_norm": 10.5625, "learning_rate": 8.190728700016579e-06, "loss": 1.5226070880889893, "step": 5256 }, { "epoch": 0.9571311550013653, "grad_norm": 16.75, "learning_rate": 8.189446486854827e-06, "loss": 1.7219946384429932, "step": 5258 }, { "epoch": 0.9574952216255574, "grad_norm": 17.25, "learning_rate": 8.188163952400672e-06, "loss": 1.4439606666564941, "step": 5260 }, { "epoch": 0.9578592882497498, "grad_norm": 24.0, "learning_rate": 8.186881096842325e-06, "loss": 1.0094411373138428, "step": 5262 }, { "epoch": 0.958223354873942, "grad_norm": 12.1875, "learning_rate": 8.185597920368042e-06, "loss": 1.5339179039001465, "step": 5264 }, { "epoch": 0.9585874214981341, "grad_norm": 12.6875, "learning_rate": 8.184314423166123e-06, "loss": 1.436272144317627, "step": 5266 }, { "epoch": 0.9589514881223263, "grad_norm": 7.9375, "learning_rate": 8.183030605424912e-06, "loss": 1.4084436893463135, "step": 5268 }, { "epoch": 0.9593155547465186, "grad_norm": 3.375, "learning_rate": 8.181746467332804e-06, "loss": 1.054578185081482, "step": 5270 }, { "epoch": 0.9596796213707108, "grad_norm": 10.5625, "learning_rate": 8.18046200907825e-06, "loss": 1.0280214548110962, "step": 5272 }, { "epoch": 0.960043687994903, "grad_norm": 12.5625, "learning_rate": 8.17917723084973e-06, "loss": 1.4819560050964355, "step": 5274 }, { "epoch": 0.9604077546190953, "grad_norm": 31.875, "learning_rate": 8.177892132835781e-06, "loss": 1.3622816801071167, "step": 5276 }, { "epoch": 0.9607718212432875, "grad_norm": 9.375, "learning_rate": 8.176606715224989e-06, "loss": 1.3200557231903076, "step": 5278 }, { "epoch": 0.9611358878674797, "grad_norm": 26.625, "learning_rate": 8.175320978205983e-06, "loss": 1.4435665607452393, "step": 5280 }, { "epoch": 0.961499954491672, "grad_norm": 15.875, "learning_rate": 8.17403492196744e-06, "loss": 1.4555540084838867, "step": 5282 }, { "epoch": 0.9618640211158642, "grad_norm": 5.40625, "learning_rate": 8.172748546698082e-06, "loss": 1.0828317403793335, "step": 5284 }, { "epoch": 0.9622280877400564, "grad_norm": 7.8125, "learning_rate": 8.17146185258668e-06, "loss": 1.1645407676696777, "step": 5286 }, { "epoch": 0.9625921543642486, "grad_norm": 3.765625, "learning_rate": 8.170174839822051e-06, "loss": 1.3565809726715088, "step": 5288 }, { "epoch": 0.9629562209884409, "grad_norm": 23.375, "learning_rate": 8.168887508593058e-06, "loss": 0.8393096923828125, "step": 5290 }, { "epoch": 0.9633202876126331, "grad_norm": 13.25, "learning_rate": 8.167599859088615e-06, "loss": 0.9749665260314941, "step": 5292 }, { "epoch": 0.9636843542368253, "grad_norm": 26.125, "learning_rate": 8.166311891497678e-06, "loss": 1.551950216293335, "step": 5294 }, { "epoch": 0.9640484208610176, "grad_norm": 18.625, "learning_rate": 8.165023606009248e-06, "loss": 1.8238662481307983, "step": 5296 }, { "epoch": 0.9644124874852098, "grad_norm": 7.46875, "learning_rate": 8.163735002812378e-06, "loss": 0.9817075729370117, "step": 5298 }, { "epoch": 0.964776554109402, "grad_norm": 10.1875, "learning_rate": 8.162446082096167e-06, "loss": 0.9963616132736206, "step": 5300 }, { "epoch": 0.9651406207335943, "grad_norm": 11.5, "learning_rate": 8.161156844049755e-06, "loss": 1.4401131868362427, "step": 5302 }, { "epoch": 0.9655046873577865, "grad_norm": 100.0, "learning_rate": 8.159867288862336e-06, "loss": 1.1715326309204102, "step": 5304 }, { "epoch": 0.9658687539819787, "grad_norm": 11.625, "learning_rate": 8.158577416723143e-06, "loss": 1.2917958498001099, "step": 5306 }, { "epoch": 0.9662328206061709, "grad_norm": 25.0, "learning_rate": 8.157287227821466e-06, "loss": 1.5508229732513428, "step": 5308 }, { "epoch": 0.9665968872303632, "grad_norm": 11.9375, "learning_rate": 8.155996722346628e-06, "loss": 1.7019456624984741, "step": 5310 }, { "epoch": 0.9669609538545554, "grad_norm": 21.75, "learning_rate": 8.154705900488011e-06, "loss": 1.0934464931488037, "step": 5312 }, { "epoch": 0.9673250204787476, "grad_norm": 14.8125, "learning_rate": 8.153414762435032e-06, "loss": 1.3818862438201904, "step": 5314 }, { "epoch": 0.9676890871029399, "grad_norm": 15.4375, "learning_rate": 8.152123308377167e-06, "loss": 1.3036679029464722, "step": 5316 }, { "epoch": 0.9680531537271321, "grad_norm": 12.75, "learning_rate": 8.150831538503927e-06, "loss": 1.5211178064346313, "step": 5318 }, { "epoch": 0.9684172203513243, "grad_norm": 9.3125, "learning_rate": 8.149539453004876e-06, "loss": 1.3985037803649902, "step": 5320 }, { "epoch": 0.9687812869755165, "grad_norm": 13.125, "learning_rate": 8.14824705206962e-06, "loss": 1.1710278987884521, "step": 5322 }, { "epoch": 0.9691453535997088, "grad_norm": 21.0, "learning_rate": 8.146954335887816e-06, "loss": 1.7147796154022217, "step": 5324 }, { "epoch": 0.969509420223901, "grad_norm": 12.625, "learning_rate": 8.145661304649164e-06, "loss": 1.3703334331512451, "step": 5326 }, { "epoch": 0.9698734868480932, "grad_norm": 10.25, "learning_rate": 8.144367958543411e-06, "loss": 1.2839322090148926, "step": 5328 }, { "epoch": 0.9702375534722855, "grad_norm": 7.3125, "learning_rate": 8.143074297760349e-06, "loss": 1.2957262992858887, "step": 5330 }, { "epoch": 0.9706016200964777, "grad_norm": 16.875, "learning_rate": 8.141780322489821e-06, "loss": 1.485806941986084, "step": 5332 }, { "epoch": 0.9709656867206699, "grad_norm": 23.125, "learning_rate": 8.14048603292171e-06, "loss": 1.864945411682129, "step": 5334 }, { "epoch": 0.9713297533448622, "grad_norm": 15.1875, "learning_rate": 8.139191429245949e-06, "loss": 1.2725948095321655, "step": 5336 }, { "epoch": 0.9716938199690544, "grad_norm": 14.0625, "learning_rate": 8.137896511652515e-06, "loss": 1.4446682929992676, "step": 5338 }, { "epoch": 0.9720578865932465, "grad_norm": 14.8125, "learning_rate": 8.136601280331431e-06, "loss": 1.7600085735321045, "step": 5340 }, { "epoch": 0.9724219532174387, "grad_norm": 10.375, "learning_rate": 8.135305735472769e-06, "loss": 1.6449780464172363, "step": 5342 }, { "epoch": 0.972786019841631, "grad_norm": 12.9375, "learning_rate": 8.134009877266645e-06, "loss": 0.9663881063461304, "step": 5344 }, { "epoch": 0.9731500864658232, "grad_norm": 19.5, "learning_rate": 8.132713705903218e-06, "loss": 1.1315466165542603, "step": 5346 }, { "epoch": 0.9735141530900154, "grad_norm": 9.625, "learning_rate": 8.131417221572697e-06, "loss": 1.482200026512146, "step": 5348 }, { "epoch": 0.9738782197142077, "grad_norm": 11.875, "learning_rate": 8.130120424465337e-06, "loss": 1.3849362134933472, "step": 5350 }, { "epoch": 0.9742422863383999, "grad_norm": 6.71875, "learning_rate": 8.128823314771438e-06, "loss": 1.348287582397461, "step": 5352 }, { "epoch": 0.9746063529625921, "grad_norm": 3.65625, "learning_rate": 8.127525892681347e-06, "loss": 1.2137531042099, "step": 5354 }, { "epoch": 0.9749704195867844, "grad_norm": 4.375, "learning_rate": 8.126228158385453e-06, "loss": 1.0482590198516846, "step": 5356 }, { "epoch": 0.9753344862109766, "grad_norm": 6.84375, "learning_rate": 8.12493011207419e-06, "loss": 1.190213680267334, "step": 5358 }, { "epoch": 0.9756985528351688, "grad_norm": 19.0, "learning_rate": 8.123631753938046e-06, "loss": 1.2104922533035278, "step": 5360 }, { "epoch": 0.976062619459361, "grad_norm": 10.3125, "learning_rate": 8.12233308416755e-06, "loss": 1.0388247966766357, "step": 5362 }, { "epoch": 0.9764266860835533, "grad_norm": 24.5, "learning_rate": 8.121034102953274e-06, "loss": 1.3831732273101807, "step": 5364 }, { "epoch": 0.9767907527077455, "grad_norm": 14.4375, "learning_rate": 8.11973481048584e-06, "loss": 1.5067081451416016, "step": 5366 }, { "epoch": 0.9771548193319377, "grad_norm": 13.5, "learning_rate": 8.11843520695591e-06, "loss": 1.541979193687439, "step": 5368 }, { "epoch": 0.97751888595613, "grad_norm": 6.34375, "learning_rate": 8.117135292554202e-06, "loss": 1.4077279567718506, "step": 5370 }, { "epoch": 0.9778829525803222, "grad_norm": 15.1875, "learning_rate": 8.115835067471468e-06, "loss": 1.1266310214996338, "step": 5372 }, { "epoch": 0.9782470192045144, "grad_norm": 32.25, "learning_rate": 8.114534531898515e-06, "loss": 2.0582756996154785, "step": 5374 }, { "epoch": 0.9786110858287067, "grad_norm": 18.0, "learning_rate": 8.113233686026188e-06, "loss": 1.877091646194458, "step": 5376 }, { "epoch": 0.9789751524528989, "grad_norm": 13.125, "learning_rate": 8.111932530045378e-06, "loss": 1.180967092514038, "step": 5378 }, { "epoch": 0.9793392190770911, "grad_norm": 20.625, "learning_rate": 8.110631064147035e-06, "loss": 0.9638977646827698, "step": 5380 }, { "epoch": 0.9797032857012833, "grad_norm": 10.1875, "learning_rate": 8.10932928852213e-06, "loss": 1.4034727811813354, "step": 5382 }, { "epoch": 0.9800673523254756, "grad_norm": 11.875, "learning_rate": 8.108027203361704e-06, "loss": 1.380660891532898, "step": 5384 }, { "epoch": 0.9804314189496678, "grad_norm": 14.5, "learning_rate": 8.106724808856829e-06, "loss": 1.2414599657058716, "step": 5386 }, { "epoch": 0.98079548557386, "grad_norm": 9.5, "learning_rate": 8.105422105198626e-06, "loss": 0.574763298034668, "step": 5388 }, { "epoch": 0.9811595521980523, "grad_norm": 10.6875, "learning_rate": 8.10411909257826e-06, "loss": 0.8744221925735474, "step": 5390 }, { "epoch": 0.9815236188222445, "grad_norm": 7.4375, "learning_rate": 8.102815771186946e-06, "loss": 1.4710537195205688, "step": 5392 }, { "epoch": 0.9818876854464367, "grad_norm": 6.09375, "learning_rate": 8.101512141215939e-06, "loss": 0.9895541071891785, "step": 5394 }, { "epoch": 0.9822517520706289, "grad_norm": 6.875, "learning_rate": 8.100208202856542e-06, "loss": 1.1242363452911377, "step": 5396 }, { "epoch": 0.9826158186948212, "grad_norm": 9.625, "learning_rate": 8.098903956300104e-06, "loss": 1.4189202785491943, "step": 5398 }, { "epoch": 0.9829798853190134, "grad_norm": 8.625, "learning_rate": 8.097599401738014e-06, "loss": 1.5487031936645508, "step": 5400 }, { "epoch": 0.9833439519432056, "grad_norm": 5.5625, "learning_rate": 8.096294539361713e-06, "loss": 1.016351342201233, "step": 5402 }, { "epoch": 0.9837080185673979, "grad_norm": 9.4375, "learning_rate": 8.094989369362685e-06, "loss": 1.4842634201049805, "step": 5404 }, { "epoch": 0.9840720851915901, "grad_norm": 29.625, "learning_rate": 8.093683891932458e-06, "loss": 1.4828146696090698, "step": 5406 }, { "epoch": 0.9844361518157823, "grad_norm": 25.75, "learning_rate": 8.092378107262603e-06, "loss": 1.6820260286331177, "step": 5408 }, { "epoch": 0.9848002184399746, "grad_norm": 59.0, "learning_rate": 8.091072015544743e-06, "loss": 1.2868156433105469, "step": 5410 }, { "epoch": 0.9851642850641668, "grad_norm": 17.25, "learning_rate": 8.089765616970534e-06, "loss": 1.1464850902557373, "step": 5412 }, { "epoch": 0.985528351688359, "grad_norm": 12.6875, "learning_rate": 8.088458911731696e-06, "loss": 1.3672478199005127, "step": 5414 }, { "epoch": 0.9858924183125511, "grad_norm": 7.0, "learning_rate": 8.087151900019975e-06, "loss": 1.2416682243347168, "step": 5416 }, { "epoch": 0.9862564849367434, "grad_norm": 13.1875, "learning_rate": 8.085844582027168e-06, "loss": 0.6675612926483154, "step": 5418 }, { "epoch": 0.9866205515609356, "grad_norm": 10.875, "learning_rate": 8.084536957945124e-06, "loss": 0.32999187707901, "step": 5420 }, { "epoch": 0.9869846181851278, "grad_norm": 13.4375, "learning_rate": 8.083229027965728e-06, "loss": 1.7137484550476074, "step": 5422 }, { "epoch": 0.9873486848093201, "grad_norm": 17.75, "learning_rate": 8.081920792280915e-06, "loss": 1.1099997758865356, "step": 5424 }, { "epoch": 0.9877127514335123, "grad_norm": 9.9375, "learning_rate": 8.080612251082664e-06, "loss": 1.3685656785964966, "step": 5426 }, { "epoch": 0.9880768180577045, "grad_norm": 5.90625, "learning_rate": 8.079303404562997e-06, "loss": 1.3203872442245483, "step": 5428 }, { "epoch": 0.9884408846818968, "grad_norm": 21.375, "learning_rate": 8.077994252913984e-06, "loss": 1.183948278427124, "step": 5430 }, { "epoch": 0.988804951306089, "grad_norm": 15.0625, "learning_rate": 8.076684796327732e-06, "loss": 1.546653151512146, "step": 5432 }, { "epoch": 0.9891690179302812, "grad_norm": 8.4375, "learning_rate": 8.075375034996405e-06, "loss": 1.072319746017456, "step": 5434 }, { "epoch": 0.9895330845544734, "grad_norm": 7.71875, "learning_rate": 8.074064969112199e-06, "loss": 1.4160891771316528, "step": 5436 }, { "epoch": 0.9898971511786657, "grad_norm": 9.4375, "learning_rate": 8.072754598867367e-06, "loss": 1.2624622583389282, "step": 5438 }, { "epoch": 0.9902612178028579, "grad_norm": 11.25, "learning_rate": 8.071443924454196e-06, "loss": 1.232493281364441, "step": 5440 }, { "epoch": 0.9906252844270501, "grad_norm": 15.5625, "learning_rate": 8.070132946065026e-06, "loss": 0.8610067367553711, "step": 5442 }, { "epoch": 0.9909893510512424, "grad_norm": 12.0, "learning_rate": 8.068821663892234e-06, "loss": 1.402634859085083, "step": 5444 }, { "epoch": 0.9913534176754346, "grad_norm": 14.125, "learning_rate": 8.067510078128248e-06, "loss": 1.3765374422073364, "step": 5446 }, { "epoch": 0.9917174842996268, "grad_norm": 23.75, "learning_rate": 8.066198188965538e-06, "loss": 1.3468719720840454, "step": 5448 }, { "epoch": 0.9920815509238191, "grad_norm": 8.4375, "learning_rate": 8.064885996596616e-06, "loss": 1.3279544115066528, "step": 5450 }, { "epoch": 0.9924456175480113, "grad_norm": 8.6875, "learning_rate": 8.063573501214042e-06, "loss": 1.4619123935699463, "step": 5452 }, { "epoch": 0.9928096841722035, "grad_norm": 16.625, "learning_rate": 8.06226070301042e-06, "loss": 1.315147876739502, "step": 5454 }, { "epoch": 0.9931737507963957, "grad_norm": 17.125, "learning_rate": 8.060947602178397e-06, "loss": 1.395255208015442, "step": 5456 }, { "epoch": 0.993537817420588, "grad_norm": 23.5, "learning_rate": 8.059634198910666e-06, "loss": 1.3836113214492798, "step": 5458 }, { "epoch": 0.9939018840447802, "grad_norm": 40.75, "learning_rate": 8.058320493399965e-06, "loss": 1.9325013160705566, "step": 5460 }, { "epoch": 0.9942659506689724, "grad_norm": 11.625, "learning_rate": 8.057006485839071e-06, "loss": 1.4436297416687012, "step": 5462 }, { "epoch": 0.9946300172931647, "grad_norm": 21.875, "learning_rate": 8.055692176420813e-06, "loss": 1.2843307256698608, "step": 5464 }, { "epoch": 0.9949940839173569, "grad_norm": 5.3125, "learning_rate": 8.054377565338057e-06, "loss": 1.1902520656585693, "step": 5466 }, { "epoch": 0.9953581505415491, "grad_norm": 3.609375, "learning_rate": 8.05306265278372e-06, "loss": 1.211944341659546, "step": 5468 }, { "epoch": 0.9957222171657414, "grad_norm": 7.71875, "learning_rate": 8.051747438950759e-06, "loss": 1.4459137916564941, "step": 5470 }, { "epoch": 0.9960862837899336, "grad_norm": 14.8125, "learning_rate": 8.050431924032176e-06, "loss": 1.3181432485580444, "step": 5472 }, { "epoch": 0.9964503504141258, "grad_norm": 92.5, "learning_rate": 8.049116108221018e-06, "loss": 0.8120696544647217, "step": 5474 }, { "epoch": 0.996814417038318, "grad_norm": 7.375, "learning_rate": 8.047799991710376e-06, "loss": 1.399395227432251, "step": 5476 }, { "epoch": 0.9971784836625103, "grad_norm": 5.90625, "learning_rate": 8.046483574693384e-06, "loss": 1.252817988395691, "step": 5478 }, { "epoch": 0.9975425502867025, "grad_norm": 10.1875, "learning_rate": 8.045166857363223e-06, "loss": 1.1775716543197632, "step": 5480 }, { "epoch": 0.9979066169108947, "grad_norm": 7.9375, "learning_rate": 8.043849839913112e-06, "loss": 1.3445779085159302, "step": 5482 }, { "epoch": 0.998270683535087, "grad_norm": 14.3125, "learning_rate": 8.04253252253632e-06, "loss": 1.2628525495529175, "step": 5484 }, { "epoch": 0.9986347501592792, "grad_norm": 7.8125, "learning_rate": 8.041214905426155e-06, "loss": 1.3635976314544678, "step": 5486 }, { "epoch": 0.9989988167834714, "grad_norm": 5.09375, "learning_rate": 8.039896988775979e-06, "loss": 1.1852967739105225, "step": 5488 }, { "epoch": 0.9993628834076635, "grad_norm": 16.375, "learning_rate": 8.038578772779186e-06, "loss": 1.8710843324661255, "step": 5490 }, { "epoch": 0.9997269500318559, "grad_norm": 22.75, "learning_rate": 8.03726025762922e-06, "loss": 1.5698821544647217, "step": 5492 }, { "epoch": 1.0, "grad_norm": 21.875, "learning_rate": 8.035941443519568e-06, "loss": 1.4250874519348145, "step": 5494 }, { "epoch": 1.0003640666241922, "grad_norm": 6.125, "learning_rate": 8.034622330643759e-06, "loss": 1.3944168090820312, "step": 5496 }, { "epoch": 1.0007281332483844, "grad_norm": 36.5, "learning_rate": 8.033302919195369e-06, "loss": 0.8998700976371765, "step": 5498 }, { "epoch": 1.0010921998725766, "grad_norm": 12.1875, "learning_rate": 8.031983209368015e-06, "loss": 1.4408015012741089, "step": 5500 }, { "epoch": 1.001456266496769, "grad_norm": 9.3125, "learning_rate": 8.030663201355359e-06, "loss": 1.031071424484253, "step": 5502 }, { "epoch": 1.0018203331209612, "grad_norm": 111.5, "learning_rate": 8.029342895351111e-06, "loss": 1.486803650856018, "step": 5504 }, { "epoch": 1.0021843997451534, "grad_norm": 25.75, "learning_rate": 8.028022291549015e-06, "loss": 0.2996535897254944, "step": 5506 }, { "epoch": 1.0025484663693456, "grad_norm": 10.6875, "learning_rate": 8.026701390142867e-06, "loss": 1.3124223947525024, "step": 5508 }, { "epoch": 1.0029125329935378, "grad_norm": 6.40625, "learning_rate": 8.025380191326506e-06, "loss": 1.3028819561004639, "step": 5510 }, { "epoch": 1.00327659961773, "grad_norm": 12.875, "learning_rate": 8.024058695293807e-06, "loss": 1.241189956665039, "step": 5512 }, { "epoch": 1.0036406662419222, "grad_norm": 16.75, "learning_rate": 8.0227369022387e-06, "loss": 1.8382549285888672, "step": 5514 }, { "epoch": 1.0040047328661146, "grad_norm": 84.5, "learning_rate": 8.021414812355146e-06, "loss": 1.1665067672729492, "step": 5516 }, { "epoch": 1.0043687994903068, "grad_norm": 13.8125, "learning_rate": 8.020092425837162e-06, "loss": 1.4099467992782593, "step": 5518 }, { "epoch": 1.004732866114499, "grad_norm": 5.1875, "learning_rate": 8.018769742878802e-06, "loss": 1.3823487758636475, "step": 5520 }, { "epoch": 1.0050969327386912, "grad_norm": 16.625, "learning_rate": 8.017446763674165e-06, "loss": 1.4213688373565674, "step": 5522 }, { "epoch": 1.0054609993628834, "grad_norm": 9.4375, "learning_rate": 8.016123488417389e-06, "loss": 1.474638819694519, "step": 5524 }, { "epoch": 1.0058250659870756, "grad_norm": 14.1875, "learning_rate": 8.014799917302662e-06, "loss": 1.3464412689208984, "step": 5526 }, { "epoch": 1.006189132611268, "grad_norm": 14.6875, "learning_rate": 8.013476050524212e-06, "loss": 1.8093068599700928, "step": 5528 }, { "epoch": 1.0065531992354602, "grad_norm": 7.8125, "learning_rate": 8.012151888276313e-06, "loss": 1.0855789184570312, "step": 5530 }, { "epoch": 1.0069172658596524, "grad_norm": 9.375, "learning_rate": 8.01082743075328e-06, "loss": 1.3014432191848755, "step": 5532 }, { "epoch": 1.0072813324838445, "grad_norm": 10.8125, "learning_rate": 8.009502678149467e-06, "loss": 1.377088189125061, "step": 5534 }, { "epoch": 1.0076453991080367, "grad_norm": 9.0, "learning_rate": 8.008177630659282e-06, "loss": 1.4790308475494385, "step": 5536 }, { "epoch": 1.008009465732229, "grad_norm": 8.1875, "learning_rate": 8.006852288477167e-06, "loss": 1.417969822883606, "step": 5538 }, { "epoch": 1.0083735323564211, "grad_norm": 19.75, "learning_rate": 8.005526651797615e-06, "loss": 1.2785357236862183, "step": 5540 }, { "epoch": 1.0087375989806135, "grad_norm": 10.875, "learning_rate": 8.004200720815152e-06, "loss": 1.297044277191162, "step": 5542 }, { "epoch": 1.0091016656048057, "grad_norm": 31.5, "learning_rate": 8.002874495724355e-06, "loss": 1.5670397281646729, "step": 5544 }, { "epoch": 1.009465732228998, "grad_norm": 5.28125, "learning_rate": 8.001547976719844e-06, "loss": 1.1834278106689453, "step": 5546 }, { "epoch": 1.0098297988531901, "grad_norm": 2.828125, "learning_rate": 8.000221163996277e-06, "loss": 1.164804458618164, "step": 5548 }, { "epoch": 1.0101938654773823, "grad_norm": 3.53125, "learning_rate": 7.998894057748361e-06, "loss": 0.9811059832572937, "step": 5550 }, { "epoch": 1.0105579321015745, "grad_norm": 11.4375, "learning_rate": 7.997566658170843e-06, "loss": 1.6221890449523926, "step": 5552 }, { "epoch": 1.0109219987257667, "grad_norm": 25.75, "learning_rate": 7.996238965458516e-06, "loss": 1.3792487382888794, "step": 5554 }, { "epoch": 1.0112860653499591, "grad_norm": 12.375, "learning_rate": 7.994910979806208e-06, "loss": 1.555490493774414, "step": 5556 }, { "epoch": 1.0116501319741513, "grad_norm": 6.6875, "learning_rate": 7.9935827014088e-06, "loss": 1.3472093343734741, "step": 5558 }, { "epoch": 1.0120141985983435, "grad_norm": 9.625, "learning_rate": 7.992254130461208e-06, "loss": 1.4074777364730835, "step": 5560 }, { "epoch": 1.0123782652225357, "grad_norm": 21.875, "learning_rate": 7.990925267158398e-06, "loss": 1.1080771684646606, "step": 5562 }, { "epoch": 1.012742331846728, "grad_norm": 4.34375, "learning_rate": 7.989596111695373e-06, "loss": 0.7663303017616272, "step": 5564 }, { "epoch": 1.01310639847092, "grad_norm": 18.875, "learning_rate": 7.988266664267181e-06, "loss": 1.478060245513916, "step": 5566 }, { "epoch": 1.0134704650951125, "grad_norm": 9.8125, "learning_rate": 7.986936925068913e-06, "loss": 1.3367947340011597, "step": 5568 }, { "epoch": 1.0138345317193047, "grad_norm": 9.8125, "learning_rate": 7.985606894295705e-06, "loss": 1.3023066520690918, "step": 5570 }, { "epoch": 1.014198598343497, "grad_norm": 26.0, "learning_rate": 7.984276572142733e-06, "loss": 1.348774790763855, "step": 5572 }, { "epoch": 1.014562664967689, "grad_norm": 5.71875, "learning_rate": 7.982945958805215e-06, "loss": 1.1589783430099487, "step": 5574 }, { "epoch": 1.0149267315918813, "grad_norm": 35.0, "learning_rate": 7.981615054478412e-06, "loss": 1.486038327217102, "step": 5576 }, { "epoch": 1.0152907982160735, "grad_norm": 5.21875, "learning_rate": 7.980283859357633e-06, "loss": 1.1041972637176514, "step": 5578 }, { "epoch": 1.0156548648402657, "grad_norm": 9.5, "learning_rate": 7.978952373638222e-06, "loss": 1.3358068466186523, "step": 5580 }, { "epoch": 1.016018931464458, "grad_norm": 11.9375, "learning_rate": 7.97762059751557e-06, "loss": 1.4987894296646118, "step": 5582 }, { "epoch": 1.0163829980886503, "grad_norm": 4.40625, "learning_rate": 7.976288531185112e-06, "loss": 1.5133428573608398, "step": 5584 }, { "epoch": 1.0167470647128425, "grad_norm": 7.71875, "learning_rate": 7.974956174842319e-06, "loss": 1.4151054620742798, "step": 5586 }, { "epoch": 1.0171111313370347, "grad_norm": 11.625, "learning_rate": 7.97362352868271e-06, "loss": 1.5820132493972778, "step": 5588 }, { "epoch": 1.0174751979612269, "grad_norm": 4.34375, "learning_rate": 7.972290592901847e-06, "loss": 0.930628776550293, "step": 5590 }, { "epoch": 1.017839264585419, "grad_norm": 21.375, "learning_rate": 7.970957367695335e-06, "loss": 2.016417980194092, "step": 5592 }, { "epoch": 1.0182033312096113, "grad_norm": 9.6875, "learning_rate": 7.969623853258816e-06, "loss": 1.2663755416870117, "step": 5594 }, { "epoch": 1.0185673978338037, "grad_norm": 10.6875, "learning_rate": 7.968290049787979e-06, "loss": 1.3435295820236206, "step": 5596 }, { "epoch": 1.0189314644579959, "grad_norm": 12.8125, "learning_rate": 7.966955957478553e-06, "loss": 1.3806339502334595, "step": 5598 }, { "epoch": 1.019295531082188, "grad_norm": 32.0, "learning_rate": 7.96562157652631e-06, "loss": 1.3372187614440918, "step": 5600 }, { "epoch": 1.0196595977063803, "grad_norm": 17.125, "learning_rate": 7.96428690712707e-06, "loss": 1.2605324983596802, "step": 5602 }, { "epoch": 1.0200236643305725, "grad_norm": 28.625, "learning_rate": 7.962951949476685e-06, "loss": 1.2971853017807007, "step": 5604 }, { "epoch": 1.0203877309547646, "grad_norm": 6.46875, "learning_rate": 7.961616703771055e-06, "loss": 1.1381902694702148, "step": 5606 }, { "epoch": 1.0207517975789568, "grad_norm": 16.875, "learning_rate": 7.960281170206128e-06, "loss": 1.4832932949066162, "step": 5608 }, { "epoch": 1.0211158642031493, "grad_norm": 16.625, "learning_rate": 7.95894534897788e-06, "loss": 1.4112111330032349, "step": 5610 }, { "epoch": 1.0214799308273415, "grad_norm": 32.25, "learning_rate": 7.957609240282342e-06, "loss": 1.7598168849945068, "step": 5612 }, { "epoch": 1.0218439974515336, "grad_norm": 3.828125, "learning_rate": 7.95627284431558e-06, "loss": 1.01741623878479, "step": 5614 }, { "epoch": 1.0222080640757258, "grad_norm": 7.90625, "learning_rate": 7.954936161273707e-06, "loss": 1.0698068141937256, "step": 5616 }, { "epoch": 1.022572130699918, "grad_norm": 7.59375, "learning_rate": 7.953599191352876e-06, "loss": 1.4497947692871094, "step": 5618 }, { "epoch": 1.0229361973241102, "grad_norm": 6.71875, "learning_rate": 7.95226193474928e-06, "loss": 1.1009907722473145, "step": 5620 }, { "epoch": 1.0233002639483026, "grad_norm": 11.625, "learning_rate": 7.950924391659159e-06, "loss": 1.5086032152175903, "step": 5622 }, { "epoch": 1.0236643305724948, "grad_norm": 16.125, "learning_rate": 7.949586562278788e-06, "loss": 1.419341802597046, "step": 5624 }, { "epoch": 1.024028397196687, "grad_norm": 10.625, "learning_rate": 7.948248446804492e-06, "loss": 1.4290971755981445, "step": 5626 }, { "epoch": 1.0243924638208792, "grad_norm": 18.875, "learning_rate": 7.946910045432627e-06, "loss": 1.3870618343353271, "step": 5628 }, { "epoch": 1.0247565304450714, "grad_norm": 7.78125, "learning_rate": 7.945571358359607e-06, "loss": 1.3220620155334473, "step": 5630 }, { "epoch": 1.0251205970692636, "grad_norm": 7.90625, "learning_rate": 7.944232385781874e-06, "loss": 1.4463415145874023, "step": 5632 }, { "epoch": 1.0254846636934558, "grad_norm": 11.25, "learning_rate": 7.94289312789592e-06, "loss": 1.3423161506652832, "step": 5634 }, { "epoch": 1.0258487303176482, "grad_norm": 4.4375, "learning_rate": 7.94155358489827e-06, "loss": 0.9303983449935913, "step": 5636 }, { "epoch": 1.0262127969418404, "grad_norm": 11.6875, "learning_rate": 7.940213756985503e-06, "loss": 1.551761269569397, "step": 5638 }, { "epoch": 1.0265768635660326, "grad_norm": 23.5, "learning_rate": 7.938873644354225e-06, "loss": 0.8018133640289307, "step": 5640 }, { "epoch": 1.0269409301902248, "grad_norm": 8.625, "learning_rate": 7.9375332472011e-06, "loss": 0.8830986022949219, "step": 5642 }, { "epoch": 1.027304996814417, "grad_norm": 24.125, "learning_rate": 7.936192565722824e-06, "loss": 0.9009889364242554, "step": 5644 }, { "epoch": 1.0276690634386092, "grad_norm": 24.125, "learning_rate": 7.934851600116136e-06, "loss": 1.7313756942749023, "step": 5646 }, { "epoch": 1.0280331300628014, "grad_norm": 12.375, "learning_rate": 7.933510350577816e-06, "loss": 1.3145555257797241, "step": 5648 }, { "epoch": 1.0283971966869938, "grad_norm": 7.28125, "learning_rate": 7.932168817304689e-06, "loss": 1.5072616338729858, "step": 5650 }, { "epoch": 1.028761263311186, "grad_norm": 7.84375, "learning_rate": 7.930827000493614e-06, "loss": 1.4100126028060913, "step": 5652 }, { "epoch": 1.0291253299353782, "grad_norm": 7.5, "learning_rate": 7.929484900341507e-06, "loss": 1.5347495079040527, "step": 5654 }, { "epoch": 1.0294893965595704, "grad_norm": 12.1875, "learning_rate": 7.928142517045307e-06, "loss": 1.370579481124878, "step": 5656 }, { "epoch": 1.0298534631837626, "grad_norm": 14.125, "learning_rate": 7.926799850802009e-06, "loss": 1.1086434125900269, "step": 5658 }, { "epoch": 1.0302175298079548, "grad_norm": 14.4375, "learning_rate": 7.925456901808642e-06, "loss": 1.4568206071853638, "step": 5660 }, { "epoch": 1.030581596432147, "grad_norm": 8.25, "learning_rate": 7.924113670262277e-06, "loss": 0.6029893159866333, "step": 5662 }, { "epoch": 1.0309456630563394, "grad_norm": 8.8125, "learning_rate": 7.922770156360027e-06, "loss": 1.4496461153030396, "step": 5664 }, { "epoch": 1.0313097296805316, "grad_norm": 9.0, "learning_rate": 7.921426360299052e-06, "loss": 1.3289098739624023, "step": 5666 }, { "epoch": 1.0316737963047238, "grad_norm": 7.40625, "learning_rate": 7.920082282276542e-06, "loss": 1.0863322019577026, "step": 5668 }, { "epoch": 1.032037862928916, "grad_norm": 17.125, "learning_rate": 7.918737922489741e-06, "loss": 1.4817003011703491, "step": 5670 }, { "epoch": 1.0324019295531082, "grad_norm": 10.3125, "learning_rate": 7.917393281135923e-06, "loss": 1.361006736755371, "step": 5672 }, { "epoch": 1.0327659961773004, "grad_norm": 10.1875, "learning_rate": 7.916048358412413e-06, "loss": 1.1761387586593628, "step": 5674 }, { "epoch": 1.0331300628014928, "grad_norm": 24.625, "learning_rate": 7.914703154516572e-06, "loss": 1.8891961574554443, "step": 5676 }, { "epoch": 1.033494129425685, "grad_norm": 2.453125, "learning_rate": 7.913357669645803e-06, "loss": 1.049350380897522, "step": 5678 }, { "epoch": 1.0338581960498772, "grad_norm": 10.0, "learning_rate": 7.91201190399755e-06, "loss": 1.3955374956130981, "step": 5680 }, { "epoch": 1.0342222626740694, "grad_norm": 2.78125, "learning_rate": 7.910665857769298e-06, "loss": 1.0531591176986694, "step": 5682 }, { "epoch": 1.0345863292982616, "grad_norm": 12.0625, "learning_rate": 7.909319531158577e-06, "loss": 1.6384118795394897, "step": 5684 }, { "epoch": 1.0349503959224537, "grad_norm": 13.125, "learning_rate": 7.90797292436295e-06, "loss": 1.4781283140182495, "step": 5686 }, { "epoch": 1.035314462546646, "grad_norm": 17.0, "learning_rate": 7.906626037580027e-06, "loss": 0.19681823253631592, "step": 5688 }, { "epoch": 1.0356785291708384, "grad_norm": 8.1875, "learning_rate": 7.905278871007465e-06, "loss": 1.3952281475067139, "step": 5690 }, { "epoch": 1.0360425957950306, "grad_norm": 6.90625, "learning_rate": 7.903931424842946e-06, "loss": 1.3457505702972412, "step": 5692 }, { "epoch": 1.0364066624192227, "grad_norm": 13.75, "learning_rate": 7.902583699284207e-06, "loss": 1.4581931829452515, "step": 5694 }, { "epoch": 1.036770729043415, "grad_norm": 6.78125, "learning_rate": 7.901235694529021e-06, "loss": 0.7864412069320679, "step": 5696 }, { "epoch": 1.0371347956676071, "grad_norm": 10.1875, "learning_rate": 7.899887410775202e-06, "loss": 0.9320163130760193, "step": 5698 }, { "epoch": 1.0374988622917993, "grad_norm": 17.875, "learning_rate": 7.898538848220607e-06, "loss": 1.3165302276611328, "step": 5700 }, { "epoch": 1.0378629289159915, "grad_norm": 10.4375, "learning_rate": 7.897190007063129e-06, "loss": 1.2565268278121948, "step": 5702 }, { "epoch": 1.038226995540184, "grad_norm": 4.3125, "learning_rate": 7.895840887500703e-06, "loss": 1.0381525754928589, "step": 5704 }, { "epoch": 1.0385910621643761, "grad_norm": 17.625, "learning_rate": 7.894491489731313e-06, "loss": 1.5520533323287964, "step": 5706 }, { "epoch": 1.0389551287885683, "grad_norm": 7.375, "learning_rate": 7.893141813952973e-06, "loss": 0.8928835391998291, "step": 5708 }, { "epoch": 1.0393191954127605, "grad_norm": 3.734375, "learning_rate": 7.891791860363747e-06, "loss": 1.226557731628418, "step": 5710 }, { "epoch": 1.0396832620369527, "grad_norm": 20.125, "learning_rate": 7.89044162916173e-06, "loss": 1.661374568939209, "step": 5712 }, { "epoch": 1.040047328661145, "grad_norm": 12.375, "learning_rate": 7.889091120545064e-06, "loss": 1.4480359554290771, "step": 5714 }, { "epoch": 1.040411395285337, "grad_norm": 4.5, "learning_rate": 7.887740334711935e-06, "loss": 1.0804306268692017, "step": 5716 }, { "epoch": 1.0407754619095295, "grad_norm": 14.375, "learning_rate": 7.88638927186056e-06, "loss": 1.383379578590393, "step": 5718 }, { "epoch": 1.0411395285337217, "grad_norm": 16.5, "learning_rate": 7.885037932189206e-06, "loss": 1.3997722864151, "step": 5720 }, { "epoch": 1.041503595157914, "grad_norm": 8.5625, "learning_rate": 7.883686315896173e-06, "loss": 1.4699680805206299, "step": 5722 }, { "epoch": 1.041867661782106, "grad_norm": 13.75, "learning_rate": 7.882334423179807e-06, "loss": 1.2019716501235962, "step": 5724 }, { "epoch": 1.0422317284062983, "grad_norm": 9.0625, "learning_rate": 7.880982254238495e-06, "loss": 1.3388502597808838, "step": 5726 }, { "epoch": 1.0425957950304905, "grad_norm": 10.5, "learning_rate": 7.879629809270657e-06, "loss": 1.1779283285140991, "step": 5728 }, { "epoch": 1.042959861654683, "grad_norm": 19.625, "learning_rate": 7.878277088474764e-06, "loss": 1.9388954639434814, "step": 5730 }, { "epoch": 1.043323928278875, "grad_norm": 2.96875, "learning_rate": 7.87692409204932e-06, "loss": 0.8207886815071106, "step": 5732 }, { "epoch": 1.0436879949030673, "grad_norm": 31.625, "learning_rate": 7.875570820192873e-06, "loss": 1.4753812551498413, "step": 5734 }, { "epoch": 1.0440520615272595, "grad_norm": 5.75, "learning_rate": 7.874217273104008e-06, "loss": 0.8696913719177246, "step": 5736 }, { "epoch": 1.0444161281514517, "grad_norm": 16.75, "learning_rate": 7.872863450981352e-06, "loss": 1.227979302406311, "step": 5738 }, { "epoch": 1.0447801947756439, "grad_norm": 11.3125, "learning_rate": 7.871509354023577e-06, "loss": 1.7013131380081177, "step": 5740 }, { "epoch": 1.045144261399836, "grad_norm": 14.125, "learning_rate": 7.870154982429387e-06, "loss": 1.4951039552688599, "step": 5742 }, { "epoch": 1.0455083280240285, "grad_norm": 3.703125, "learning_rate": 7.868800336397536e-06, "loss": 1.0460208654403687, "step": 5744 }, { "epoch": 1.0458723946482207, "grad_norm": 5.90625, "learning_rate": 7.867445416126804e-06, "loss": 1.1345962285995483, "step": 5746 }, { "epoch": 1.0462364612724129, "grad_norm": 16.125, "learning_rate": 7.86609022181603e-06, "loss": 2.0216028690338135, "step": 5748 }, { "epoch": 1.046600527896605, "grad_norm": 15.125, "learning_rate": 7.864734753664076e-06, "loss": 1.5517833232879639, "step": 5750 }, { "epoch": 1.0469645945207973, "grad_norm": 15.25, "learning_rate": 7.863379011869856e-06, "loss": 0.74480140209198, "step": 5752 }, { "epoch": 1.0473286611449895, "grad_norm": 18.25, "learning_rate": 7.862022996632315e-06, "loss": 1.3665461540222168, "step": 5754 }, { "epoch": 1.0476927277691817, "grad_norm": 12.5625, "learning_rate": 7.860666708150447e-06, "loss": 2.0109081268310547, "step": 5756 }, { "epoch": 1.048056794393374, "grad_norm": 4.09375, "learning_rate": 7.85931014662328e-06, "loss": 1.1212888956069946, "step": 5758 }, { "epoch": 1.0484208610175663, "grad_norm": 36.0, "learning_rate": 7.857953312249885e-06, "loss": 0.11577693372964859, "step": 5760 }, { "epoch": 1.0487849276417585, "grad_norm": 10.5625, "learning_rate": 7.85659620522937e-06, "loss": 1.418005347251892, "step": 5762 }, { "epoch": 1.0491489942659507, "grad_norm": 45.5, "learning_rate": 7.855238825760885e-06, "loss": 0.9027985334396362, "step": 5764 }, { "epoch": 1.0495130608901428, "grad_norm": 90.5, "learning_rate": 7.853881174043623e-06, "loss": 1.3457250595092773, "step": 5766 }, { "epoch": 1.049877127514335, "grad_norm": 3.359375, "learning_rate": 7.852523250276809e-06, "loss": 0.7009027004241943, "step": 5768 }, { "epoch": 1.0502411941385275, "grad_norm": 2.65625, "learning_rate": 7.85116505465972e-06, "loss": 0.751906156539917, "step": 5770 }, { "epoch": 1.0506052607627197, "grad_norm": 6.0, "learning_rate": 7.849806587391657e-06, "loss": 1.1912050247192383, "step": 5772 }, { "epoch": 1.0509693273869118, "grad_norm": 20.375, "learning_rate": 7.848447848671976e-06, "loss": 0.5299519896507263, "step": 5774 }, { "epoch": 1.051333394011104, "grad_norm": 26.125, "learning_rate": 7.847088838700066e-06, "loss": 1.519481897354126, "step": 5776 }, { "epoch": 1.0516974606352962, "grad_norm": 28.25, "learning_rate": 7.84572955767535e-06, "loss": 1.52915358543396, "step": 5778 }, { "epoch": 1.0520615272594884, "grad_norm": 14.875, "learning_rate": 7.844370005797304e-06, "loss": 1.8322023153305054, "step": 5780 }, { "epoch": 1.0524255938836806, "grad_norm": 2.5, "learning_rate": 7.843010183265436e-06, "loss": 1.0097967386245728, "step": 5782 }, { "epoch": 1.052789660507873, "grad_norm": 34.25, "learning_rate": 7.841650090279292e-06, "loss": 2.2194175720214844, "step": 5784 }, { "epoch": 1.0531537271320652, "grad_norm": 35.0, "learning_rate": 7.840289727038457e-06, "loss": 1.9527595043182373, "step": 5786 }, { "epoch": 1.0535177937562574, "grad_norm": 20.625, "learning_rate": 7.838929093742566e-06, "loss": 1.8302507400512695, "step": 5788 }, { "epoch": 1.0538818603804496, "grad_norm": 4.59375, "learning_rate": 7.837568190591283e-06, "loss": 1.1075001955032349, "step": 5790 }, { "epoch": 1.0542459270046418, "grad_norm": 11.5, "learning_rate": 7.836207017784314e-06, "loss": 1.4602190256118774, "step": 5792 }, { "epoch": 1.054609993628834, "grad_norm": 15.0625, "learning_rate": 7.834845575521408e-06, "loss": 1.7976443767547607, "step": 5794 }, { "epoch": 1.0549740602530262, "grad_norm": 30.25, "learning_rate": 7.833483864002347e-06, "loss": 1.040452480316162, "step": 5796 }, { "epoch": 1.0553381268772186, "grad_norm": 5.0625, "learning_rate": 7.832121883426961e-06, "loss": 1.0199024677276611, "step": 5798 }, { "epoch": 1.0557021935014108, "grad_norm": 8.875, "learning_rate": 7.830759633995116e-06, "loss": 1.1002187728881836, "step": 5800 }, { "epoch": 1.056066260125603, "grad_norm": 10.875, "learning_rate": 7.82939711590671e-06, "loss": 1.657292366027832, "step": 5802 }, { "epoch": 1.0564303267497952, "grad_norm": 13.125, "learning_rate": 7.828034329361694e-06, "loss": 1.3729122877120972, "step": 5804 }, { "epoch": 1.0567943933739874, "grad_norm": 33.5, "learning_rate": 7.826671274560048e-06, "loss": 0.6674918532371521, "step": 5806 }, { "epoch": 1.0571584599981796, "grad_norm": 13.6875, "learning_rate": 7.825307951701795e-06, "loss": 1.3292901515960693, "step": 5808 }, { "epoch": 1.057522526622372, "grad_norm": 16.0, "learning_rate": 7.823944360986997e-06, "loss": 1.1440261602401733, "step": 5810 }, { "epoch": 1.0578865932465642, "grad_norm": 54.5, "learning_rate": 7.822580502615755e-06, "loss": 2.157811403274536, "step": 5812 }, { "epoch": 1.0582506598707564, "grad_norm": 17.125, "learning_rate": 7.821216376788211e-06, "loss": 1.4427753686904907, "step": 5814 }, { "epoch": 1.0586147264949486, "grad_norm": 8.9375, "learning_rate": 7.819851983704548e-06, "loss": 0.21290744841098785, "step": 5816 }, { "epoch": 1.0589787931191408, "grad_norm": 10.5, "learning_rate": 7.818487323564976e-06, "loss": 1.7550071477890015, "step": 5818 }, { "epoch": 1.059342859743333, "grad_norm": 46.75, "learning_rate": 7.817122396569762e-06, "loss": 1.3410418033599854, "step": 5820 }, { "epoch": 1.0597069263675252, "grad_norm": 15.8125, "learning_rate": 7.8157572029192e-06, "loss": 1.7078181505203247, "step": 5822 }, { "epoch": 1.0600709929917176, "grad_norm": 22.5, "learning_rate": 7.814391742813627e-06, "loss": 1.946914792060852, "step": 5824 }, { "epoch": 1.0604350596159098, "grad_norm": 11.0625, "learning_rate": 7.81302601645342e-06, "loss": 1.6175501346588135, "step": 5826 }, { "epoch": 1.060799126240102, "grad_norm": 9.3125, "learning_rate": 7.811660024038992e-06, "loss": 1.3057560920715332, "step": 5828 }, { "epoch": 1.0611631928642942, "grad_norm": 16.125, "learning_rate": 7.810293765770798e-06, "loss": 1.3452696800231934, "step": 5830 }, { "epoch": 1.0615272594884864, "grad_norm": 18.375, "learning_rate": 7.80892724184933e-06, "loss": 1.3865573406219482, "step": 5832 }, { "epoch": 1.0618913261126786, "grad_norm": 9.6875, "learning_rate": 7.807560452475117e-06, "loss": 1.3722175359725952, "step": 5834 }, { "epoch": 1.0622553927368708, "grad_norm": 4.21875, "learning_rate": 7.806193397848735e-06, "loss": 1.2886347770690918, "step": 5836 }, { "epoch": 1.0626194593610632, "grad_norm": 18.875, "learning_rate": 7.804826078170795e-06, "loss": 1.3787620067596436, "step": 5838 }, { "epoch": 1.0629835259852554, "grad_norm": 10.125, "learning_rate": 7.80345849364194e-06, "loss": 1.409644365310669, "step": 5840 }, { "epoch": 1.0633475926094476, "grad_norm": 18.5, "learning_rate": 7.802090644462858e-06, "loss": 1.8887813091278076, "step": 5842 }, { "epoch": 1.0637116592336398, "grad_norm": 11.375, "learning_rate": 7.800722530834279e-06, "loss": 1.4753693342208862, "step": 5844 }, { "epoch": 1.064075725857832, "grad_norm": 12.875, "learning_rate": 7.799354152956967e-06, "loss": 1.5493706464767456, "step": 5846 }, { "epoch": 1.0644397924820241, "grad_norm": 12.375, "learning_rate": 7.797985511031724e-06, "loss": 1.069637656211853, "step": 5848 }, { "epoch": 1.0648038591062163, "grad_norm": 6.4375, "learning_rate": 7.796616605259395e-06, "loss": 1.4298814535140991, "step": 5850 }, { "epoch": 1.0651679257304087, "grad_norm": 10.375, "learning_rate": 7.79524743584086e-06, "loss": 1.6018315553665161, "step": 5852 }, { "epoch": 1.065531992354601, "grad_norm": 13.25, "learning_rate": 7.793878002977038e-06, "loss": 1.3345156908035278, "step": 5854 }, { "epoch": 1.0658960589787931, "grad_norm": 9.625, "learning_rate": 7.792508306868889e-06, "loss": 1.5287530422210693, "step": 5856 }, { "epoch": 1.0662601256029853, "grad_norm": 6.34375, "learning_rate": 7.79113834771741e-06, "loss": 1.1813935041427612, "step": 5858 }, { "epoch": 1.0666241922271775, "grad_norm": 4.59375, "learning_rate": 7.78976812572364e-06, "loss": 1.168440818786621, "step": 5860 }, { "epoch": 1.0669882588513697, "grad_norm": 30.25, "learning_rate": 7.78839764108865e-06, "loss": 1.3632657527923584, "step": 5862 }, { "epoch": 1.0673523254755621, "grad_norm": 3.46875, "learning_rate": 7.787026894013551e-06, "loss": 0.8462130427360535, "step": 5864 }, { "epoch": 1.0677163920997543, "grad_norm": 14.0, "learning_rate": 7.7856558846995e-06, "loss": 1.4883511066436768, "step": 5866 }, { "epoch": 1.0680804587239465, "grad_norm": 17.875, "learning_rate": 7.784284613347684e-06, "loss": 1.332727074623108, "step": 5868 }, { "epoch": 1.0684445253481387, "grad_norm": 13.25, "learning_rate": 7.782913080159334e-06, "loss": 1.416504979133606, "step": 5870 }, { "epoch": 1.068808591972331, "grad_norm": 11.25, "learning_rate": 7.781541285335712e-06, "loss": 1.5033235549926758, "step": 5872 }, { "epoch": 1.069172658596523, "grad_norm": 12.0, "learning_rate": 7.780169229078127e-06, "loss": 1.7850062847137451, "step": 5874 }, { "epoch": 1.0695367252207153, "grad_norm": 21.0, "learning_rate": 7.778796911587923e-06, "loss": 1.450186014175415, "step": 5876 }, { "epoch": 1.0699007918449077, "grad_norm": 27.125, "learning_rate": 7.77742433306648e-06, "loss": 1.6427778005599976, "step": 5878 }, { "epoch": 1.0702648584691, "grad_norm": 4.125, "learning_rate": 7.776051493715223e-06, "loss": 1.117126703262329, "step": 5880 }, { "epoch": 1.070628925093292, "grad_norm": 26.5, "learning_rate": 7.774678393735602e-06, "loss": 0.5835259556770325, "step": 5882 }, { "epoch": 1.0709929917174843, "grad_norm": 14.5625, "learning_rate": 7.773305033329121e-06, "loss": 1.3859399557113647, "step": 5884 }, { "epoch": 1.0713570583416765, "grad_norm": 6.3125, "learning_rate": 7.771931412697314e-06, "loss": 1.3661203384399414, "step": 5886 }, { "epoch": 1.0717211249658687, "grad_norm": 18.125, "learning_rate": 7.770557532041752e-06, "loss": 1.1463693380355835, "step": 5888 }, { "epoch": 1.0720851915900609, "grad_norm": 10.1875, "learning_rate": 7.769183391564047e-06, "loss": 1.524759292602539, "step": 5890 }, { "epoch": 1.0724492582142533, "grad_norm": 16.375, "learning_rate": 7.76780899146585e-06, "loss": 1.4006747007369995, "step": 5892 }, { "epoch": 1.0728133248384455, "grad_norm": 5.78125, "learning_rate": 7.766434331948846e-06, "loss": 0.8311281204223633, "step": 5894 }, { "epoch": 1.0731773914626377, "grad_norm": 20.0, "learning_rate": 7.765059413214767e-06, "loss": 1.905245304107666, "step": 5896 }, { "epoch": 1.0735414580868299, "grad_norm": 16.25, "learning_rate": 7.763684235465367e-06, "loss": 1.507002353668213, "step": 5898 }, { "epoch": 1.073905524711022, "grad_norm": 10.25, "learning_rate": 7.762308798902455e-06, "loss": 1.3457531929016113, "step": 5900 }, { "epoch": 1.0742695913352143, "grad_norm": 7.59375, "learning_rate": 7.76093310372787e-06, "loss": 1.5727198123931885, "step": 5902 }, { "epoch": 1.0746336579594065, "grad_norm": 10.3125, "learning_rate": 7.759557150143488e-06, "loss": 1.3533425331115723, "step": 5904 }, { "epoch": 1.0749977245835989, "grad_norm": 10.1875, "learning_rate": 7.758180938351225e-06, "loss": 1.562125563621521, "step": 5906 }, { "epoch": 1.075361791207791, "grad_norm": 8.0625, "learning_rate": 7.756804468553033e-06, "loss": 1.0933337211608887, "step": 5908 }, { "epoch": 1.0757258578319833, "grad_norm": 45.0, "learning_rate": 7.755427740950908e-06, "loss": 1.2925703525543213, "step": 5910 }, { "epoch": 1.0760899244561755, "grad_norm": 37.0, "learning_rate": 7.754050755746874e-06, "loss": 2.092103958129883, "step": 5912 }, { "epoch": 1.0764539910803677, "grad_norm": 15.3125, "learning_rate": 7.752673513143e-06, "loss": 1.3334004878997803, "step": 5914 }, { "epoch": 1.0768180577045599, "grad_norm": 7.8125, "learning_rate": 7.751296013341391e-06, "loss": 1.2754844427108765, "step": 5916 }, { "epoch": 1.0771821243287523, "grad_norm": 9.125, "learning_rate": 7.749918256544192e-06, "loss": 1.4102667570114136, "step": 5918 }, { "epoch": 1.0775461909529445, "grad_norm": 18.0, "learning_rate": 7.748540242953577e-06, "loss": 1.1046556234359741, "step": 5920 }, { "epoch": 1.0779102575771367, "grad_norm": 7.96875, "learning_rate": 7.747161972771769e-06, "loss": 1.364701271057129, "step": 5922 }, { "epoch": 1.0782743242013288, "grad_norm": 15.5, "learning_rate": 7.745783446201024e-06, "loss": 1.830952763557434, "step": 5924 }, { "epoch": 1.078638390825521, "grad_norm": 10.125, "learning_rate": 7.74440466344363e-06, "loss": 1.3364646434783936, "step": 5926 }, { "epoch": 1.0790024574497132, "grad_norm": 30.375, "learning_rate": 7.743025624701924e-06, "loss": 1.4164223670959473, "step": 5928 }, { "epoch": 1.0793665240739054, "grad_norm": 13.875, "learning_rate": 7.741646330178269e-06, "loss": 1.4897363185882568, "step": 5930 }, { "epoch": 1.0797305906980978, "grad_norm": 9.1875, "learning_rate": 7.740266780075074e-06, "loss": 1.3814654350280762, "step": 5932 }, { "epoch": 1.08009465732229, "grad_norm": 12.0, "learning_rate": 7.738886974594784e-06, "loss": 1.5612090826034546, "step": 5934 }, { "epoch": 1.0804587239464822, "grad_norm": 3.9375, "learning_rate": 7.737506913939875e-06, "loss": 1.2420921325683594, "step": 5936 }, { "epoch": 1.0808227905706744, "grad_norm": 14.25, "learning_rate": 7.736126598312867e-06, "loss": 2.12004017829895, "step": 5938 }, { "epoch": 1.0811868571948666, "grad_norm": 10.5625, "learning_rate": 7.734746027916319e-06, "loss": 1.4516373872756958, "step": 5940 }, { "epoch": 1.0815509238190588, "grad_norm": 4.0625, "learning_rate": 7.73336520295282e-06, "loss": 0.9513915777206421, "step": 5942 }, { "epoch": 1.081914990443251, "grad_norm": 21.0, "learning_rate": 7.731984123625002e-06, "loss": 0.9905973672866821, "step": 5944 }, { "epoch": 1.0822790570674434, "grad_norm": 13.9375, "learning_rate": 7.730602790135536e-06, "loss": 1.3266266584396362, "step": 5946 }, { "epoch": 1.0826431236916356, "grad_norm": 27.625, "learning_rate": 7.729221202687123e-06, "loss": 1.8169238567352295, "step": 5948 }, { "epoch": 1.0830071903158278, "grad_norm": 15.875, "learning_rate": 7.727839361482505e-06, "loss": 1.5460846424102783, "step": 5950 }, { "epoch": 1.08337125694002, "grad_norm": 16.0, "learning_rate": 7.726457266724463e-06, "loss": 0.6210437417030334, "step": 5952 }, { "epoch": 1.0837353235642122, "grad_norm": 7.65625, "learning_rate": 7.725074918615816e-06, "loss": 1.2871100902557373, "step": 5954 }, { "epoch": 1.0840993901884044, "grad_norm": 25.875, "learning_rate": 7.723692317359413e-06, "loss": 2.0566678047180176, "step": 5956 }, { "epoch": 1.0844634568125966, "grad_norm": 14.125, "learning_rate": 7.72230946315815e-06, "loss": 1.345217227935791, "step": 5958 }, { "epoch": 1.084827523436789, "grad_norm": 46.75, "learning_rate": 7.720926356214951e-06, "loss": 1.9433022737503052, "step": 5960 }, { "epoch": 1.0851915900609812, "grad_norm": 19.0, "learning_rate": 7.719542996732784e-06, "loss": 1.4428740739822388, "step": 5962 }, { "epoch": 1.0855556566851734, "grad_norm": 12.0, "learning_rate": 7.718159384914654e-06, "loss": 1.2566156387329102, "step": 5964 }, { "epoch": 1.0859197233093656, "grad_norm": 18.0, "learning_rate": 7.716775520963595e-06, "loss": 0.7551980018615723, "step": 5966 }, { "epoch": 1.0862837899335578, "grad_norm": 10.875, "learning_rate": 7.715391405082686e-06, "loss": 1.1641606092453003, "step": 5968 }, { "epoch": 1.08664785655775, "grad_norm": 16.625, "learning_rate": 7.71400703747504e-06, "loss": 0.7905727028846741, "step": 5970 }, { "epoch": 1.0870119231819424, "grad_norm": 6.53125, "learning_rate": 7.712622418343808e-06, "loss": 1.0725716352462769, "step": 5972 }, { "epoch": 1.0873759898061346, "grad_norm": 19.25, "learning_rate": 7.711237547892174e-06, "loss": 1.3367869853973389, "step": 5974 }, { "epoch": 1.0877400564303268, "grad_norm": 10.625, "learning_rate": 7.709852426323367e-06, "loss": 1.3810782432556152, "step": 5976 }, { "epoch": 1.088104123054519, "grad_norm": 11.1875, "learning_rate": 7.708467053840647e-06, "loss": 1.3132565021514893, "step": 5978 }, { "epoch": 1.0884681896787112, "grad_norm": 29.25, "learning_rate": 7.70708143064731e-06, "loss": 1.1596884727478027, "step": 5980 }, { "epoch": 1.0888322563029034, "grad_norm": 15.0625, "learning_rate": 7.70569555694669e-06, "loss": 1.4741616249084473, "step": 5982 }, { "epoch": 1.0891963229270956, "grad_norm": 14.0, "learning_rate": 7.704309432942161e-06, "loss": 1.375129222869873, "step": 5984 }, { "epoch": 1.089560389551288, "grad_norm": 31.25, "learning_rate": 7.702923058837131e-06, "loss": 2.0185065269470215, "step": 5986 }, { "epoch": 1.0899244561754802, "grad_norm": 17.5, "learning_rate": 7.701536434835042e-06, "loss": 1.766796588897705, "step": 5988 }, { "epoch": 1.0902885227996724, "grad_norm": 19.5, "learning_rate": 7.700149561139377e-06, "loss": 1.9443023204803467, "step": 5990 }, { "epoch": 1.0906525894238646, "grad_norm": 11.6875, "learning_rate": 7.698762437953653e-06, "loss": 0.7731174230575562, "step": 5992 }, { "epoch": 1.0910166560480568, "grad_norm": 11.625, "learning_rate": 7.697375065481425e-06, "loss": 1.0690741539001465, "step": 5994 }, { "epoch": 1.091380722672249, "grad_norm": 20.75, "learning_rate": 7.695987443926286e-06, "loss": 1.2377794981002808, "step": 5996 }, { "epoch": 1.0917447892964414, "grad_norm": 4.6875, "learning_rate": 7.694599573491863e-06, "loss": 0.9404780864715576, "step": 5998 }, { "epoch": 1.0921088559206336, "grad_norm": 17.375, "learning_rate": 7.693211454381822e-06, "loss": 1.5230134725570679, "step": 6000 }, { "epoch": 1.0924729225448258, "grad_norm": 19.75, "learning_rate": 7.691823086799862e-06, "loss": 0.7093430161476135, "step": 6002 }, { "epoch": 1.092836989169018, "grad_norm": 15.0, "learning_rate": 7.690434470949717e-06, "loss": 1.7909890413284302, "step": 6004 }, { "epoch": 1.0932010557932101, "grad_norm": 12.75, "learning_rate": 7.689045607035166e-06, "loss": 1.454171895980835, "step": 6006 }, { "epoch": 1.0935651224174023, "grad_norm": 20.5, "learning_rate": 7.68765649526002e-06, "loss": 1.4441622495651245, "step": 6008 }, { "epoch": 1.0939291890415945, "grad_norm": 18.625, "learning_rate": 7.686267135828119e-06, "loss": 0.9444335699081421, "step": 6010 }, { "epoch": 1.0942932556657867, "grad_norm": 11.0, "learning_rate": 7.684877528943348e-06, "loss": 1.5426393747329712, "step": 6012 }, { "epoch": 1.0946573222899791, "grad_norm": 12.6875, "learning_rate": 7.68348767480963e-06, "loss": 1.4582114219665527, "step": 6014 }, { "epoch": 1.0950213889141713, "grad_norm": 24.5, "learning_rate": 7.68209757363092e-06, "loss": 1.5973546504974365, "step": 6016 }, { "epoch": 1.0953854555383635, "grad_norm": 128.0, "learning_rate": 7.680707225611208e-06, "loss": 2.097869873046875, "step": 6018 }, { "epoch": 1.0957495221625557, "grad_norm": 25.0, "learning_rate": 7.67931663095452e-06, "loss": 1.0489354133605957, "step": 6020 }, { "epoch": 1.096113588786748, "grad_norm": 10.3125, "learning_rate": 7.677925789864923e-06, "loss": 1.5369552373886108, "step": 6022 }, { "epoch": 1.09647765541094, "grad_norm": 19.625, "learning_rate": 7.676534702546516e-06, "loss": 1.3601114749908447, "step": 6024 }, { "epoch": 1.0968417220351325, "grad_norm": 10.1875, "learning_rate": 7.675143369203437e-06, "loss": 1.0489931106567383, "step": 6026 }, { "epoch": 1.0972057886593247, "grad_norm": 3.546875, "learning_rate": 7.673751790039856e-06, "loss": 1.155547022819519, "step": 6028 }, { "epoch": 1.097569855283517, "grad_norm": 23.75, "learning_rate": 7.672359965259984e-06, "loss": 1.2132055759429932, "step": 6030 }, { "epoch": 1.097933921907709, "grad_norm": 12.5, "learning_rate": 7.670967895068065e-06, "loss": 1.3862146139144897, "step": 6032 }, { "epoch": 1.0982979885319013, "grad_norm": 4.375, "learning_rate": 7.669575579668375e-06, "loss": 1.0918357372283936, "step": 6034 }, { "epoch": 1.0986620551560935, "grad_norm": 8.375, "learning_rate": 7.668183019265238e-06, "loss": 1.2735404968261719, "step": 6036 }, { "epoch": 1.0990261217802857, "grad_norm": 15.0, "learning_rate": 7.666790214063005e-06, "loss": 1.5066826343536377, "step": 6038 }, { "epoch": 1.099390188404478, "grad_norm": 13.75, "learning_rate": 7.665397164266061e-06, "loss": 1.4150091409683228, "step": 6040 }, { "epoch": 1.0997542550286703, "grad_norm": 2.6875, "learning_rate": 7.664003870078833e-06, "loss": 1.1389790773391724, "step": 6042 }, { "epoch": 1.1001183216528625, "grad_norm": 13.5625, "learning_rate": 7.662610331705782e-06, "loss": 1.240532398223877, "step": 6044 }, { "epoch": 1.1004823882770547, "grad_norm": 124.0, "learning_rate": 7.6612165493514e-06, "loss": 0.4803674519062042, "step": 6046 }, { "epoch": 1.1008464549012469, "grad_norm": 10.25, "learning_rate": 7.659822523220225e-06, "loss": 1.2698731422424316, "step": 6048 }, { "epoch": 1.101210521525439, "grad_norm": 12.0, "learning_rate": 7.658428253516818e-06, "loss": 1.7715986967086792, "step": 6050 }, { "epoch": 1.1015745881496315, "grad_norm": 76.5, "learning_rate": 7.657033740445787e-06, "loss": 1.7982748746871948, "step": 6052 }, { "epoch": 1.1019386547738237, "grad_norm": 10.6875, "learning_rate": 7.65563898421177e-06, "loss": 1.2796062231063843, "step": 6054 }, { "epoch": 1.1023027213980159, "grad_norm": 41.75, "learning_rate": 7.654243985019442e-06, "loss": 1.7099835872650146, "step": 6056 }, { "epoch": 1.102666788022208, "grad_norm": 26.25, "learning_rate": 7.652848743073512e-06, "loss": 0.9476800560951233, "step": 6058 }, { "epoch": 1.1030308546464003, "grad_norm": 12.5625, "learning_rate": 7.651453258578731e-06, "loss": 1.2216453552246094, "step": 6060 }, { "epoch": 1.1033949212705925, "grad_norm": 9.75, "learning_rate": 7.650057531739873e-06, "loss": 1.2758268117904663, "step": 6062 }, { "epoch": 1.1037589878947847, "grad_norm": 27.375, "learning_rate": 7.648661562761763e-06, "loss": 0.8147430419921875, "step": 6064 }, { "epoch": 1.104123054518977, "grad_norm": 21.5, "learning_rate": 7.647265351849246e-06, "loss": 1.325575828552246, "step": 6066 }, { "epoch": 1.1044871211431693, "grad_norm": 9.0, "learning_rate": 7.645868899207219e-06, "loss": 1.3994100093841553, "step": 6068 }, { "epoch": 1.1048511877673615, "grad_norm": 42.0, "learning_rate": 7.644472205040598e-06, "loss": 1.3932067155838013, "step": 6070 }, { "epoch": 1.1052152543915537, "grad_norm": 10.0625, "learning_rate": 7.643075269554345e-06, "loss": 1.6284995079040527, "step": 6072 }, { "epoch": 1.1055793210157459, "grad_norm": 33.25, "learning_rate": 7.641678092953456e-06, "loss": 2.114983558654785, "step": 6074 }, { "epoch": 1.105943387639938, "grad_norm": 17.75, "learning_rate": 7.640280675442962e-06, "loss": 1.3591622114181519, "step": 6076 }, { "epoch": 1.1063074542641302, "grad_norm": 9.6875, "learning_rate": 7.638883017227924e-06, "loss": 1.5362677574157715, "step": 6078 }, { "epoch": 1.1066715208883227, "grad_norm": 26.125, "learning_rate": 7.637485118513447e-06, "loss": 1.373355507850647, "step": 6080 }, { "epoch": 1.1070355875125149, "grad_norm": 23.375, "learning_rate": 7.636086979504663e-06, "loss": 1.2211226224899292, "step": 6082 }, { "epoch": 1.107399654136707, "grad_norm": 9.0, "learning_rate": 7.634688600406745e-06, "loss": 1.0737813711166382, "step": 6084 }, { "epoch": 1.1077637207608992, "grad_norm": 14.75, "learning_rate": 7.6332899814249e-06, "loss": 1.4608807563781738, "step": 6086 }, { "epoch": 1.1081277873850914, "grad_norm": 4.21875, "learning_rate": 7.63189112276437e-06, "loss": 1.29204523563385, "step": 6088 }, { "epoch": 1.1084918540092836, "grad_norm": 16.375, "learning_rate": 7.630492024630431e-06, "loss": 1.6481047868728638, "step": 6090 }, { "epoch": 1.1088559206334758, "grad_norm": 15.8125, "learning_rate": 7.629092687228395e-06, "loss": 1.7501280307769775, "step": 6092 }, { "epoch": 1.1092199872576682, "grad_norm": 8.25, "learning_rate": 7.62769311076361e-06, "loss": 1.2036296129226685, "step": 6094 }, { "epoch": 1.1095840538818604, "grad_norm": 9.125, "learning_rate": 7.626293295441456e-06, "loss": 1.2800333499908447, "step": 6096 }, { "epoch": 1.1099481205060526, "grad_norm": 46.75, "learning_rate": 7.624893241467353e-06, "loss": 1.3343054056167603, "step": 6098 }, { "epoch": 1.1103121871302448, "grad_norm": 20.0, "learning_rate": 7.623492949046752e-06, "loss": 1.3392642736434937, "step": 6100 }, { "epoch": 1.110676253754437, "grad_norm": 11.75, "learning_rate": 7.622092418385139e-06, "loss": 1.4595372676849365, "step": 6102 }, { "epoch": 1.1110403203786292, "grad_norm": 9.0625, "learning_rate": 7.620691649688039e-06, "loss": 1.3964455127716064, "step": 6104 }, { "epoch": 1.1114043870028216, "grad_norm": 11.1875, "learning_rate": 7.619290643161006e-06, "loss": 1.163963794708252, "step": 6106 }, { "epoch": 1.1117684536270138, "grad_norm": 8.625, "learning_rate": 7.617889399009635e-06, "loss": 1.1876957416534424, "step": 6108 }, { "epoch": 1.112132520251206, "grad_norm": 9.375, "learning_rate": 7.61648791743955e-06, "loss": 1.3723902702331543, "step": 6110 }, { "epoch": 1.1124965868753982, "grad_norm": 3.6875, "learning_rate": 7.615086198656414e-06, "loss": 1.1908726692199707, "step": 6112 }, { "epoch": 1.1128606534995904, "grad_norm": 8.5, "learning_rate": 7.613684242865924e-06, "loss": 1.3020076751708984, "step": 6114 }, { "epoch": 1.1132247201237826, "grad_norm": 14.8125, "learning_rate": 7.612282050273812e-06, "loss": 1.5242159366607666, "step": 6116 }, { "epoch": 1.1135887867479748, "grad_norm": 17.0, "learning_rate": 7.6108796210858425e-06, "loss": 1.308373212814331, "step": 6118 }, { "epoch": 1.1139528533721672, "grad_norm": 21.25, "learning_rate": 7.6094769555078175e-06, "loss": 1.6335344314575195, "step": 6120 }, { "epoch": 1.1143169199963594, "grad_norm": 15.8125, "learning_rate": 7.608074053745571e-06, "loss": 1.460789680480957, "step": 6122 }, { "epoch": 1.1146809866205516, "grad_norm": 13.875, "learning_rate": 7.606670916004975e-06, "loss": 1.5344470739364624, "step": 6124 }, { "epoch": 1.1150450532447438, "grad_norm": 15.8125, "learning_rate": 7.605267542491932e-06, "loss": 1.3683898448944092, "step": 6126 }, { "epoch": 1.115409119868936, "grad_norm": 24.25, "learning_rate": 7.603863933412385e-06, "loss": 1.812874436378479, "step": 6128 }, { "epoch": 1.1157731864931282, "grad_norm": 8.5, "learning_rate": 7.602460088972303e-06, "loss": 1.38902747631073, "step": 6130 }, { "epoch": 1.1161372531173204, "grad_norm": 10.9375, "learning_rate": 7.601056009377699e-06, "loss": 1.4262350797653198, "step": 6132 }, { "epoch": 1.1165013197415128, "grad_norm": 18.875, "learning_rate": 7.5996516948346135e-06, "loss": 1.5712898969650269, "step": 6134 }, { "epoch": 1.116865386365705, "grad_norm": 13.3125, "learning_rate": 7.5982471455491244e-06, "loss": 1.4469237327575684, "step": 6136 }, { "epoch": 1.1172294529898972, "grad_norm": 9.4375, "learning_rate": 7.596842361727346e-06, "loss": 1.220457673072815, "step": 6138 }, { "epoch": 1.1175935196140894, "grad_norm": 14.375, "learning_rate": 7.595437343575421e-06, "loss": 1.317972183227539, "step": 6140 }, { "epoch": 1.1179575862382816, "grad_norm": 10.3125, "learning_rate": 7.5940320912995304e-06, "loss": 1.4042589664459229, "step": 6142 }, { "epoch": 1.1183216528624738, "grad_norm": 11.0625, "learning_rate": 7.592626605105891e-06, "loss": 0.9241873025894165, "step": 6144 }, { "epoch": 1.118685719486666, "grad_norm": 10.6875, "learning_rate": 7.591220885200752e-06, "loss": 1.404044270515442, "step": 6146 }, { "epoch": 1.1190497861108584, "grad_norm": 12.3125, "learning_rate": 7.589814931790396e-06, "loss": 2.0191550254821777, "step": 6148 }, { "epoch": 1.1194138527350506, "grad_norm": 11.125, "learning_rate": 7.5884087450811414e-06, "loss": 0.9703912734985352, "step": 6150 }, { "epoch": 1.1197779193592428, "grad_norm": 12.0, "learning_rate": 7.587002325279342e-06, "loss": 0.9269189834594727, "step": 6152 }, { "epoch": 1.120141985983435, "grad_norm": 4.8125, "learning_rate": 7.585595672591382e-06, "loss": 0.8698933720588684, "step": 6154 }, { "epoch": 1.1205060526076271, "grad_norm": 4.75, "learning_rate": 7.584188787223683e-06, "loss": 0.9515825510025024, "step": 6156 }, { "epoch": 1.1208701192318193, "grad_norm": 12.5, "learning_rate": 7.5827816693827e-06, "loss": 1.3375169038772583, "step": 6158 }, { "epoch": 1.1212341858560118, "grad_norm": 15.5625, "learning_rate": 7.581374319274924e-06, "loss": 1.3456029891967773, "step": 6160 }, { "epoch": 1.121598252480204, "grad_norm": 28.75, "learning_rate": 7.579966737106872e-06, "loss": 1.425205111503601, "step": 6162 }, { "epoch": 1.1219623191043961, "grad_norm": 10.375, "learning_rate": 7.578558923085104e-06, "loss": 1.4412906169891357, "step": 6164 }, { "epoch": 1.1223263857285883, "grad_norm": 28.5, "learning_rate": 7.577150877416213e-06, "loss": 2.089081287384033, "step": 6166 }, { "epoch": 1.1226904523527805, "grad_norm": 7.21875, "learning_rate": 7.575742600306821e-06, "loss": 1.1414821147918701, "step": 6168 }, { "epoch": 1.1230545189769727, "grad_norm": 8.875, "learning_rate": 7.574334091963591e-06, "loss": 1.154106616973877, "step": 6170 }, { "epoch": 1.123418585601165, "grad_norm": 33.25, "learning_rate": 7.572925352593212e-06, "loss": 1.2359026670455933, "step": 6172 }, { "epoch": 1.1237826522253573, "grad_norm": 8.625, "learning_rate": 7.571516382402411e-06, "loss": 1.3775465488433838, "step": 6174 }, { "epoch": 1.1241467188495495, "grad_norm": 13.25, "learning_rate": 7.57010718159795e-06, "loss": 1.2567527294158936, "step": 6176 }, { "epoch": 1.1245107854737417, "grad_norm": 2.28125, "learning_rate": 7.5686977503866245e-06, "loss": 0.9441748857498169, "step": 6178 }, { "epoch": 1.124874852097934, "grad_norm": 9.625, "learning_rate": 7.56728808897526e-06, "loss": 1.390716791152954, "step": 6180 }, { "epoch": 1.1252389187221261, "grad_norm": 10.0625, "learning_rate": 7.565878197570719e-06, "loss": 1.4376145601272583, "step": 6182 }, { "epoch": 1.1256029853463183, "grad_norm": 12.125, "learning_rate": 7.5644680763799e-06, "loss": 1.4083622694015503, "step": 6184 }, { "epoch": 1.1259670519705107, "grad_norm": 11.8125, "learning_rate": 7.563057725609733e-06, "loss": 1.1680452823638916, "step": 6186 }, { "epoch": 1.126331118594703, "grad_norm": 3.796875, "learning_rate": 7.561647145467177e-06, "loss": 1.1869986057281494, "step": 6188 }, { "epoch": 1.1266951852188951, "grad_norm": 20.625, "learning_rate": 7.56023633615923e-06, "loss": 1.40728759765625, "step": 6190 }, { "epoch": 1.1270592518430873, "grad_norm": 5.75, "learning_rate": 7.558825297892927e-06, "loss": 1.409633755683899, "step": 6192 }, { "epoch": 1.1274233184672795, "grad_norm": 37.75, "learning_rate": 7.557414030875325e-06, "loss": 1.965872049331665, "step": 6194 }, { "epoch": 1.1277873850914717, "grad_norm": 29.5, "learning_rate": 7.556002535313529e-06, "loss": 1.337764859199524, "step": 6196 }, { "epoch": 1.1281514517156639, "grad_norm": 25.375, "learning_rate": 7.554590811414666e-06, "loss": 1.0945656299591064, "step": 6198 }, { "epoch": 1.128515518339856, "grad_norm": 7.4375, "learning_rate": 7.553178859385901e-06, "loss": 1.225588083267212, "step": 6200 }, { "epoch": 1.1288795849640485, "grad_norm": 25.125, "learning_rate": 7.551766679434433e-06, "loss": 1.7256288528442383, "step": 6202 }, { "epoch": 1.1292436515882407, "grad_norm": 4.625, "learning_rate": 7.550354271767495e-06, "loss": 0.9670466184616089, "step": 6204 }, { "epoch": 1.1296077182124329, "grad_norm": 7.875, "learning_rate": 7.5489416365923485e-06, "loss": 1.3122233152389526, "step": 6206 }, { "epoch": 1.129971784836625, "grad_norm": 12.75, "learning_rate": 7.547528774116295e-06, "loss": 0.5349314212799072, "step": 6208 }, { "epoch": 1.1303358514608173, "grad_norm": 18.625, "learning_rate": 7.546115684546667e-06, "loss": 1.282565951347351, "step": 6210 }, { "epoch": 1.1306999180850095, "grad_norm": 8.875, "learning_rate": 7.544702368090826e-06, "loss": 0.9437564611434937, "step": 6212 }, { "epoch": 1.1310639847092019, "grad_norm": 24.25, "learning_rate": 7.543288824956172e-06, "loss": 1.8608406782150269, "step": 6214 }, { "epoch": 1.131428051333394, "grad_norm": 12.1875, "learning_rate": 7.541875055350138e-06, "loss": 1.468324899673462, "step": 6216 }, { "epoch": 1.1317921179575863, "grad_norm": 20.125, "learning_rate": 7.540461059480191e-06, "loss": 1.348197102546692, "step": 6218 }, { "epoch": 1.1321561845817785, "grad_norm": 17.875, "learning_rate": 7.539046837553823e-06, "loss": 1.6343202590942383, "step": 6220 }, { "epoch": 1.1325202512059707, "grad_norm": 10.4375, "learning_rate": 7.537632389778571e-06, "loss": 1.754242181777954, "step": 6222 }, { "epoch": 1.1328843178301629, "grad_norm": 8.375, "learning_rate": 7.536217716361995e-06, "loss": 0.8625420928001404, "step": 6224 }, { "epoch": 1.133248384454355, "grad_norm": 4.5, "learning_rate": 7.5348028175116975e-06, "loss": 1.041172742843628, "step": 6226 }, { "epoch": 1.1336124510785475, "grad_norm": 23.25, "learning_rate": 7.533387693435305e-06, "loss": 1.2522087097167969, "step": 6228 }, { "epoch": 1.1339765177027397, "grad_norm": 7.5625, "learning_rate": 7.531972344340483e-06, "loss": 1.4327716827392578, "step": 6230 }, { "epoch": 1.1343405843269319, "grad_norm": 15.0625, "learning_rate": 7.5305567704349294e-06, "loss": 1.3205615282058716, "step": 6232 }, { "epoch": 1.134704650951124, "grad_norm": 13.125, "learning_rate": 7.5291409719263696e-06, "loss": 1.954979419708252, "step": 6234 }, { "epoch": 1.1350687175753162, "grad_norm": 8.3125, "learning_rate": 7.527724949022572e-06, "loss": 1.3674852848052979, "step": 6236 }, { "epoch": 1.1354327841995084, "grad_norm": 6.40625, "learning_rate": 7.526308701931328e-06, "loss": 1.3474839925765991, "step": 6238 }, { "epoch": 1.1357968508237009, "grad_norm": 14.1875, "learning_rate": 7.52489223086047e-06, "loss": 0.5992215275764465, "step": 6240 }, { "epoch": 1.136160917447893, "grad_norm": 24.875, "learning_rate": 7.523475536017855e-06, "loss": 1.400850772857666, "step": 6242 }, { "epoch": 1.1365249840720852, "grad_norm": 9.875, "learning_rate": 7.522058617611379e-06, "loss": 1.1672260761260986, "step": 6244 }, { "epoch": 1.1368890506962774, "grad_norm": 9.375, "learning_rate": 7.520641475848969e-06, "loss": 1.4604942798614502, "step": 6246 }, { "epoch": 1.1372531173204696, "grad_norm": 10.0625, "learning_rate": 7.519224110938583e-06, "loss": 1.3994964361190796, "step": 6248 }, { "epoch": 1.1376171839446618, "grad_norm": 7.9375, "learning_rate": 7.5178065230882205e-06, "loss": 1.3514472246170044, "step": 6250 }, { "epoch": 1.137981250568854, "grad_norm": 9.5625, "learning_rate": 7.5163887125058975e-06, "loss": 1.2155338525772095, "step": 6252 }, { "epoch": 1.1383453171930462, "grad_norm": 8.875, "learning_rate": 7.514970679399678e-06, "loss": 1.4023786783218384, "step": 6254 }, { "epoch": 1.1387093838172386, "grad_norm": 38.75, "learning_rate": 7.5135524239776525e-06, "loss": 1.0800318717956543, "step": 6256 }, { "epoch": 1.1390734504414308, "grad_norm": 8.5625, "learning_rate": 7.5121339464479395e-06, "loss": 1.371211290359497, "step": 6258 }, { "epoch": 1.139437517065623, "grad_norm": 10.125, "learning_rate": 7.510715247018701e-06, "loss": 1.4171255826950073, "step": 6260 }, { "epoch": 1.1398015836898152, "grad_norm": 11.125, "learning_rate": 7.509296325898121e-06, "loss": 1.3335071802139282, "step": 6262 }, { "epoch": 1.1401656503140074, "grad_norm": 9.625, "learning_rate": 7.507877183294423e-06, "loss": 1.3642916679382324, "step": 6264 }, { "epoch": 1.1405297169381996, "grad_norm": 10.8125, "learning_rate": 7.506457819415858e-06, "loss": 1.3994640111923218, "step": 6266 }, { "epoch": 1.140893783562392, "grad_norm": 15.8125, "learning_rate": 7.505038234470712e-06, "loss": 1.3100051879882812, "step": 6268 }, { "epoch": 1.1412578501865842, "grad_norm": 19.125, "learning_rate": 7.503618428667308e-06, "loss": 1.3815580606460571, "step": 6270 }, { "epoch": 1.1416219168107764, "grad_norm": 15.375, "learning_rate": 7.502198402213989e-06, "loss": 1.344904899597168, "step": 6272 }, { "epoch": 1.1419859834349686, "grad_norm": 13.5, "learning_rate": 7.500778155319146e-06, "loss": 1.5381231307983398, "step": 6274 }, { "epoch": 1.1423500500591608, "grad_norm": 10.875, "learning_rate": 7.499357688191189e-06, "loss": 1.9140883684158325, "step": 6276 }, { "epoch": 1.142714116683353, "grad_norm": 14.125, "learning_rate": 7.497937001038567e-06, "loss": 1.6217155456542969, "step": 6278 }, { "epoch": 1.1430781833075452, "grad_norm": 17.5, "learning_rate": 7.496516094069761e-06, "loss": 1.1441524028778076, "step": 6280 }, { "epoch": 1.1434422499317376, "grad_norm": 47.25, "learning_rate": 7.495094967493286e-06, "loss": 1.0068614482879639, "step": 6282 }, { "epoch": 1.1438063165559298, "grad_norm": 7.9375, "learning_rate": 7.493673621517681e-06, "loss": 1.4224953651428223, "step": 6284 }, { "epoch": 1.144170383180122, "grad_norm": 20.0, "learning_rate": 7.492252056351525e-06, "loss": 1.3754998445510864, "step": 6286 }, { "epoch": 1.1445344498043142, "grad_norm": 10.625, "learning_rate": 7.4908302722034286e-06, "loss": 1.2591030597686768, "step": 6288 }, { "epoch": 1.1448985164285064, "grad_norm": 8.625, "learning_rate": 7.48940826928203e-06, "loss": 1.1596477031707764, "step": 6290 }, { "epoch": 1.1452625830526986, "grad_norm": 14.1875, "learning_rate": 7.487986047796005e-06, "loss": 2.2149932384490967, "step": 6292 }, { "epoch": 1.145626649676891, "grad_norm": 5.25, "learning_rate": 7.486563607954058e-06, "loss": 1.5302165746688843, "step": 6294 }, { "epoch": 1.1459907163010832, "grad_norm": 5.75, "learning_rate": 7.485140949964926e-06, "loss": 1.1800148487091064, "step": 6296 }, { "epoch": 1.1463547829252754, "grad_norm": 3.203125, "learning_rate": 7.483718074037376e-06, "loss": 1.3644418716430664, "step": 6298 }, { "epoch": 1.1467188495494676, "grad_norm": 17.125, "learning_rate": 7.482294980380216e-06, "loss": 0.949133038520813, "step": 6300 }, { "epoch": 1.1470829161736598, "grad_norm": 39.75, "learning_rate": 7.480871669202272e-06, "loss": 1.9215071201324463, "step": 6302 }, { "epoch": 1.147446982797852, "grad_norm": 13.5, "learning_rate": 7.479448140712413e-06, "loss": 1.995676040649414, "step": 6304 }, { "epoch": 1.1478110494220441, "grad_norm": 34.5, "learning_rate": 7.478024395119534e-06, "loss": 0.7667779922485352, "step": 6306 }, { "epoch": 1.1481751160462363, "grad_norm": 46.75, "learning_rate": 7.476600432632564e-06, "loss": 1.5224629640579224, "step": 6308 }, { "epoch": 1.1485391826704288, "grad_norm": 7.625, "learning_rate": 7.475176253460466e-06, "loss": 1.2831366062164307, "step": 6310 }, { "epoch": 1.148903249294621, "grad_norm": 11.8125, "learning_rate": 7.473751857812232e-06, "loss": 1.342350721359253, "step": 6312 }, { "epoch": 1.1492673159188131, "grad_norm": 37.5, "learning_rate": 7.472327245896884e-06, "loss": 1.3044123649597168, "step": 6314 }, { "epoch": 1.1496313825430053, "grad_norm": 156.0, "learning_rate": 7.4709024179234824e-06, "loss": 1.45254647731781, "step": 6316 }, { "epoch": 1.1499954491671975, "grad_norm": 21.5, "learning_rate": 7.469477374101108e-06, "loss": 1.532165288925171, "step": 6318 }, { "epoch": 1.1503595157913897, "grad_norm": 8.6875, "learning_rate": 7.468052114638886e-06, "loss": 1.1146246194839478, "step": 6320 }, { "epoch": 1.1507235824155821, "grad_norm": 11.625, "learning_rate": 7.466626639745966e-06, "loss": 0.7958694100379944, "step": 6322 }, { "epoch": 1.1510876490397743, "grad_norm": 17.625, "learning_rate": 7.46520094963153e-06, "loss": 1.5166926383972168, "step": 6324 }, { "epoch": 1.1514517156639665, "grad_norm": 6.21875, "learning_rate": 7.463775044504793e-06, "loss": 1.0953150987625122, "step": 6326 }, { "epoch": 1.1518157822881587, "grad_norm": 13.25, "learning_rate": 7.462348924575e-06, "loss": 1.311816930770874, "step": 6328 }, { "epoch": 1.152179848912351, "grad_norm": 13.8125, "learning_rate": 7.460922590051427e-06, "loss": 0.9509832262992859, "step": 6330 }, { "epoch": 1.1525439155365431, "grad_norm": 18.5, "learning_rate": 7.459496041143388e-06, "loss": 1.926865816116333, "step": 6332 }, { "epoch": 1.1529079821607353, "grad_norm": 8.5625, "learning_rate": 7.4580692780602185e-06, "loss": 1.442535638809204, "step": 6334 }, { "epoch": 1.1532720487849277, "grad_norm": 8.125, "learning_rate": 7.45664230101129e-06, "loss": 1.306257724761963, "step": 6336 }, { "epoch": 1.15363611540912, "grad_norm": 10.0625, "learning_rate": 7.455215110206006e-06, "loss": 1.2971405982971191, "step": 6338 }, { "epoch": 1.1540001820333121, "grad_norm": 12.75, "learning_rate": 7.453787705853805e-06, "loss": 1.462487816810608, "step": 6340 }, { "epoch": 1.1543642486575043, "grad_norm": 13.1875, "learning_rate": 7.45236008816415e-06, "loss": 1.573169469833374, "step": 6342 }, { "epoch": 1.1547283152816965, "grad_norm": 17.125, "learning_rate": 7.450932257346537e-06, "loss": 2.021141529083252, "step": 6344 }, { "epoch": 1.1550923819058887, "grad_norm": 13.5625, "learning_rate": 7.449504213610494e-06, "loss": 1.3924634456634521, "step": 6346 }, { "epoch": 1.1554564485300811, "grad_norm": 17.0, "learning_rate": 7.448075957165584e-06, "loss": 1.3201172351837158, "step": 6348 }, { "epoch": 1.1558205151542733, "grad_norm": 14.875, "learning_rate": 7.446647488221394e-06, "loss": 1.371399998664856, "step": 6350 }, { "epoch": 1.1561845817784655, "grad_norm": 11.9375, "learning_rate": 7.445218806987551e-06, "loss": 1.3123196363449097, "step": 6352 }, { "epoch": 1.1565486484026577, "grad_norm": 3.734375, "learning_rate": 7.443789913673702e-06, "loss": 1.211283564567566, "step": 6354 }, { "epoch": 1.15691271502685, "grad_norm": 10.0, "learning_rate": 7.442360808489535e-06, "loss": 1.22006094455719, "step": 6356 }, { "epoch": 1.157276781651042, "grad_norm": 31.375, "learning_rate": 7.440931491644765e-06, "loss": 0.45160406827926636, "step": 6358 }, { "epoch": 1.1576408482752343, "grad_norm": 9.5625, "learning_rate": 7.439501963349139e-06, "loss": 1.3940057754516602, "step": 6360 }, { "epoch": 1.1580049148994265, "grad_norm": 5.0, "learning_rate": 7.438072223812434e-06, "loss": 1.384699821472168, "step": 6362 }, { "epoch": 1.158368981523619, "grad_norm": 4.25, "learning_rate": 7.436642273244457e-06, "loss": 1.4117671251296997, "step": 6364 }, { "epoch": 1.158733048147811, "grad_norm": 11.4375, "learning_rate": 7.435212111855048e-06, "loss": 1.1795032024383545, "step": 6366 }, { "epoch": 1.1590971147720033, "grad_norm": 18.0, "learning_rate": 7.4337817398540775e-06, "loss": 1.5126287937164307, "step": 6368 }, { "epoch": 1.1594611813961955, "grad_norm": 9.125, "learning_rate": 7.432351157451447e-06, "loss": 1.2650301456451416, "step": 6370 }, { "epoch": 1.1598252480203877, "grad_norm": 11.0, "learning_rate": 7.430920364857092e-06, "loss": 1.3520147800445557, "step": 6372 }, { "epoch": 1.1601893146445799, "grad_norm": 18.75, "learning_rate": 7.429489362280971e-06, "loss": 1.5229713916778564, "step": 6374 }, { "epoch": 1.1605533812687723, "grad_norm": 11.375, "learning_rate": 7.428058149933079e-06, "loss": 1.7363439798355103, "step": 6376 }, { "epoch": 1.1609174478929645, "grad_norm": 12.4375, "learning_rate": 7.4266267280234405e-06, "loss": 1.5316307544708252, "step": 6378 }, { "epoch": 1.1612815145171567, "grad_norm": 30.0, "learning_rate": 7.4251950967621125e-06, "loss": 1.9637465476989746, "step": 6380 }, { "epoch": 1.1616455811413489, "grad_norm": 34.75, "learning_rate": 7.42376325635918e-06, "loss": 1.2630438804626465, "step": 6382 }, { "epoch": 1.162009647765541, "grad_norm": 3.296875, "learning_rate": 7.422331207024757e-06, "loss": 1.0641576051712036, "step": 6384 }, { "epoch": 1.1623737143897332, "grad_norm": 6.65625, "learning_rate": 7.420898948968995e-06, "loss": 1.1182897090911865, "step": 6386 }, { "epoch": 1.1627377810139254, "grad_norm": 17.875, "learning_rate": 7.419466482402071e-06, "loss": 0.9243950843811035, "step": 6388 }, { "epoch": 1.1631018476381179, "grad_norm": 9.75, "learning_rate": 7.418033807534193e-06, "loss": 1.0089322328567505, "step": 6390 }, { "epoch": 1.16346591426231, "grad_norm": 8.5, "learning_rate": 7.416600924575604e-06, "loss": 1.3400102853775024, "step": 6392 }, { "epoch": 1.1638299808865022, "grad_norm": 7.0, "learning_rate": 7.415167833736565e-06, "loss": 1.4119009971618652, "step": 6394 }, { "epoch": 1.1641940475106944, "grad_norm": 14.0625, "learning_rate": 7.413734535227386e-06, "loss": 1.4507344961166382, "step": 6396 }, { "epoch": 1.1645581141348866, "grad_norm": 25.125, "learning_rate": 7.412301029258389e-06, "loss": 1.4036744832992554, "step": 6398 }, { "epoch": 1.1649221807590788, "grad_norm": 9.25, "learning_rate": 7.410867316039943e-06, "loss": 1.3667032718658447, "step": 6400 }, { "epoch": 1.1652862473832712, "grad_norm": 4.0625, "learning_rate": 7.409433395782433e-06, "loss": 1.1058387756347656, "step": 6402 }, { "epoch": 1.1656503140074634, "grad_norm": 8.375, "learning_rate": 7.407999268696287e-06, "loss": 1.0475088357925415, "step": 6404 }, { "epoch": 1.1660143806316556, "grad_norm": 10.625, "learning_rate": 7.406564934991953e-06, "loss": 0.9343958497047424, "step": 6406 }, { "epoch": 1.1663784472558478, "grad_norm": 8.125, "learning_rate": 7.4051303948799135e-06, "loss": 1.419680118560791, "step": 6408 }, { "epoch": 1.16674251388004, "grad_norm": 47.0, "learning_rate": 7.403695648570685e-06, "loss": 1.5038814544677734, "step": 6410 }, { "epoch": 1.1671065805042322, "grad_norm": 17.625, "learning_rate": 7.40226069627481e-06, "loss": 1.248150110244751, "step": 6412 }, { "epoch": 1.1674706471284244, "grad_norm": 9.25, "learning_rate": 7.400825538202861e-06, "loss": 1.5459160804748535, "step": 6414 }, { "epoch": 1.1678347137526166, "grad_norm": 21.25, "learning_rate": 7.399390174565438e-06, "loss": 1.4511265754699707, "step": 6416 }, { "epoch": 1.168198780376809, "grad_norm": 12.1875, "learning_rate": 7.3979546055731784e-06, "loss": 1.095632553100586, "step": 6418 }, { "epoch": 1.1685628470010012, "grad_norm": 15.1875, "learning_rate": 7.396518831436748e-06, "loss": 1.3852022886276245, "step": 6420 }, { "epoch": 1.1689269136251934, "grad_norm": 13.1875, "learning_rate": 7.395082852366837e-06, "loss": 1.4753453731536865, "step": 6422 }, { "epoch": 1.1692909802493856, "grad_norm": 23.0, "learning_rate": 7.393646668574172e-06, "loss": 1.6241109371185303, "step": 6424 }, { "epoch": 1.1696550468735778, "grad_norm": 16.125, "learning_rate": 7.392210280269507e-06, "loss": 1.8432515859603882, "step": 6426 }, { "epoch": 1.1700191134977702, "grad_norm": 10.5, "learning_rate": 7.390773687663626e-06, "loss": 1.3995847702026367, "step": 6428 }, { "epoch": 1.1703831801219624, "grad_norm": 5.90625, "learning_rate": 7.38933689096734e-06, "loss": 1.1835557222366333, "step": 6430 }, { "epoch": 1.1707472467461546, "grad_norm": 17.25, "learning_rate": 7.387899890391499e-06, "loss": 1.3437449932098389, "step": 6432 }, { "epoch": 1.1711113133703468, "grad_norm": 9.25, "learning_rate": 7.386462686146971e-06, "loss": 1.2797107696533203, "step": 6434 }, { "epoch": 1.171475379994539, "grad_norm": 12.8125, "learning_rate": 7.385025278444664e-06, "loss": 1.510704517364502, "step": 6436 }, { "epoch": 1.1718394466187312, "grad_norm": 16.75, "learning_rate": 7.3835876674955085e-06, "loss": 1.4765868186950684, "step": 6438 }, { "epoch": 1.1722035132429234, "grad_norm": 13.5625, "learning_rate": 7.3821498535104715e-06, "loss": 1.404543399810791, "step": 6440 }, { "epoch": 1.1725675798671156, "grad_norm": 40.5, "learning_rate": 7.380711836700547e-06, "loss": 1.0411429405212402, "step": 6442 }, { "epoch": 1.172931646491308, "grad_norm": 9.75, "learning_rate": 7.379273617276755e-06, "loss": 1.3396193981170654, "step": 6444 }, { "epoch": 1.1732957131155002, "grad_norm": 15.0625, "learning_rate": 7.377835195450147e-06, "loss": 1.489682912826538, "step": 6446 }, { "epoch": 1.1736597797396924, "grad_norm": 9.875, "learning_rate": 7.376396571431808e-06, "loss": 1.307582974433899, "step": 6448 }, { "epoch": 1.1740238463638846, "grad_norm": 14.125, "learning_rate": 7.374957745432853e-06, "loss": 1.4828547239303589, "step": 6450 }, { "epoch": 1.1743879129880768, "grad_norm": 14.75, "learning_rate": 7.373518717664418e-06, "loss": 1.586402416229248, "step": 6452 }, { "epoch": 1.174751979612269, "grad_norm": 116.0, "learning_rate": 7.37207948833768e-06, "loss": 1.8769099712371826, "step": 6454 }, { "epoch": 1.1751160462364614, "grad_norm": 12.5625, "learning_rate": 7.3706400576638385e-06, "loss": 1.3780337572097778, "step": 6456 }, { "epoch": 1.1754801128606536, "grad_norm": 14.25, "learning_rate": 7.369200425854119e-06, "loss": 1.3824583292007446, "step": 6458 }, { "epoch": 1.1758441794848458, "grad_norm": 34.75, "learning_rate": 7.367760593119788e-06, "loss": 1.509294867515564, "step": 6460 }, { "epoch": 1.176208246109038, "grad_norm": 37.0, "learning_rate": 7.366320559672136e-06, "loss": 2.1823012828826904, "step": 6462 }, { "epoch": 1.1765723127332302, "grad_norm": 6.96875, "learning_rate": 7.364880325722474e-06, "loss": 1.2436455488204956, "step": 6464 }, { "epoch": 1.1769363793574223, "grad_norm": 5.40625, "learning_rate": 7.3634398914821604e-06, "loss": 1.1496533155441284, "step": 6466 }, { "epoch": 1.1773004459816145, "grad_norm": 19.625, "learning_rate": 7.361999257162564e-06, "loss": 1.324145793914795, "step": 6468 }, { "epoch": 1.177664512605807, "grad_norm": 15.8125, "learning_rate": 7.360558422975099e-06, "loss": 1.561884880065918, "step": 6470 }, { "epoch": 1.1780285792299992, "grad_norm": 26.5, "learning_rate": 7.3591173891311985e-06, "loss": 1.5190938711166382, "step": 6472 }, { "epoch": 1.1783926458541913, "grad_norm": 11.6875, "learning_rate": 7.35767615584233e-06, "loss": 1.820457935333252, "step": 6474 }, { "epoch": 1.1787567124783835, "grad_norm": 10.125, "learning_rate": 7.356234723319986e-06, "loss": 1.3109452724456787, "step": 6476 }, { "epoch": 1.1791207791025757, "grad_norm": 6.0625, "learning_rate": 7.354793091775694e-06, "loss": 1.2659820318222046, "step": 6478 }, { "epoch": 1.179484845726768, "grad_norm": 10.125, "learning_rate": 7.353351261421005e-06, "loss": 1.1349012851715088, "step": 6480 }, { "epoch": 1.1798489123509603, "grad_norm": 17.5, "learning_rate": 7.351909232467505e-06, "loss": 1.104724407196045, "step": 6482 }, { "epoch": 1.1802129789751525, "grad_norm": 13.875, "learning_rate": 7.350467005126802e-06, "loss": 1.198919415473938, "step": 6484 }, { "epoch": 1.1805770455993447, "grad_norm": 13.125, "learning_rate": 7.349024579610542e-06, "loss": 1.4486275911331177, "step": 6486 }, { "epoch": 1.180941112223537, "grad_norm": 8.6875, "learning_rate": 7.347581956130387e-06, "loss": 1.340187430381775, "step": 6488 }, { "epoch": 1.1813051788477291, "grad_norm": 14.5625, "learning_rate": 7.346139134898045e-06, "loss": 1.5551060438156128, "step": 6490 }, { "epoch": 1.1816692454719213, "grad_norm": 14.8125, "learning_rate": 7.344696116125241e-06, "loss": 2.0186920166015625, "step": 6492 }, { "epoch": 1.1820333120961135, "grad_norm": 7.5, "learning_rate": 7.34325290002373e-06, "loss": 1.1832094192504883, "step": 6494 }, { "epoch": 1.1823973787203057, "grad_norm": 8.375, "learning_rate": 7.3418094868053e-06, "loss": 1.1233105659484863, "step": 6496 }, { "epoch": 1.1827614453444981, "grad_norm": 27.0, "learning_rate": 7.340365876681763e-06, "loss": 0.8370498418807983, "step": 6498 }, { "epoch": 1.1831255119686903, "grad_norm": 21.0, "learning_rate": 7.3389220698649685e-06, "loss": 0.9888896346092224, "step": 6500 }, { "epoch": 1.1834895785928825, "grad_norm": 10.9375, "learning_rate": 7.337478066566787e-06, "loss": 0.07581327855587006, "step": 6502 }, { "epoch": 1.1838536452170747, "grad_norm": 11.1875, "learning_rate": 7.336033866999119e-06, "loss": 0.45197027921676636, "step": 6504 }, { "epoch": 1.184217711841267, "grad_norm": 11.125, "learning_rate": 7.334589471373894e-06, "loss": 1.5227274894714355, "step": 6506 }, { "epoch": 1.184581778465459, "grad_norm": 21.125, "learning_rate": 7.3331448799030735e-06, "loss": 1.7064509391784668, "step": 6508 }, { "epoch": 1.1849458450896515, "grad_norm": 12.0, "learning_rate": 7.331700092798646e-06, "loss": 1.5975301265716553, "step": 6510 }, { "epoch": 1.1853099117138437, "grad_norm": 27.25, "learning_rate": 7.330255110272626e-06, "loss": 2.0628128051757812, "step": 6512 }, { "epoch": 1.185673978338036, "grad_norm": 11.4375, "learning_rate": 7.32880993253706e-06, "loss": 1.7338371276855469, "step": 6514 }, { "epoch": 1.186038044962228, "grad_norm": 14.6875, "learning_rate": 7.32736455980402e-06, "loss": 1.141144871711731, "step": 6516 }, { "epoch": 1.1864021115864203, "grad_norm": 11.3125, "learning_rate": 7.32591899228561e-06, "loss": 1.7801835536956787, "step": 6518 }, { "epoch": 1.1867661782106125, "grad_norm": 6.4375, "learning_rate": 7.3244732301939625e-06, "loss": 1.399308204650879, "step": 6520 }, { "epoch": 1.1871302448348047, "grad_norm": 11.1875, "learning_rate": 7.323027273741237e-06, "loss": 1.177535057067871, "step": 6522 }, { "epoch": 1.187494311458997, "grad_norm": 13.5, "learning_rate": 7.32158112313962e-06, "loss": 1.6235177516937256, "step": 6524 }, { "epoch": 1.1878583780831893, "grad_norm": 11.5, "learning_rate": 7.320134778601329e-06, "loss": 1.449987769126892, "step": 6526 }, { "epoch": 1.1882224447073815, "grad_norm": 14.3125, "learning_rate": 7.318688240338607e-06, "loss": 1.4296740293502808, "step": 6528 }, { "epoch": 1.1885865113315737, "grad_norm": 16.75, "learning_rate": 7.317241508563733e-06, "loss": 1.3188289403915405, "step": 6530 }, { "epoch": 1.1889505779557659, "grad_norm": 11.75, "learning_rate": 7.315794583489006e-06, "loss": 1.4860177040100098, "step": 6532 }, { "epoch": 1.189314644579958, "grad_norm": 26.5, "learning_rate": 7.314347465326757e-06, "loss": 1.426046371459961, "step": 6534 }, { "epoch": 1.1896787112041505, "grad_norm": 25.75, "learning_rate": 7.31290015428934e-06, "loss": 1.3271759748458862, "step": 6536 }, { "epoch": 1.1900427778283427, "grad_norm": 11.1875, "learning_rate": 7.311452650589148e-06, "loss": 1.1695268154144287, "step": 6538 }, { "epoch": 1.1904068444525349, "grad_norm": 13.1875, "learning_rate": 7.310004954438594e-06, "loss": 1.1791671514511108, "step": 6540 }, { "epoch": 1.190770911076727, "grad_norm": 15.125, "learning_rate": 7.308557066050126e-06, "loss": 1.3848741054534912, "step": 6542 }, { "epoch": 1.1911349777009193, "grad_norm": 9.1875, "learning_rate": 7.307108985636206e-06, "loss": 1.4438623189926147, "step": 6544 }, { "epoch": 1.1914990443251114, "grad_norm": 12.0, "learning_rate": 7.305660713409343e-06, "loss": 1.3221287727355957, "step": 6546 }, { "epoch": 1.1918631109493036, "grad_norm": 13.375, "learning_rate": 7.304212249582059e-06, "loss": 1.3651946783065796, "step": 6548 }, { "epoch": 1.1922271775734958, "grad_norm": 9.875, "learning_rate": 7.302763594366915e-06, "loss": 1.4340474605560303, "step": 6550 }, { "epoch": 1.1925912441976882, "grad_norm": 15.6875, "learning_rate": 7.3013147479764936e-06, "loss": 1.5670411586761475, "step": 6552 }, { "epoch": 1.1929553108218804, "grad_norm": 16.625, "learning_rate": 7.299865710623406e-06, "loss": 1.788050651550293, "step": 6554 }, { "epoch": 1.1933193774460726, "grad_norm": 8.25, "learning_rate": 7.298416482520294e-06, "loss": 1.226399540901184, "step": 6556 }, { "epoch": 1.1936834440702648, "grad_norm": 22.625, "learning_rate": 7.296967063879823e-06, "loss": 0.8390741348266602, "step": 6558 }, { "epoch": 1.194047510694457, "grad_norm": 5.21875, "learning_rate": 7.295517454914694e-06, "loss": 1.1856259107589722, "step": 6560 }, { "epoch": 1.1944115773186492, "grad_norm": 17.125, "learning_rate": 7.294067655837629e-06, "loss": 1.42365562915802, "step": 6562 }, { "epoch": 1.1947756439428416, "grad_norm": 8.0625, "learning_rate": 7.292617666861377e-06, "loss": 1.3596998453140259, "step": 6564 }, { "epoch": 1.1951397105670338, "grad_norm": 10.1875, "learning_rate": 7.291167488198723e-06, "loss": 1.0605143308639526, "step": 6566 }, { "epoch": 1.195503777191226, "grad_norm": 9.3125, "learning_rate": 7.289717120062471e-06, "loss": 1.1264477968215942, "step": 6568 }, { "epoch": 1.1958678438154182, "grad_norm": 8.5625, "learning_rate": 7.28826656266546e-06, "loss": 1.0780694484710693, "step": 6570 }, { "epoch": 1.1962319104396104, "grad_norm": 7.1875, "learning_rate": 7.286815816220551e-06, "loss": 1.5786877870559692, "step": 6572 }, { "epoch": 1.1965959770638026, "grad_norm": 13.9375, "learning_rate": 7.285364880940637e-06, "loss": 1.3698921203613281, "step": 6574 }, { "epoch": 1.1969600436879948, "grad_norm": 10.8125, "learning_rate": 7.283913757038636e-06, "loss": 1.3198425769805908, "step": 6576 }, { "epoch": 1.1973241103121872, "grad_norm": 13.375, "learning_rate": 7.282462444727492e-06, "loss": 1.289044737815857, "step": 6578 }, { "epoch": 1.1976881769363794, "grad_norm": 6.375, "learning_rate": 7.281010944220184e-06, "loss": 1.0620331764221191, "step": 6580 }, { "epoch": 1.1980522435605716, "grad_norm": 147.0, "learning_rate": 7.279559255729711e-06, "loss": 1.3790700435638428, "step": 6582 }, { "epoch": 1.1984163101847638, "grad_norm": 3.765625, "learning_rate": 7.278107379469103e-06, "loss": 0.8620489835739136, "step": 6584 }, { "epoch": 1.198780376808956, "grad_norm": 12.1875, "learning_rate": 7.276655315651415e-06, "loss": 1.2727453708648682, "step": 6586 }, { "epoch": 1.1991444434331482, "grad_norm": 11.25, "learning_rate": 7.275203064489735e-06, "loss": 1.532165288925171, "step": 6588 }, { "epoch": 1.1995085100573406, "grad_norm": 12.5, "learning_rate": 7.273750626197173e-06, "loss": 1.5772807598114014, "step": 6590 }, { "epoch": 1.1998725766815328, "grad_norm": 9.375, "learning_rate": 7.272298000986868e-06, "loss": 0.9821051359176636, "step": 6592 }, { "epoch": 1.200236643305725, "grad_norm": 6.0, "learning_rate": 7.270845189071989e-06, "loss": 1.3076342344284058, "step": 6594 }, { "epoch": 1.2006007099299172, "grad_norm": 14.9375, "learning_rate": 7.269392190665727e-06, "loss": 1.369358777999878, "step": 6596 }, { "epoch": 1.2009647765541094, "grad_norm": 7.90625, "learning_rate": 7.267939005981306e-06, "loss": 1.7717223167419434, "step": 6598 }, { "epoch": 1.2013288431783016, "grad_norm": 8.625, "learning_rate": 7.266485635231975e-06, "loss": 1.5751750469207764, "step": 6600 }, { "epoch": 1.2016929098024938, "grad_norm": 13.125, "learning_rate": 7.2650320786310095e-06, "loss": 1.4999746084213257, "step": 6602 }, { "epoch": 1.202056976426686, "grad_norm": 9.875, "learning_rate": 7.2635783363917125e-06, "loss": 1.3215546607971191, "step": 6604 }, { "epoch": 1.2024210430508784, "grad_norm": 18.5, "learning_rate": 7.262124408727416e-06, "loss": 1.3302925825119019, "step": 6606 }, { "epoch": 1.2027851096750706, "grad_norm": 4.25, "learning_rate": 7.2606702958514775e-06, "loss": 1.225448727607727, "step": 6608 }, { "epoch": 1.2031491762992628, "grad_norm": 13.0, "learning_rate": 7.259215997977282e-06, "loss": 1.221543312072754, "step": 6610 }, { "epoch": 1.203513242923455, "grad_norm": 27.0, "learning_rate": 7.257761515318243e-06, "loss": 1.6428560018539429, "step": 6612 }, { "epoch": 1.2038773095476472, "grad_norm": 20.5, "learning_rate": 7.256306848087797e-06, "loss": 1.39435613155365, "step": 6614 }, { "epoch": 1.2042413761718394, "grad_norm": 19.125, "learning_rate": 7.254851996499412e-06, "loss": 1.7749639749526978, "step": 6616 }, { "epoch": 1.2046054427960318, "grad_norm": 14.875, "learning_rate": 7.253396960766583e-06, "loss": 1.3632102012634277, "step": 6618 }, { "epoch": 1.204969509420224, "grad_norm": 11.375, "learning_rate": 7.25194174110283e-06, "loss": 1.3126219511032104, "step": 6620 }, { "epoch": 1.2053335760444162, "grad_norm": 9.5625, "learning_rate": 7.2504863377217e-06, "loss": 0.8591881990432739, "step": 6622 }, { "epoch": 1.2056976426686083, "grad_norm": 5.5625, "learning_rate": 7.249030750836767e-06, "loss": 1.1397168636322021, "step": 6624 }, { "epoch": 1.2060617092928005, "grad_norm": 11.75, "learning_rate": 7.247574980661635e-06, "loss": 0.9087386131286621, "step": 6626 }, { "epoch": 1.2064257759169927, "grad_norm": 25.375, "learning_rate": 7.246119027409928e-06, "loss": 1.1396706104278564, "step": 6628 }, { "epoch": 1.206789842541185, "grad_norm": 22.375, "learning_rate": 7.244662891295307e-06, "loss": 1.324886679649353, "step": 6630 }, { "epoch": 1.2071539091653773, "grad_norm": 9.9375, "learning_rate": 7.24320657253145e-06, "loss": 1.511910080909729, "step": 6632 }, { "epoch": 1.2075179757895695, "grad_norm": 11.4375, "learning_rate": 7.241750071332065e-06, "loss": 1.3533015251159668, "step": 6634 }, { "epoch": 1.2078820424137617, "grad_norm": 18.25, "learning_rate": 7.2402933879108905e-06, "loss": 1.3343080282211304, "step": 6636 }, { "epoch": 1.208246109037954, "grad_norm": 10.3125, "learning_rate": 7.238836522481687e-06, "loss": 1.3347954750061035, "step": 6638 }, { "epoch": 1.2086101756621461, "grad_norm": 17.0, "learning_rate": 7.237379475258244e-06, "loss": 1.3202630281448364, "step": 6640 }, { "epoch": 1.2089742422863383, "grad_norm": 7.625, "learning_rate": 7.235922246454381e-06, "loss": 1.1637362241744995, "step": 6642 }, { "epoch": 1.2093383089105307, "grad_norm": 3.703125, "learning_rate": 7.234464836283935e-06, "loss": 0.8209491968154907, "step": 6644 }, { "epoch": 1.209702375534723, "grad_norm": 3.90625, "learning_rate": 7.233007244960775e-06, "loss": 0.9462370276451111, "step": 6646 }, { "epoch": 1.2100664421589151, "grad_norm": 10.6875, "learning_rate": 7.2315494726988e-06, "loss": 0.38805603981018066, "step": 6648 }, { "epoch": 1.2104305087831073, "grad_norm": 9.75, "learning_rate": 7.23009151971193e-06, "loss": 1.400206208229065, "step": 6650 }, { "epoch": 1.2107945754072995, "grad_norm": 4.0625, "learning_rate": 7.228633386214119e-06, "loss": 0.9574282169342041, "step": 6652 }, { "epoch": 1.2111586420314917, "grad_norm": 10.0625, "learning_rate": 7.227175072419335e-06, "loss": 1.3425127267837524, "step": 6654 }, { "epoch": 1.211522708655684, "grad_norm": 75.0, "learning_rate": 7.225716578541582e-06, "loss": 1.3184183835983276, "step": 6656 }, { "epoch": 1.211886775279876, "grad_norm": 31.375, "learning_rate": 7.224257904794887e-06, "loss": 1.531571626663208, "step": 6658 }, { "epoch": 1.2122508419040685, "grad_norm": 151.0, "learning_rate": 7.222799051393308e-06, "loss": 0.5535272359848022, "step": 6660 }, { "epoch": 1.2126149085282607, "grad_norm": 5.46875, "learning_rate": 7.2213400185509245e-06, "loss": 1.0640232563018799, "step": 6662 }, { "epoch": 1.212978975152453, "grad_norm": 32.25, "learning_rate": 7.2198808064818425e-06, "loss": 1.5700925588607788, "step": 6664 }, { "epoch": 1.213343041776645, "grad_norm": 9.0, "learning_rate": 7.2184214154001965e-06, "loss": 1.416872501373291, "step": 6666 }, { "epoch": 1.2137071084008373, "grad_norm": 23.75, "learning_rate": 7.216961845520143e-06, "loss": 1.3927534818649292, "step": 6668 }, { "epoch": 1.2140711750250297, "grad_norm": 10.4375, "learning_rate": 7.215502097055871e-06, "loss": 1.3334345817565918, "step": 6670 }, { "epoch": 1.214435241649222, "grad_norm": 4.84375, "learning_rate": 7.214042170221596e-06, "loss": 1.010744333267212, "step": 6672 }, { "epoch": 1.214799308273414, "grad_norm": 34.0, "learning_rate": 7.212582065231548e-06, "loss": 1.3117693662643433, "step": 6674 }, { "epoch": 1.2151633748976063, "grad_norm": 22.75, "learning_rate": 7.211121782299999e-06, "loss": 1.4588491916656494, "step": 6676 }, { "epoch": 1.2155274415217985, "grad_norm": 21.125, "learning_rate": 7.209661321641235e-06, "loss": 1.8186008930206299, "step": 6678 }, { "epoch": 1.2158915081459907, "grad_norm": 31.125, "learning_rate": 7.208200683469575e-06, "loss": 1.4026299715042114, "step": 6680 }, { "epoch": 1.2162555747701829, "grad_norm": 23.75, "learning_rate": 7.2067398679993615e-06, "loss": 1.360709309577942, "step": 6682 }, { "epoch": 1.216619641394375, "grad_norm": 8.75, "learning_rate": 7.205278875444963e-06, "loss": 1.3849159479141235, "step": 6684 }, { "epoch": 1.2169837080185675, "grad_norm": 9.375, "learning_rate": 7.203817706020773e-06, "loss": 1.5182673931121826, "step": 6686 }, { "epoch": 1.2173477746427597, "grad_norm": 18.75, "learning_rate": 7.202356359941214e-06, "loss": 1.2989633083343506, "step": 6688 }, { "epoch": 1.2177118412669519, "grad_norm": 7.5625, "learning_rate": 7.200894837420731e-06, "loss": 1.3390648365020752, "step": 6690 }, { "epoch": 1.218075907891144, "grad_norm": 18.875, "learning_rate": 7.199433138673799e-06, "loss": 1.4473764896392822, "step": 6692 }, { "epoch": 1.2184399745153363, "grad_norm": 8.25, "learning_rate": 7.197971263914916e-06, "loss": 1.149372935295105, "step": 6694 }, { "epoch": 1.2188040411395284, "grad_norm": 11.0, "learning_rate": 7.196509213358602e-06, "loss": 0.6737701296806335, "step": 6696 }, { "epoch": 1.2191681077637209, "grad_norm": 16.875, "learning_rate": 7.1950469872194095e-06, "loss": 0.5430116057395935, "step": 6698 }, { "epoch": 1.219532174387913, "grad_norm": 12.5625, "learning_rate": 7.193584585711917e-06, "loss": 1.1017062664031982, "step": 6700 }, { "epoch": 1.2198962410121053, "grad_norm": 12.9375, "learning_rate": 7.1921220090507235e-06, "loss": 0.9444236755371094, "step": 6702 }, { "epoch": 1.2202603076362974, "grad_norm": 13.4375, "learning_rate": 7.190659257450454e-06, "loss": 1.5194355249404907, "step": 6704 }, { "epoch": 1.2206243742604896, "grad_norm": 36.5, "learning_rate": 7.1891963311257675e-06, "loss": 1.3496087789535522, "step": 6706 }, { "epoch": 1.2209884408846818, "grad_norm": 17.875, "learning_rate": 7.187733230291336e-06, "loss": 1.2691317796707153, "step": 6708 }, { "epoch": 1.221352507508874, "grad_norm": 34.0, "learning_rate": 7.186269955161867e-06, "loss": 1.3138542175292969, "step": 6710 }, { "epoch": 1.2217165741330664, "grad_norm": 3.5625, "learning_rate": 7.184806505952091e-06, "loss": 1.1663285493850708, "step": 6712 }, { "epoch": 1.2220806407572586, "grad_norm": 5.34375, "learning_rate": 7.18334288287676e-06, "loss": 1.0912249088287354, "step": 6714 }, { "epoch": 1.2224447073814508, "grad_norm": 13.5625, "learning_rate": 7.181879086150658e-06, "loss": 1.0867218971252441, "step": 6716 }, { "epoch": 1.222808774005643, "grad_norm": 268.0, "learning_rate": 7.180415115988588e-06, "loss": 0.7946165800094604, "step": 6718 }, { "epoch": 1.2231728406298352, "grad_norm": 15.0, "learning_rate": 7.178950972605385e-06, "loss": 1.2604389190673828, "step": 6720 }, { "epoch": 1.2235369072540274, "grad_norm": 6.40625, "learning_rate": 7.177486656215906e-06, "loss": 1.27571439743042, "step": 6722 }, { "epoch": 1.2239009738782198, "grad_norm": 11.1875, "learning_rate": 7.176022167035031e-06, "loss": 1.2737550735473633, "step": 6724 }, { "epoch": 1.224265040502412, "grad_norm": 19.0, "learning_rate": 7.174557505277669e-06, "loss": 1.4829754829406738, "step": 6726 }, { "epoch": 1.2246291071266042, "grad_norm": 18.25, "learning_rate": 7.17309267115875e-06, "loss": 1.2702488899230957, "step": 6728 }, { "epoch": 1.2249931737507964, "grad_norm": 65.5, "learning_rate": 7.171627664893239e-06, "loss": 1.223417043685913, "step": 6730 }, { "epoch": 1.2253572403749886, "grad_norm": 20.875, "learning_rate": 7.170162486696117e-06, "loss": 0.8921594619750977, "step": 6732 }, { "epoch": 1.2257213069991808, "grad_norm": 18.75, "learning_rate": 7.16869713678239e-06, "loss": 0.931276261806488, "step": 6734 }, { "epoch": 1.226085373623373, "grad_norm": 16.25, "learning_rate": 7.167231615367096e-06, "loss": 1.604603886604309, "step": 6736 }, { "epoch": 1.2264494402475652, "grad_norm": 4.96875, "learning_rate": 7.1657659226652935e-06, "loss": 1.1619961261749268, "step": 6738 }, { "epoch": 1.2268135068717576, "grad_norm": 8.625, "learning_rate": 7.164300058892064e-06, "loss": 1.0576145648956299, "step": 6740 }, { "epoch": 1.2271775734959498, "grad_norm": 13.0625, "learning_rate": 7.162834024262522e-06, "loss": 1.3183400630950928, "step": 6742 }, { "epoch": 1.227541640120142, "grad_norm": 15.5, "learning_rate": 7.161367818991796e-06, "loss": 1.1441850662231445, "step": 6744 }, { "epoch": 1.2279057067443342, "grad_norm": 12.75, "learning_rate": 7.159901443295052e-06, "loss": 0.7580921053886414, "step": 6746 }, { "epoch": 1.2282697733685264, "grad_norm": 14.625, "learning_rate": 7.15843489738747e-06, "loss": 0.856759786605835, "step": 6748 }, { "epoch": 1.2286338399927186, "grad_norm": 13.0625, "learning_rate": 7.156968181484263e-06, "loss": 1.4152673482894897, "step": 6750 }, { "epoch": 1.228997906616911, "grad_norm": 15.125, "learning_rate": 7.155501295800664e-06, "loss": 1.6950697898864746, "step": 6752 }, { "epoch": 1.2293619732411032, "grad_norm": 8.375, "learning_rate": 7.154034240551933e-06, "loss": 1.3276128768920898, "step": 6754 }, { "epoch": 1.2297260398652954, "grad_norm": 7.53125, "learning_rate": 7.152567015953354e-06, "loss": 1.2459003925323486, "step": 6756 }, { "epoch": 1.2300901064894876, "grad_norm": 12.3125, "learning_rate": 7.151099622220234e-06, "loss": 1.3761324882507324, "step": 6758 }, { "epoch": 1.2304541731136798, "grad_norm": 16.75, "learning_rate": 7.149632059567912e-06, "loss": 1.483574628829956, "step": 6760 }, { "epoch": 1.230818239737872, "grad_norm": 7.875, "learning_rate": 7.1481643282117445e-06, "loss": 1.220634937286377, "step": 6762 }, { "epoch": 1.2311823063620642, "grad_norm": 16.625, "learning_rate": 7.146696428367115e-06, "loss": 1.0857765674591064, "step": 6764 }, { "epoch": 1.2315463729862566, "grad_norm": 15.6875, "learning_rate": 7.14522836024943e-06, "loss": 1.382385015487671, "step": 6766 }, { "epoch": 1.2319104396104488, "grad_norm": 7.0625, "learning_rate": 7.143760124074124e-06, "loss": 0.7194342613220215, "step": 6768 }, { "epoch": 1.232274506234641, "grad_norm": 23.0, "learning_rate": 7.142291720056655e-06, "loss": 1.3154785633087158, "step": 6770 }, { "epoch": 1.2326385728588332, "grad_norm": 7.6875, "learning_rate": 7.140823148412508e-06, "loss": 0.8829584121704102, "step": 6772 }, { "epoch": 1.2330026394830254, "grad_norm": 10.75, "learning_rate": 7.139354409357187e-06, "loss": 1.566400170326233, "step": 6774 }, { "epoch": 1.2333667061072175, "grad_norm": 9.875, "learning_rate": 7.137885503106223e-06, "loss": 1.353212833404541, "step": 6776 }, { "epoch": 1.23373077273141, "grad_norm": 12.0625, "learning_rate": 7.136416429875172e-06, "loss": 1.356162190437317, "step": 6778 }, { "epoch": 1.2340948393556022, "grad_norm": 7.9375, "learning_rate": 7.134947189879615e-06, "loss": 1.4055001735687256, "step": 6780 }, { "epoch": 1.2344589059797944, "grad_norm": 23.125, "learning_rate": 7.133477783335159e-06, "loss": 0.9110021591186523, "step": 6782 }, { "epoch": 1.2348229726039865, "grad_norm": 27.25, "learning_rate": 7.132008210457433e-06, "loss": 0.4519428014755249, "step": 6784 }, { "epoch": 1.2351870392281787, "grad_norm": 6.5, "learning_rate": 7.130538471462087e-06, "loss": 1.2690303325653076, "step": 6786 }, { "epoch": 1.235551105852371, "grad_norm": 10.375, "learning_rate": 7.1290685665648005e-06, "loss": 0.9783637523651123, "step": 6788 }, { "epoch": 1.2359151724765631, "grad_norm": 44.25, "learning_rate": 7.127598495981283e-06, "loss": 0.9811915159225464, "step": 6790 }, { "epoch": 1.2362792391007553, "grad_norm": 24.125, "learning_rate": 7.126128259927252e-06, "loss": 0.6517534255981445, "step": 6792 }, { "epoch": 1.2366433057249477, "grad_norm": 18.0, "learning_rate": 7.1246578586184645e-06, "loss": 1.4276130199432373, "step": 6794 }, { "epoch": 1.23700737234914, "grad_norm": 5.96875, "learning_rate": 7.123187292270695e-06, "loss": 1.326648473739624, "step": 6796 }, { "epoch": 1.2373714389733321, "grad_norm": 8.0625, "learning_rate": 7.121716561099738e-06, "loss": 1.4149478673934937, "step": 6798 }, { "epoch": 1.2377355055975243, "grad_norm": 15.1875, "learning_rate": 7.1202456653214236e-06, "loss": 1.5907409191131592, "step": 6800 }, { "epoch": 1.2380995722217165, "grad_norm": 14.75, "learning_rate": 7.118774605151599e-06, "loss": 1.7426486015319824, "step": 6802 }, { "epoch": 1.2384636388459087, "grad_norm": 2.28125, "learning_rate": 7.117303380806135e-06, "loss": 0.8373295068740845, "step": 6804 }, { "epoch": 1.2388277054701011, "grad_norm": 15.125, "learning_rate": 7.115831992500928e-06, "loss": 1.1873797178268433, "step": 6806 }, { "epoch": 1.2391917720942933, "grad_norm": 14.875, "learning_rate": 7.114360440451895e-06, "loss": 1.3135144710540771, "step": 6808 }, { "epoch": 1.2395558387184855, "grad_norm": 9.1875, "learning_rate": 7.112888724874987e-06, "loss": 1.4339075088500977, "step": 6810 }, { "epoch": 1.2399199053426777, "grad_norm": 13.9375, "learning_rate": 7.111416845986168e-06, "loss": 1.3428232669830322, "step": 6812 }, { "epoch": 1.24028397196687, "grad_norm": 14.5, "learning_rate": 7.109944804001432e-06, "loss": 0.9346180558204651, "step": 6814 }, { "epoch": 1.240648038591062, "grad_norm": 12.0, "learning_rate": 7.108472599136793e-06, "loss": 1.8763840198516846, "step": 6816 }, { "epoch": 1.2410121052152543, "grad_norm": 39.25, "learning_rate": 7.107000231608292e-06, "loss": 1.7168290615081787, "step": 6818 }, { "epoch": 1.2413761718394467, "grad_norm": 13.8125, "learning_rate": 7.105527701631994e-06, "loss": 1.448686957359314, "step": 6820 }, { "epoch": 1.241740238463639, "grad_norm": 9.8125, "learning_rate": 7.104055009423985e-06, "loss": 1.3222360610961914, "step": 6822 }, { "epoch": 1.242104305087831, "grad_norm": 9.4375, "learning_rate": 7.102582155200379e-06, "loss": 1.5577512979507446, "step": 6824 }, { "epoch": 1.2424683717120233, "grad_norm": 8.5625, "learning_rate": 7.101109139177309e-06, "loss": 1.632058024406433, "step": 6826 }, { "epoch": 1.2428324383362155, "grad_norm": 19.875, "learning_rate": 7.099635961570934e-06, "loss": 1.3941521644592285, "step": 6828 }, { "epoch": 1.2431965049604077, "grad_norm": 14.75, "learning_rate": 7.09816262259744e-06, "loss": 1.3943427801132202, "step": 6830 }, { "epoch": 1.2435605715846, "grad_norm": 12.0625, "learning_rate": 7.096689122473033e-06, "loss": 1.5009510517120361, "step": 6832 }, { "epoch": 1.2439246382087923, "grad_norm": 26.125, "learning_rate": 7.09521546141394e-06, "loss": 1.3903570175170898, "step": 6834 }, { "epoch": 1.2442887048329845, "grad_norm": 12.3125, "learning_rate": 7.093741639636418e-06, "loss": 1.4346972703933716, "step": 6836 }, { "epoch": 1.2446527714571767, "grad_norm": 4.78125, "learning_rate": 7.0922676573567395e-06, "loss": 1.3681694269180298, "step": 6838 }, { "epoch": 1.2450168380813689, "grad_norm": 15.5, "learning_rate": 7.0907935147912125e-06, "loss": 1.421314001083374, "step": 6840 }, { "epoch": 1.245380904705561, "grad_norm": 17.0, "learning_rate": 7.089319212156156e-06, "loss": 1.6846474409103394, "step": 6842 }, { "epoch": 1.2457449713297533, "grad_norm": 9.1875, "learning_rate": 7.08784474966792e-06, "loss": 1.434504508972168, "step": 6844 }, { "epoch": 1.2461090379539455, "grad_norm": 12.0, "learning_rate": 7.086370127542876e-06, "loss": 1.5476224422454834, "step": 6846 }, { "epoch": 1.2464731045781379, "grad_norm": 16.5, "learning_rate": 7.084895345997418e-06, "loss": 1.3673689365386963, "step": 6848 }, { "epoch": 1.24683717120233, "grad_norm": 7.0625, "learning_rate": 7.083420405247965e-06, "loss": 1.2037729024887085, "step": 6850 }, { "epoch": 1.2472012378265223, "grad_norm": 17.0, "learning_rate": 7.081945305510958e-06, "loss": 1.3675122261047363, "step": 6852 }, { "epoch": 1.2475653044507145, "grad_norm": 10.6875, "learning_rate": 7.0804700470028635e-06, "loss": 1.4422657489776611, "step": 6854 }, { "epoch": 1.2479293710749066, "grad_norm": 4.9375, "learning_rate": 7.078994629940166e-06, "loss": 0.8554068803787231, "step": 6856 }, { "epoch": 1.2482934376990988, "grad_norm": 19.125, "learning_rate": 7.077519054539379e-06, "loss": 1.142959475517273, "step": 6858 }, { "epoch": 1.2486575043232913, "grad_norm": 18.75, "learning_rate": 7.076043321017041e-06, "loss": 1.5435700416564941, "step": 6860 }, { "epoch": 1.2490215709474835, "grad_norm": 13.75, "learning_rate": 7.074567429589703e-06, "loss": 1.4103915691375732, "step": 6862 }, { "epoch": 1.2493856375716756, "grad_norm": 9.4375, "learning_rate": 7.073091380473951e-06, "loss": 1.32807457447052, "step": 6864 }, { "epoch": 1.2497497041958678, "grad_norm": 15.25, "learning_rate": 7.071615173886388e-06, "loss": 1.4018455743789673, "step": 6866 }, { "epoch": 1.25011377082006, "grad_norm": 11.75, "learning_rate": 7.070138810043641e-06, "loss": 1.264844536781311, "step": 6868 }, { "epoch": 1.2504778374442522, "grad_norm": 9.125, "learning_rate": 7.06866228916236e-06, "loss": 1.3887810707092285, "step": 6870 }, { "epoch": 1.2508419040684444, "grad_norm": 11.375, "learning_rate": 7.06718561145922e-06, "loss": 1.4866645336151123, "step": 6872 }, { "epoch": 1.2512059706926366, "grad_norm": 22.25, "learning_rate": 7.065708777150917e-06, "loss": 1.288819432258606, "step": 6874 }, { "epoch": 1.251570037316829, "grad_norm": 10.0625, "learning_rate": 7.064231786454168e-06, "loss": 1.5728092193603516, "step": 6876 }, { "epoch": 1.2519341039410212, "grad_norm": 13.125, "learning_rate": 7.062754639585716e-06, "loss": 1.339041829109192, "step": 6878 }, { "epoch": 1.2522981705652134, "grad_norm": 52.75, "learning_rate": 7.061277336762331e-06, "loss": 1.5896389484405518, "step": 6880 }, { "epoch": 1.2526622371894056, "grad_norm": 10.9375, "learning_rate": 7.059799878200797e-06, "loss": 1.3721157312393188, "step": 6882 }, { "epoch": 1.2530263038135978, "grad_norm": 10.5, "learning_rate": 7.058322264117925e-06, "loss": 1.472983717918396, "step": 6884 }, { "epoch": 1.2533903704377902, "grad_norm": 16.125, "learning_rate": 7.0568444947305504e-06, "loss": 1.5336334705352783, "step": 6886 }, { "epoch": 1.2537544370619824, "grad_norm": 11.25, "learning_rate": 7.0553665702555286e-06, "loss": 1.7266628742218018, "step": 6888 }, { "epoch": 1.2541185036861746, "grad_norm": 9.375, "learning_rate": 7.0538884909097395e-06, "loss": 1.6979012489318848, "step": 6890 }, { "epoch": 1.2544825703103668, "grad_norm": 9.75, "learning_rate": 7.052410256910085e-06, "loss": 1.4501219987869263, "step": 6892 }, { "epoch": 1.254846636934559, "grad_norm": 38.0, "learning_rate": 7.050931868473492e-06, "loss": 1.315745234489441, "step": 6894 }, { "epoch": 1.2552107035587512, "grad_norm": 15.0625, "learning_rate": 7.0494533258169065e-06, "loss": 1.4888767004013062, "step": 6896 }, { "epoch": 1.2555747701829434, "grad_norm": 15.75, "learning_rate": 7.047974629157297e-06, "loss": 1.5987426042556763, "step": 6898 }, { "epoch": 1.2559388368071356, "grad_norm": 8.5625, "learning_rate": 7.0464957787116575e-06, "loss": 1.172560214996338, "step": 6900 }, { "epoch": 1.256302903431328, "grad_norm": 11.4375, "learning_rate": 7.045016774697004e-06, "loss": 1.3965671062469482, "step": 6902 }, { "epoch": 1.2566669700555202, "grad_norm": 115.0, "learning_rate": 7.043537617330376e-06, "loss": 1.2098493576049805, "step": 6904 }, { "epoch": 1.2570310366797124, "grad_norm": 8.75, "learning_rate": 7.042058306828829e-06, "loss": 1.3815102577209473, "step": 6906 }, { "epoch": 1.2573951033039046, "grad_norm": 15.125, "learning_rate": 7.040578843409449e-06, "loss": 1.2485014200210571, "step": 6908 }, { "epoch": 1.2577591699280968, "grad_norm": 13.375, "learning_rate": 7.039099227289341e-06, "loss": 1.252556324005127, "step": 6910 }, { "epoch": 1.2581232365522892, "grad_norm": 17.125, "learning_rate": 7.037619458685634e-06, "loss": 1.1102707386016846, "step": 6912 }, { "epoch": 1.2584873031764814, "grad_norm": 11.875, "learning_rate": 7.036139537815476e-06, "loss": 1.3073909282684326, "step": 6914 }, { "epoch": 1.2588513698006736, "grad_norm": 11.3125, "learning_rate": 7.034659464896039e-06, "loss": 1.6627213954925537, "step": 6916 }, { "epoch": 1.2592154364248658, "grad_norm": 11.375, "learning_rate": 7.0331792401445165e-06, "loss": 1.945440411567688, "step": 6918 }, { "epoch": 1.259579503049058, "grad_norm": 24.5, "learning_rate": 7.031698863778132e-06, "loss": 1.3761060237884521, "step": 6920 }, { "epoch": 1.2599435696732502, "grad_norm": 11.75, "learning_rate": 7.030218336014119e-06, "loss": 1.4903113842010498, "step": 6922 }, { "epoch": 1.2603076362974424, "grad_norm": 5.0625, "learning_rate": 7.0287376570697395e-06, "loss": 1.3640656471252441, "step": 6924 }, { "epoch": 1.2606717029216346, "grad_norm": 17.0, "learning_rate": 7.027256827162279e-06, "loss": 0.8654718995094299, "step": 6926 }, { "epoch": 1.261035769545827, "grad_norm": 15.0, "learning_rate": 7.02577584650904e-06, "loss": 0.591568112373352, "step": 6928 }, { "epoch": 1.2613998361700192, "grad_norm": 18.5, "learning_rate": 7.024294715327353e-06, "loss": 1.6607918739318848, "step": 6930 }, { "epoch": 1.2617639027942114, "grad_norm": 5.65625, "learning_rate": 7.0228134338345695e-06, "loss": 1.1746143102645874, "step": 6932 }, { "epoch": 1.2621279694184036, "grad_norm": 8.3125, "learning_rate": 7.02133200224806e-06, "loss": 1.3098299503326416, "step": 6934 }, { "epoch": 1.2624920360425957, "grad_norm": 11.875, "learning_rate": 7.019850420785217e-06, "loss": 1.531429409980774, "step": 6936 }, { "epoch": 1.2628561026667882, "grad_norm": 7.0, "learning_rate": 7.018368689663457e-06, "loss": 1.0970335006713867, "step": 6938 }, { "epoch": 1.2632201692909804, "grad_norm": 15.875, "learning_rate": 7.016886809100219e-06, "loss": 1.2609821557998657, "step": 6940 }, { "epoch": 1.2635842359151725, "grad_norm": 9.1875, "learning_rate": 7.0154047793129646e-06, "loss": 1.3810995817184448, "step": 6942 }, { "epoch": 1.2639483025393647, "grad_norm": 21.375, "learning_rate": 7.013922600519174e-06, "loss": 1.3613288402557373, "step": 6944 }, { "epoch": 1.264312369163557, "grad_norm": 17.375, "learning_rate": 7.0124402729363496e-06, "loss": 1.465685486793518, "step": 6946 }, { "epoch": 1.2646764357877491, "grad_norm": 6.09375, "learning_rate": 7.0109577967820165e-06, "loss": 1.2737704515457153, "step": 6948 }, { "epoch": 1.2650405024119413, "grad_norm": 13.4375, "learning_rate": 7.009475172273725e-06, "loss": 1.0695053339004517, "step": 6950 }, { "epoch": 1.2654045690361335, "grad_norm": 31.75, "learning_rate": 7.0079923996290445e-06, "loss": 1.7231273651123047, "step": 6952 }, { "epoch": 1.2657686356603257, "grad_norm": 42.75, "learning_rate": 7.0065094790655645e-06, "loss": 1.360650658607483, "step": 6954 }, { "epoch": 1.2661327022845181, "grad_norm": 14.875, "learning_rate": 7.005026410800897e-06, "loss": 1.4140663146972656, "step": 6956 }, { "epoch": 1.2664967689087103, "grad_norm": 8.0625, "learning_rate": 7.003543195052675e-06, "loss": 1.3838446140289307, "step": 6958 }, { "epoch": 1.2668608355329025, "grad_norm": 11.875, "learning_rate": 7.002059832038557e-06, "loss": 1.3637138605117798, "step": 6960 }, { "epoch": 1.2672249021570947, "grad_norm": 23.875, "learning_rate": 7.00057632197622e-06, "loss": 1.8511745929718018, "step": 6962 }, { "epoch": 1.267588968781287, "grad_norm": 33.25, "learning_rate": 6.9990926650833646e-06, "loss": 1.1482704877853394, "step": 6964 }, { "epoch": 1.2679530354054793, "grad_norm": 8.8125, "learning_rate": 6.997608861577707e-06, "loss": 1.3576314449310303, "step": 6966 }, { "epoch": 1.2683171020296715, "grad_norm": 8.9375, "learning_rate": 6.996124911676991e-06, "loss": 1.3747107982635498, "step": 6968 }, { "epoch": 1.2686811686538637, "grad_norm": 9.0, "learning_rate": 6.994640815598983e-06, "loss": 1.4651124477386475, "step": 6970 }, { "epoch": 1.269045235278056, "grad_norm": 20.5, "learning_rate": 6.993156573561466e-06, "loss": 1.2367103099822998, "step": 6972 }, { "epoch": 1.269409301902248, "grad_norm": 7.84375, "learning_rate": 6.991672185782248e-06, "loss": 1.6774638891220093, "step": 6974 }, { "epoch": 1.2697733685264403, "grad_norm": 16.875, "learning_rate": 6.990187652479155e-06, "loss": 1.187595009803772, "step": 6976 }, { "epoch": 1.2701374351506325, "grad_norm": 22.375, "learning_rate": 6.988702973870035e-06, "loss": 1.4416632652282715, "step": 6978 }, { "epoch": 1.2705015017748247, "grad_norm": 15.5625, "learning_rate": 6.987218150172763e-06, "loss": 1.2919238805770874, "step": 6980 }, { "epoch": 1.270865568399017, "grad_norm": 12.0, "learning_rate": 6.985733181605227e-06, "loss": 1.7377707958221436, "step": 6982 }, { "epoch": 1.2712296350232093, "grad_norm": 24.0, "learning_rate": 6.984248068385342e-06, "loss": 1.735353708267212, "step": 6984 }, { "epoch": 1.2715937016474015, "grad_norm": 20.125, "learning_rate": 6.982762810731041e-06, "loss": 1.7191202640533447, "step": 6986 }, { "epoch": 1.2719577682715937, "grad_norm": 18.375, "learning_rate": 6.981277408860279e-06, "loss": 1.555033802986145, "step": 6988 }, { "epoch": 1.2723218348957859, "grad_norm": 10.125, "learning_rate": 6.979791862991037e-06, "loss": 0.9993138313293457, "step": 6990 }, { "epoch": 1.2726859015199783, "grad_norm": 29.25, "learning_rate": 6.978306173341307e-06, "loss": 0.9250248074531555, "step": 6992 }, { "epoch": 1.2730499681441705, "grad_norm": 6.5625, "learning_rate": 6.976820340129114e-06, "loss": 1.4093331098556519, "step": 6994 }, { "epoch": 1.2734140347683627, "grad_norm": 6.25, "learning_rate": 6.975334363572492e-06, "loss": 0.9326533079147339, "step": 6996 }, { "epoch": 1.2737781013925549, "grad_norm": 17.75, "learning_rate": 6.973848243889506e-06, "loss": 1.358811855316162, "step": 6998 }, { "epoch": 1.274142168016747, "grad_norm": 27.5, "learning_rate": 6.9723619812982365e-06, "loss": 1.8066385984420776, "step": 7000 }, { "epoch": 1.2745062346409393, "grad_norm": 11.5625, "learning_rate": 6.9708755760167865e-06, "loss": 1.475766897201538, "step": 7002 }, { "epoch": 1.2748703012651315, "grad_norm": 43.5, "learning_rate": 6.9693890282632826e-06, "loss": 1.4680222272872925, "step": 7004 }, { "epoch": 1.2752343678893237, "grad_norm": 15.1875, "learning_rate": 6.967902338255865e-06, "loss": 1.514327049255371, "step": 7006 }, { "epoch": 1.2755984345135158, "grad_norm": 10.4375, "learning_rate": 6.966415506212703e-06, "loss": 1.9397776126861572, "step": 7008 }, { "epoch": 1.2759625011377083, "grad_norm": 11.5625, "learning_rate": 6.96492853235198e-06, "loss": 1.5866397619247437, "step": 7010 }, { "epoch": 1.2763265677619005, "grad_norm": 12.25, "learning_rate": 6.9634414168919075e-06, "loss": 1.25542414188385, "step": 7012 }, { "epoch": 1.2766906343860926, "grad_norm": 5.84375, "learning_rate": 6.961954160050712e-06, "loss": 1.4755549430847168, "step": 7014 }, { "epoch": 1.2770547010102848, "grad_norm": 3.90625, "learning_rate": 6.96046676204664e-06, "loss": 1.080859899520874, "step": 7016 }, { "epoch": 1.277418767634477, "grad_norm": 23.75, "learning_rate": 6.958979223097964e-06, "loss": 1.2284001111984253, "step": 7018 }, { "epoch": 1.2777828342586695, "grad_norm": 13.375, "learning_rate": 6.957491543422974e-06, "loss": 1.3465055227279663, "step": 7020 }, { "epoch": 1.2781469008828616, "grad_norm": 12.8125, "learning_rate": 6.956003723239979e-06, "loss": 1.5102200508117676, "step": 7022 }, { "epoch": 1.2785109675070538, "grad_norm": 24.625, "learning_rate": 6.954515762767316e-06, "loss": 1.5981968641281128, "step": 7024 }, { "epoch": 1.278875034131246, "grad_norm": 18.5, "learning_rate": 6.953027662223329e-06, "loss": 1.6490386724472046, "step": 7026 }, { "epoch": 1.2792391007554382, "grad_norm": 18.25, "learning_rate": 6.951539421826394e-06, "loss": 1.101287603378296, "step": 7028 }, { "epoch": 1.2796031673796304, "grad_norm": 10.375, "learning_rate": 6.950051041794908e-06, "loss": 1.3279919624328613, "step": 7030 }, { "epoch": 1.2799672340038226, "grad_norm": 7.71875, "learning_rate": 6.948562522347279e-06, "loss": 1.2891145944595337, "step": 7032 }, { "epoch": 1.2803313006280148, "grad_norm": 10.3125, "learning_rate": 6.947073863701948e-06, "loss": 0.885400652885437, "step": 7034 }, { "epoch": 1.2806953672522072, "grad_norm": 30.875, "learning_rate": 6.945585066077363e-06, "loss": 1.391412615776062, "step": 7036 }, { "epoch": 1.2810594338763994, "grad_norm": 10.75, "learning_rate": 6.944096129692002e-06, "loss": 1.3074579238891602, "step": 7038 }, { "epoch": 1.2814235005005916, "grad_norm": 12.5, "learning_rate": 6.94260705476436e-06, "loss": 0.8693200349807739, "step": 7040 }, { "epoch": 1.2817875671247838, "grad_norm": 9.25, "learning_rate": 6.941117841512952e-06, "loss": 1.5484583377838135, "step": 7042 }, { "epoch": 1.282151633748976, "grad_norm": 17.875, "learning_rate": 6.939628490156317e-06, "loss": 1.2619388103485107, "step": 7044 }, { "epoch": 1.2825157003731684, "grad_norm": 9.1875, "learning_rate": 6.938139000913009e-06, "loss": 1.298797607421875, "step": 7046 }, { "epoch": 1.2828797669973606, "grad_norm": 10.75, "learning_rate": 6.936649374001603e-06, "loss": 1.4176117181777954, "step": 7048 }, { "epoch": 1.2832438336215528, "grad_norm": 9.6875, "learning_rate": 6.935159609640696e-06, "loss": 1.3727424144744873, "step": 7050 }, { "epoch": 1.283607900245745, "grad_norm": 8.3125, "learning_rate": 6.933669708048909e-06, "loss": 1.4126691818237305, "step": 7052 }, { "epoch": 1.2839719668699372, "grad_norm": 9.5, "learning_rate": 6.932179669444875e-06, "loss": 1.187784194946289, "step": 7054 }, { "epoch": 1.2843360334941294, "grad_norm": 6.59375, "learning_rate": 6.9306894940472515e-06, "loss": 1.0585685968399048, "step": 7056 }, { "epoch": 1.2847001001183216, "grad_norm": 7.28125, "learning_rate": 6.929199182074717e-06, "loss": 1.2256271839141846, "step": 7058 }, { "epoch": 1.2850641667425138, "grad_norm": 14.0, "learning_rate": 6.927708733745968e-06, "loss": 1.2778189182281494, "step": 7060 }, { "epoch": 1.285428233366706, "grad_norm": 9.875, "learning_rate": 6.926218149279723e-06, "loss": 1.4143060445785522, "step": 7062 }, { "epoch": 1.2857922999908984, "grad_norm": 7.34375, "learning_rate": 6.924727428894718e-06, "loss": 1.478899598121643, "step": 7064 }, { "epoch": 1.2861563666150906, "grad_norm": 8.8125, "learning_rate": 6.92323657280971e-06, "loss": 1.0721986293792725, "step": 7066 }, { "epoch": 1.2865204332392828, "grad_norm": 15.125, "learning_rate": 6.921745581243477e-06, "loss": 1.3027867078781128, "step": 7068 }, { "epoch": 1.286884499863475, "grad_norm": 11.375, "learning_rate": 6.920254454414814e-06, "loss": 1.2663110494613647, "step": 7070 }, { "epoch": 1.2872485664876672, "grad_norm": 10.5, "learning_rate": 6.918763192542542e-06, "loss": 1.5463697910308838, "step": 7072 }, { "epoch": 1.2876126331118596, "grad_norm": 10.0625, "learning_rate": 6.917271795845492e-06, "loss": 1.2594268321990967, "step": 7074 }, { "epoch": 1.2879766997360518, "grad_norm": 9.3125, "learning_rate": 6.915780264542526e-06, "loss": 1.5509223937988281, "step": 7076 }, { "epoch": 1.288340766360244, "grad_norm": 6.8125, "learning_rate": 6.9142885988525145e-06, "loss": 1.2842024564743042, "step": 7078 }, { "epoch": 1.2887048329844362, "grad_norm": 5.21875, "learning_rate": 6.912796798994359e-06, "loss": 0.9973230361938477, "step": 7080 }, { "epoch": 1.2890688996086284, "grad_norm": 3.5625, "learning_rate": 6.911304865186972e-06, "loss": 1.1330246925354004, "step": 7082 }, { "epoch": 1.2894329662328206, "grad_norm": 4.15625, "learning_rate": 6.909812797649289e-06, "loss": 1.3135521411895752, "step": 7084 }, { "epoch": 1.2897970328570127, "grad_norm": 9.4375, "learning_rate": 6.9083205966002645e-06, "loss": 1.0735105276107788, "step": 7086 }, { "epoch": 1.290161099481205, "grad_norm": 12.375, "learning_rate": 6.9068282622588735e-06, "loss": 1.3692142963409424, "step": 7088 }, { "epoch": 1.2905251661053974, "grad_norm": 8.9375, "learning_rate": 6.9053357948441105e-06, "loss": 1.3653674125671387, "step": 7090 }, { "epoch": 1.2908892327295896, "grad_norm": 7.5, "learning_rate": 6.9038431945749885e-06, "loss": 1.455733060836792, "step": 7092 }, { "epoch": 1.2912532993537817, "grad_norm": 11.875, "learning_rate": 6.902350461670542e-06, "loss": 1.2619668245315552, "step": 7094 }, { "epoch": 1.291617365977974, "grad_norm": 15.625, "learning_rate": 6.9008575963498206e-06, "loss": 1.0950919389724731, "step": 7096 }, { "epoch": 1.2919814326021661, "grad_norm": 11.1875, "learning_rate": 6.8993645988318965e-06, "loss": 0.5333462953567505, "step": 7098 }, { "epoch": 1.2923454992263586, "grad_norm": 9.0625, "learning_rate": 6.897871469335864e-06, "loss": 1.2034614086151123, "step": 7100 }, { "epoch": 1.2927095658505507, "grad_norm": 13.3125, "learning_rate": 6.896378208080832e-06, "loss": 1.4908605813980103, "step": 7102 }, { "epoch": 1.293073632474743, "grad_norm": 17.25, "learning_rate": 6.8948848152859316e-06, "loss": 1.31070077419281, "step": 7104 }, { "epoch": 1.2934376990989351, "grad_norm": 11.1875, "learning_rate": 6.89339129117031e-06, "loss": 1.2852356433868408, "step": 7106 }, { "epoch": 1.2938017657231273, "grad_norm": 8.1875, "learning_rate": 6.8918976359531366e-06, "loss": 1.4274932146072388, "step": 7108 }, { "epoch": 1.2941658323473195, "grad_norm": 8.1875, "learning_rate": 6.890403849853601e-06, "loss": 1.3167959451675415, "step": 7110 }, { "epoch": 1.2945298989715117, "grad_norm": 4.53125, "learning_rate": 6.888909933090908e-06, "loss": 1.193117618560791, "step": 7112 }, { "epoch": 1.294893965595704, "grad_norm": 5.28125, "learning_rate": 6.887415885884286e-06, "loss": 1.2229421138763428, "step": 7114 }, { "epoch": 1.295258032219896, "grad_norm": 7.84375, "learning_rate": 6.885921708452978e-06, "loss": 1.2449531555175781, "step": 7116 }, { "epoch": 1.2956220988440885, "grad_norm": 12.6875, "learning_rate": 6.884427401016249e-06, "loss": 1.2884869575500488, "step": 7118 }, { "epoch": 1.2959861654682807, "grad_norm": 12.0625, "learning_rate": 6.882932963793384e-06, "loss": 1.187927484512329, "step": 7120 }, { "epoch": 1.296350232092473, "grad_norm": 11.3125, "learning_rate": 6.881438397003684e-06, "loss": 1.4766616821289062, "step": 7122 }, { "epoch": 1.296714298716665, "grad_norm": 8.8125, "learning_rate": 6.879943700866474e-06, "loss": 1.7511173486709595, "step": 7124 }, { "epoch": 1.2970783653408573, "grad_norm": 9.3125, "learning_rate": 6.878448875601089e-06, "loss": 1.0145858526229858, "step": 7126 }, { "epoch": 1.2974424319650497, "grad_norm": 34.75, "learning_rate": 6.876953921426892e-06, "loss": 1.1911375522613525, "step": 7128 }, { "epoch": 1.297806498589242, "grad_norm": 4.25, "learning_rate": 6.875458838563263e-06, "loss": 0.0983295738697052, "step": 7130 }, { "epoch": 1.298170565213434, "grad_norm": 27.5, "learning_rate": 6.873963627229595e-06, "loss": 0.3917846977710724, "step": 7132 }, { "epoch": 1.2985346318376263, "grad_norm": 28.5, "learning_rate": 6.872468287645308e-06, "loss": 1.3198145627975464, "step": 7134 }, { "epoch": 1.2988986984618185, "grad_norm": 77.5, "learning_rate": 6.870972820029835e-06, "loss": 0.9498114585876465, "step": 7136 }, { "epoch": 1.2992627650860107, "grad_norm": 12.0, "learning_rate": 6.86947722460263e-06, "loss": 1.508488416671753, "step": 7138 }, { "epoch": 1.2996268317102029, "grad_norm": 22.25, "learning_rate": 6.867981501583168e-06, "loss": 1.3845430612564087, "step": 7140 }, { "epoch": 1.299990898334395, "grad_norm": 8.9375, "learning_rate": 6.866485651190937e-06, "loss": 0.8970973491668701, "step": 7142 }, { "epoch": 1.3003549649585875, "grad_norm": 18.0, "learning_rate": 6.864989673645448e-06, "loss": 1.5058352947235107, "step": 7144 }, { "epoch": 1.3007190315827797, "grad_norm": 10.4375, "learning_rate": 6.8634935691662305e-06, "loss": 1.2339940071105957, "step": 7146 }, { "epoch": 1.3010830982069719, "grad_norm": 23.25, "learning_rate": 6.86199733797283e-06, "loss": 0.9971280694007874, "step": 7148 }, { "epoch": 1.301447164831164, "grad_norm": 26.25, "learning_rate": 6.860500980284814e-06, "loss": 1.3777186870574951, "step": 7150 }, { "epoch": 1.3018112314553563, "grad_norm": 11.6875, "learning_rate": 6.859004496321766e-06, "loss": 1.247565746307373, "step": 7152 }, { "epoch": 1.3021752980795487, "grad_norm": 17.375, "learning_rate": 6.857507886303292e-06, "loss": 0.5558710694313049, "step": 7154 }, { "epoch": 1.3025393647037409, "grad_norm": 20.375, "learning_rate": 6.856011150449009e-06, "loss": 1.7666443586349487, "step": 7156 }, { "epoch": 1.302903431327933, "grad_norm": 15.0625, "learning_rate": 6.854514288978558e-06, "loss": 1.3393206596374512, "step": 7158 }, { "epoch": 1.3032674979521253, "grad_norm": 11.9375, "learning_rate": 6.853017302111597e-06, "loss": 1.424713134765625, "step": 7160 }, { "epoch": 1.3036315645763175, "grad_norm": 10.0, "learning_rate": 6.851520190067806e-06, "loss": 1.3376874923706055, "step": 7162 }, { "epoch": 1.3039956312005097, "grad_norm": 14.75, "learning_rate": 6.850022953066879e-06, "loss": 1.335485577583313, "step": 7164 }, { "epoch": 1.3043596978247018, "grad_norm": 40.0, "learning_rate": 6.848525591328528e-06, "loss": 0.8418655395507812, "step": 7166 }, { "epoch": 1.304723764448894, "grad_norm": 26.875, "learning_rate": 6.847028105072483e-06, "loss": 1.0361363887786865, "step": 7168 }, { "epoch": 1.3050878310730865, "grad_norm": 3.078125, "learning_rate": 6.8455304945184975e-06, "loss": 0.450295090675354, "step": 7170 }, { "epoch": 1.3054518976972787, "grad_norm": 14.375, "learning_rate": 6.844032759886339e-06, "loss": 1.2860678434371948, "step": 7172 }, { "epoch": 1.3058159643214708, "grad_norm": 14.125, "learning_rate": 6.842534901395794e-06, "loss": 1.2256741523742676, "step": 7174 }, { "epoch": 1.306180030945663, "grad_norm": 28.5, "learning_rate": 6.841036919266666e-06, "loss": 1.5396909713745117, "step": 7176 }, { "epoch": 1.3065440975698552, "grad_norm": 4.53125, "learning_rate": 6.839538813718778e-06, "loss": 0.9447013139724731, "step": 7178 }, { "epoch": 1.3069081641940474, "grad_norm": 11.6875, "learning_rate": 6.838040584971972e-06, "loss": 1.2523603439331055, "step": 7180 }, { "epoch": 1.3072722308182398, "grad_norm": 11.5625, "learning_rate": 6.836542233246106e-06, "loss": 1.3193187713623047, "step": 7182 }, { "epoch": 1.307636297442432, "grad_norm": 10.75, "learning_rate": 6.8350437587610594e-06, "loss": 1.2456010580062866, "step": 7184 }, { "epoch": 1.3080003640666242, "grad_norm": 11.8125, "learning_rate": 6.833545161736724e-06, "loss": 1.1271644830703735, "step": 7186 }, { "epoch": 1.3083644306908164, "grad_norm": 9.5, "learning_rate": 6.8320464423930145e-06, "loss": 1.2888271808624268, "step": 7188 }, { "epoch": 1.3087284973150086, "grad_norm": 107.5, "learning_rate": 6.830547600949859e-06, "loss": 1.1926859617233276, "step": 7190 }, { "epoch": 1.3090925639392008, "grad_norm": 23.875, "learning_rate": 6.829048637627212e-06, "loss": 0.6636847853660583, "step": 7192 }, { "epoch": 1.309456630563393, "grad_norm": 16.875, "learning_rate": 6.827549552645037e-06, "loss": 1.0432558059692383, "step": 7194 }, { "epoch": 1.3098206971875852, "grad_norm": 11.25, "learning_rate": 6.826050346223318e-06, "loss": 1.3708641529083252, "step": 7196 }, { "epoch": 1.3101847638117776, "grad_norm": 15.0625, "learning_rate": 6.82455101858206e-06, "loss": 1.497727632522583, "step": 7198 }, { "epoch": 1.3105488304359698, "grad_norm": 13.0625, "learning_rate": 6.823051569941279e-06, "loss": 1.3389322757720947, "step": 7200 }, { "epoch": 1.310912897060162, "grad_norm": 10.625, "learning_rate": 6.821552000521017e-06, "loss": 1.2424613237380981, "step": 7202 }, { "epoch": 1.3112769636843542, "grad_norm": 30.0, "learning_rate": 6.82005231054133e-06, "loss": 1.1347570419311523, "step": 7204 }, { "epoch": 1.3116410303085464, "grad_norm": 11.625, "learning_rate": 6.818552500222286e-06, "loss": 1.0207314491271973, "step": 7206 }, { "epoch": 1.3120050969327388, "grad_norm": 6.90625, "learning_rate": 6.817052569783982e-06, "loss": 1.2210968732833862, "step": 7208 }, { "epoch": 1.312369163556931, "grad_norm": 7.3125, "learning_rate": 6.815552519446524e-06, "loss": 1.3219958543777466, "step": 7210 }, { "epoch": 1.3127332301811232, "grad_norm": 112.5, "learning_rate": 6.814052349430041e-06, "loss": 1.1523452997207642, "step": 7212 }, { "epoch": 1.3130972968053154, "grad_norm": 7.96875, "learning_rate": 6.8125520599546735e-06, "loss": 1.1410261392593384, "step": 7214 }, { "epoch": 1.3134613634295076, "grad_norm": 38.75, "learning_rate": 6.811051651240585e-06, "loss": 1.4542841911315918, "step": 7216 }, { "epoch": 1.3138254300536998, "grad_norm": 16.875, "learning_rate": 6.809551123507951e-06, "loss": 1.584712028503418, "step": 7218 }, { "epoch": 1.314189496677892, "grad_norm": 12.9375, "learning_rate": 6.8080504769769725e-06, "loss": 1.3592419624328613, "step": 7220 }, { "epoch": 1.3145535633020842, "grad_norm": 8.3125, "learning_rate": 6.80654971186786e-06, "loss": 1.3977789878845215, "step": 7222 }, { "epoch": 1.3149176299262766, "grad_norm": 7.09375, "learning_rate": 6.805048828400849e-06, "loss": 1.1130414009094238, "step": 7224 }, { "epoch": 1.3152816965504688, "grad_norm": 12.9375, "learning_rate": 6.803547826796182e-06, "loss": 1.3731980323791504, "step": 7226 }, { "epoch": 1.315645763174661, "grad_norm": 9.125, "learning_rate": 6.802046707274128e-06, "loss": 1.3402268886566162, "step": 7228 }, { "epoch": 1.3160098297988532, "grad_norm": 6.09375, "learning_rate": 6.800545470054971e-06, "loss": 1.042785406112671, "step": 7230 }, { "epoch": 1.3163738964230454, "grad_norm": 18.625, "learning_rate": 6.79904411535901e-06, "loss": 1.2189916372299194, "step": 7232 }, { "epoch": 1.3167379630472378, "grad_norm": 13.125, "learning_rate": 6.797542643406565e-06, "loss": 1.481350302696228, "step": 7234 }, { "epoch": 1.31710202967143, "grad_norm": 73.5, "learning_rate": 6.7960410544179674e-06, "loss": 1.6615064144134521, "step": 7236 }, { "epoch": 1.3174660962956222, "grad_norm": 17.75, "learning_rate": 6.794539348613571e-06, "loss": 1.7068772315979004, "step": 7238 }, { "epoch": 1.3178301629198144, "grad_norm": 11.5, "learning_rate": 6.793037526213746e-06, "loss": 1.9035048484802246, "step": 7240 }, { "epoch": 1.3181942295440066, "grad_norm": 3.5, "learning_rate": 6.791535587438878e-06, "loss": 1.2285220623016357, "step": 7242 }, { "epoch": 1.3185582961681988, "grad_norm": 22.75, "learning_rate": 6.79003353250937e-06, "loss": 1.0185660123825073, "step": 7244 }, { "epoch": 1.318922362792391, "grad_norm": 20.125, "learning_rate": 6.788531361645644e-06, "loss": 1.4466512203216553, "step": 7246 }, { "epoch": 1.3192864294165831, "grad_norm": 15.3125, "learning_rate": 6.787029075068135e-06, "loss": 1.2173423767089844, "step": 7248 }, { "epoch": 1.3196504960407753, "grad_norm": 13.0625, "learning_rate": 6.785526672997298e-06, "loss": 1.4120014905929565, "step": 7250 }, { "epoch": 1.3200145626649677, "grad_norm": 26.25, "learning_rate": 6.7840241556536064e-06, "loss": 1.1994625329971313, "step": 7252 }, { "epoch": 1.32037862928916, "grad_norm": 8.25, "learning_rate": 6.782521523257548e-06, "loss": 1.2093708515167236, "step": 7254 }, { "epoch": 1.3207426959133521, "grad_norm": 6.15625, "learning_rate": 6.781018776029626e-06, "loss": 1.0442777872085571, "step": 7256 }, { "epoch": 1.3211067625375443, "grad_norm": 12.4375, "learning_rate": 6.779515914190365e-06, "loss": 1.5330231189727783, "step": 7258 }, { "epoch": 1.3214708291617365, "grad_norm": 5.125, "learning_rate": 6.778012937960301e-06, "loss": 1.3253178596496582, "step": 7260 }, { "epoch": 1.321834895785929, "grad_norm": 10.8125, "learning_rate": 6.776509847559993e-06, "loss": 1.2081809043884277, "step": 7262 }, { "epoch": 1.3221989624101211, "grad_norm": 19.25, "learning_rate": 6.775006643210012e-06, "loss": 1.3396077156066895, "step": 7264 }, { "epoch": 1.3225630290343133, "grad_norm": 25.0, "learning_rate": 6.773503325130946e-06, "loss": 1.098000168800354, "step": 7266 }, { "epoch": 1.3229270956585055, "grad_norm": 21.0, "learning_rate": 6.771999893543401e-06, "loss": 0.40661901235580444, "step": 7268 }, { "epoch": 1.3232911622826977, "grad_norm": 9.4375, "learning_rate": 6.770496348668001e-06, "loss": 1.2449018955230713, "step": 7270 }, { "epoch": 1.32365522890689, "grad_norm": 10.625, "learning_rate": 6.768992690725384e-06, "loss": 1.5068029165267944, "step": 7272 }, { "epoch": 1.324019295531082, "grad_norm": 780.0, "learning_rate": 6.767488919936208e-06, "loss": 1.423985481262207, "step": 7274 }, { "epoch": 1.3243833621552743, "grad_norm": 15.625, "learning_rate": 6.765985036521143e-06, "loss": 1.6611640453338623, "step": 7276 }, { "epoch": 1.3247474287794667, "grad_norm": 8.625, "learning_rate": 6.764481040700877e-06, "loss": 1.4383623600006104, "step": 7278 }, { "epoch": 1.325111495403659, "grad_norm": 12.0, "learning_rate": 6.762976932696116e-06, "loss": 1.1659802198410034, "step": 7280 }, { "epoch": 1.325475562027851, "grad_norm": 17.0, "learning_rate": 6.7614727127275815e-06, "loss": 1.6228660345077515, "step": 7282 }, { "epoch": 1.3258396286520433, "grad_norm": 17.125, "learning_rate": 6.759968381016016e-06, "loss": 1.8521240949630737, "step": 7284 }, { "epoch": 1.3262036952762355, "grad_norm": 13.9375, "learning_rate": 6.7584639377821686e-06, "loss": 1.559242844581604, "step": 7286 }, { "epoch": 1.326567761900428, "grad_norm": 6.28125, "learning_rate": 6.75695938324681e-06, "loss": 1.2156175374984741, "step": 7288 }, { "epoch": 1.32693182852462, "grad_norm": 19.0, "learning_rate": 6.755454717630732e-06, "loss": 0.848728358745575, "step": 7290 }, { "epoch": 1.3272958951488123, "grad_norm": 17.0, "learning_rate": 6.753949941154734e-06, "loss": 0.6268037557601929, "step": 7292 }, { "epoch": 1.3276599617730045, "grad_norm": 20.5, "learning_rate": 6.7524450540396395e-06, "loss": 1.7513140439987183, "step": 7294 }, { "epoch": 1.3280240283971967, "grad_norm": 9.5, "learning_rate": 6.750940056506282e-06, "loss": 1.3441169261932373, "step": 7296 }, { "epoch": 1.3283880950213889, "grad_norm": 13.4375, "learning_rate": 6.749434948775514e-06, "loss": 1.365588903427124, "step": 7298 }, { "epoch": 1.328752161645581, "grad_norm": 53.0, "learning_rate": 6.747929731068205e-06, "loss": 1.261444330215454, "step": 7300 }, { "epoch": 1.3291162282697733, "grad_norm": 7.9375, "learning_rate": 6.746424403605238e-06, "loss": 1.058774709701538, "step": 7302 }, { "epoch": 1.3294802948939655, "grad_norm": 15.375, "learning_rate": 6.7449189666075166e-06, "loss": 0.8494008779525757, "step": 7304 }, { "epoch": 1.3298443615181579, "grad_norm": 22.375, "learning_rate": 6.7434134202959555e-06, "loss": 1.0898377895355225, "step": 7306 }, { "epoch": 1.33020842814235, "grad_norm": 27.5, "learning_rate": 6.7419077648914865e-06, "loss": 1.6385815143585205, "step": 7308 }, { "epoch": 1.3305724947665423, "grad_norm": 14.3125, "learning_rate": 6.740402000615061e-06, "loss": 1.5671327114105225, "step": 7310 }, { "epoch": 1.3309365613907345, "grad_norm": 16.375, "learning_rate": 6.738896127687642e-06, "loss": 1.370741605758667, "step": 7312 }, { "epoch": 1.3313006280149267, "grad_norm": 58.5, "learning_rate": 6.7373901463302096e-06, "loss": 1.8039097785949707, "step": 7314 }, { "epoch": 1.331664694639119, "grad_norm": 8.5, "learning_rate": 6.735884056763763e-06, "loss": 1.320874571800232, "step": 7316 }, { "epoch": 1.3320287612633113, "grad_norm": 12.75, "learning_rate": 6.734377859209313e-06, "loss": 1.1930052042007446, "step": 7318 }, { "epoch": 1.3323928278875035, "grad_norm": 29.0, "learning_rate": 6.732871553887888e-06, "loss": 0.7306788563728333, "step": 7320 }, { "epoch": 1.3327568945116957, "grad_norm": 2.5, "learning_rate": 6.731365141020531e-06, "loss": 0.8885293006896973, "step": 7322 }, { "epoch": 1.3331209611358878, "grad_norm": 8.3125, "learning_rate": 6.729858620828307e-06, "loss": 1.0472424030303955, "step": 7324 }, { "epoch": 1.33348502776008, "grad_norm": 19.375, "learning_rate": 6.728351993532287e-06, "loss": 1.5017237663269043, "step": 7326 }, { "epoch": 1.3338490943842722, "grad_norm": 15.75, "learning_rate": 6.726845259353563e-06, "loss": 1.2247464656829834, "step": 7328 }, { "epoch": 1.3342131610084644, "grad_norm": 17.75, "learning_rate": 6.725338418513243e-06, "loss": 1.053222894668579, "step": 7330 }, { "epoch": 1.3345772276326568, "grad_norm": 14.25, "learning_rate": 6.723831471232449e-06, "loss": 1.7148617506027222, "step": 7332 }, { "epoch": 1.334941294256849, "grad_norm": 10.0, "learning_rate": 6.722324417732321e-06, "loss": 0.9061832427978516, "step": 7334 }, { "epoch": 1.3353053608810412, "grad_norm": 620.0, "learning_rate": 6.720817258234014e-06, "loss": 1.288906216621399, "step": 7336 }, { "epoch": 1.3356694275052334, "grad_norm": 21.25, "learning_rate": 6.719309992958691e-06, "loss": 1.7107911109924316, "step": 7338 }, { "epoch": 1.3360334941294256, "grad_norm": 8.125, "learning_rate": 6.7178026221275435e-06, "loss": 1.1392858028411865, "step": 7340 }, { "epoch": 1.336397560753618, "grad_norm": 13.5625, "learning_rate": 6.716295145961771e-06, "loss": 1.528315544128418, "step": 7342 }, { "epoch": 1.3367616273778102, "grad_norm": 17.125, "learning_rate": 6.714787564682589e-06, "loss": 1.5959678888320923, "step": 7344 }, { "epoch": 1.3371256940020024, "grad_norm": 11.0625, "learning_rate": 6.713279878511227e-06, "loss": 1.3057136535644531, "step": 7346 }, { "epoch": 1.3374897606261946, "grad_norm": 40.75, "learning_rate": 6.711772087668935e-06, "loss": 1.4564380645751953, "step": 7348 }, { "epoch": 1.3378538272503868, "grad_norm": 10.9375, "learning_rate": 6.710264192376974e-06, "loss": 1.7661343812942505, "step": 7350 }, { "epoch": 1.338217893874579, "grad_norm": 109.5, "learning_rate": 6.70875619285662e-06, "loss": 1.1260857582092285, "step": 7352 }, { "epoch": 1.3385819604987712, "grad_norm": 13.75, "learning_rate": 6.7072480893291665e-06, "loss": 0.750861644744873, "step": 7354 }, { "epoch": 1.3389460271229634, "grad_norm": 13.375, "learning_rate": 6.705739882015925e-06, "loss": 1.440624713897705, "step": 7356 }, { "epoch": 1.3393100937471556, "grad_norm": 13.875, "learning_rate": 6.704231571138213e-06, "loss": 1.6715214252471924, "step": 7358 }, { "epoch": 1.339674160371348, "grad_norm": 7.5, "learning_rate": 6.702723156917372e-06, "loss": 1.191943883895874, "step": 7360 }, { "epoch": 1.3400382269955402, "grad_norm": 14.1875, "learning_rate": 6.701214639574758e-06, "loss": 1.1247186660766602, "step": 7362 }, { "epoch": 1.3404022936197324, "grad_norm": 10.1875, "learning_rate": 6.699706019331736e-06, "loss": 1.1612498760223389, "step": 7364 }, { "epoch": 1.3407663602439246, "grad_norm": 9.8125, "learning_rate": 6.698197296409692e-06, "loss": 1.2469960451126099, "step": 7366 }, { "epoch": 1.3411304268681168, "grad_norm": 23.0, "learning_rate": 6.696688471030023e-06, "loss": 1.6345014572143555, "step": 7368 }, { "epoch": 1.3414944934923092, "grad_norm": 54.25, "learning_rate": 6.695179543414144e-06, "loss": 1.997577428817749, "step": 7370 }, { "epoch": 1.3418585601165014, "grad_norm": 20.125, "learning_rate": 6.693670513783486e-06, "loss": 1.311200737953186, "step": 7372 }, { "epoch": 1.3422226267406936, "grad_norm": 54.0, "learning_rate": 6.69216138235949e-06, "loss": 1.2803378105163574, "step": 7374 }, { "epoch": 1.3425866933648858, "grad_norm": 35.0, "learning_rate": 6.690652149363619e-06, "loss": 2.219374179840088, "step": 7376 }, { "epoch": 1.342950759989078, "grad_norm": 22.75, "learning_rate": 6.68914281501734e-06, "loss": 1.1470637321472168, "step": 7378 }, { "epoch": 1.3433148266132702, "grad_norm": 34.75, "learning_rate": 6.687633379542148e-06, "loss": 0.8603953719139099, "step": 7380 }, { "epoch": 1.3436788932374624, "grad_norm": 10.4375, "learning_rate": 6.686123843159543e-06, "loss": 1.4480098485946655, "step": 7382 }, { "epoch": 1.3440429598616546, "grad_norm": 12.125, "learning_rate": 6.684614206091047e-06, "loss": 1.4918415546417236, "step": 7384 }, { "epoch": 1.344407026485847, "grad_norm": 21.25, "learning_rate": 6.683104468558188e-06, "loss": 1.3915154933929443, "step": 7386 }, { "epoch": 1.3447710931100392, "grad_norm": 74.0, "learning_rate": 6.681594630782518e-06, "loss": 1.5127147436141968, "step": 7388 }, { "epoch": 1.3451351597342314, "grad_norm": 16.0, "learning_rate": 6.680084692985598e-06, "loss": 1.4008169174194336, "step": 7390 }, { "epoch": 1.3454992263584236, "grad_norm": 6.3125, "learning_rate": 6.678574655389005e-06, "loss": 0.9470155835151672, "step": 7392 }, { "epoch": 1.3458632929826158, "grad_norm": 30.0, "learning_rate": 6.677064518214333e-06, "loss": 0.9067777991294861, "step": 7394 }, { "epoch": 1.3462273596068082, "grad_norm": 6.4375, "learning_rate": 6.675554281683185e-06, "loss": 0.7488901615142822, "step": 7396 }, { "epoch": 1.3465914262310004, "grad_norm": 7.53125, "learning_rate": 6.6740439460171845e-06, "loss": 1.3801857233047485, "step": 7398 }, { "epoch": 1.3469554928551926, "grad_norm": 11.75, "learning_rate": 6.672533511437966e-06, "loss": 1.4196380376815796, "step": 7400 }, { "epoch": 1.3473195594793848, "grad_norm": 16.625, "learning_rate": 6.6710229781671834e-06, "loss": 1.3047449588775635, "step": 7402 }, { "epoch": 1.347683626103577, "grad_norm": 12.0, "learning_rate": 6.669512346426495e-06, "loss": 1.254878282546997, "step": 7404 }, { "epoch": 1.3480476927277691, "grad_norm": 20.25, "learning_rate": 6.6680016164375834e-06, "loss": 1.1717556715011597, "step": 7406 }, { "epoch": 1.3484117593519613, "grad_norm": 4.375, "learning_rate": 6.666490788422142e-06, "loss": 0.6975076198577881, "step": 7408 }, { "epoch": 1.3487758259761535, "grad_norm": 9.9375, "learning_rate": 6.664979862601879e-06, "loss": 1.3274766206741333, "step": 7410 }, { "epoch": 1.3491398926003457, "grad_norm": 5.5, "learning_rate": 6.663468839198516e-06, "loss": 1.0753499269485474, "step": 7412 }, { "epoch": 1.3495039592245381, "grad_norm": 18.25, "learning_rate": 6.661957718433789e-06, "loss": 1.2462944984436035, "step": 7414 }, { "epoch": 1.3498680258487303, "grad_norm": 17.875, "learning_rate": 6.660446500529449e-06, "loss": 1.1585508584976196, "step": 7416 }, { "epoch": 1.3502320924729225, "grad_norm": 7.4375, "learning_rate": 6.658935185707262e-06, "loss": 0.9352970123291016, "step": 7418 }, { "epoch": 1.3505961590971147, "grad_norm": 11.875, "learning_rate": 6.657423774189009e-06, "loss": 1.4495834112167358, "step": 7420 }, { "epoch": 1.350960225721307, "grad_norm": 9.6875, "learning_rate": 6.655912266196481e-06, "loss": 1.2501970529556274, "step": 7422 }, { "epoch": 1.3513242923454993, "grad_norm": 11.25, "learning_rate": 6.654400661951483e-06, "loss": 1.217156171798706, "step": 7424 }, { "epoch": 1.3516883589696915, "grad_norm": 5.21875, "learning_rate": 6.652888961675843e-06, "loss": 1.2445909976959229, "step": 7426 }, { "epoch": 1.3520524255938837, "grad_norm": 17.375, "learning_rate": 6.651377165591393e-06, "loss": 1.4784386157989502, "step": 7428 }, { "epoch": 1.352416492218076, "grad_norm": 16.5, "learning_rate": 6.649865273919982e-06, "loss": 1.6582244634628296, "step": 7430 }, { "epoch": 1.352780558842268, "grad_norm": 21.0, "learning_rate": 6.648353286883477e-06, "loss": 1.6432421207427979, "step": 7432 }, { "epoch": 1.3531446254664603, "grad_norm": 18.75, "learning_rate": 6.646841204703755e-06, "loss": 0.926926851272583, "step": 7434 }, { "epoch": 1.3535086920906525, "grad_norm": 24.75, "learning_rate": 6.64532902760271e-06, "loss": 1.580348253250122, "step": 7436 }, { "epoch": 1.3538727587148447, "grad_norm": 17.125, "learning_rate": 6.643816755802241e-06, "loss": 2.0164577960968018, "step": 7438 }, { "epoch": 1.354236825339037, "grad_norm": 23.625, "learning_rate": 6.642304389524274e-06, "loss": 1.4507524967193604, "step": 7440 }, { "epoch": 1.3546008919632293, "grad_norm": 13.0, "learning_rate": 6.640791928990742e-06, "loss": 1.440001368522644, "step": 7442 }, { "epoch": 1.3549649585874215, "grad_norm": 12.875, "learning_rate": 6.6392793744235885e-06, "loss": 1.5932947397232056, "step": 7444 }, { "epoch": 1.3553290252116137, "grad_norm": 12.875, "learning_rate": 6.637766726044781e-06, "loss": 1.6701114177703857, "step": 7446 }, { "epoch": 1.3556930918358059, "grad_norm": 12.0, "learning_rate": 6.636253984076288e-06, "loss": 1.492882251739502, "step": 7448 }, { "epoch": 1.3560571584599983, "grad_norm": 14.8125, "learning_rate": 6.6347411487401035e-06, "loss": 1.1613523960113525, "step": 7450 }, { "epoch": 1.3564212250841905, "grad_norm": 21.75, "learning_rate": 6.633228220258228e-06, "loss": 1.4374372959136963, "step": 7452 }, { "epoch": 1.3567852917083827, "grad_norm": 17.375, "learning_rate": 6.6317151988526766e-06, "loss": 1.3111393451690674, "step": 7454 }, { "epoch": 1.3571493583325749, "grad_norm": 26.75, "learning_rate": 6.6302020847454805e-06, "loss": 1.4557454586029053, "step": 7456 }, { "epoch": 1.357513424956767, "grad_norm": 11.3125, "learning_rate": 6.628688878158681e-06, "loss": 1.6682789325714111, "step": 7458 }, { "epoch": 1.3578774915809593, "grad_norm": 11.875, "learning_rate": 6.627175579314338e-06, "loss": 1.2872165441513062, "step": 7460 }, { "epoch": 1.3582415582051515, "grad_norm": 18.375, "learning_rate": 6.625662188434518e-06, "loss": 1.525971531867981, "step": 7462 }, { "epoch": 1.3586056248293437, "grad_norm": 6.34375, "learning_rate": 6.624148705741311e-06, "loss": 1.0821521282196045, "step": 7464 }, { "epoch": 1.358969691453536, "grad_norm": 12.0, "learning_rate": 6.6226351314568095e-06, "loss": 0.4714857339859009, "step": 7466 }, { "epoch": 1.3593337580777283, "grad_norm": 9.125, "learning_rate": 6.621121465803124e-06, "loss": 1.1857128143310547, "step": 7468 }, { "epoch": 1.3596978247019205, "grad_norm": 25.125, "learning_rate": 6.619607709002383e-06, "loss": 1.5799140930175781, "step": 7470 }, { "epoch": 1.3600618913261127, "grad_norm": 13.6875, "learning_rate": 6.61809386127672e-06, "loss": 1.1052372455596924, "step": 7472 }, { "epoch": 1.3604259579503049, "grad_norm": 18.25, "learning_rate": 6.61657992284829e-06, "loss": 1.3505126237869263, "step": 7474 }, { "epoch": 1.3607900245744973, "grad_norm": 15.0, "learning_rate": 6.615065893939254e-06, "loss": 1.4402521848678589, "step": 7476 }, { "epoch": 1.3611540911986895, "grad_norm": 21.125, "learning_rate": 6.61355177477179e-06, "loss": 1.6833484172821045, "step": 7478 }, { "epoch": 1.3615181578228817, "grad_norm": 12.9375, "learning_rate": 6.612037565568088e-06, "loss": 1.5567283630371094, "step": 7480 }, { "epoch": 1.3618822244470739, "grad_norm": 10.125, "learning_rate": 6.610523266550357e-06, "loss": 0.9774007797241211, "step": 7482 }, { "epoch": 1.362246291071266, "grad_norm": 15.75, "learning_rate": 6.609008877940808e-06, "loss": 1.3714394569396973, "step": 7484 }, { "epoch": 1.3626103576954582, "grad_norm": 10.9375, "learning_rate": 6.607494399961677e-06, "loss": 0.5788627862930298, "step": 7486 }, { "epoch": 1.3629744243196504, "grad_norm": 10.875, "learning_rate": 6.605979832835203e-06, "loss": 1.324271559715271, "step": 7488 }, { "epoch": 1.3633384909438426, "grad_norm": 12.625, "learning_rate": 6.604465176783645e-06, "loss": 1.7733110189437866, "step": 7490 }, { "epoch": 1.3637025575680348, "grad_norm": 12.75, "learning_rate": 6.602950432029272e-06, "loss": 1.4646718502044678, "step": 7492 }, { "epoch": 1.3640666241922272, "grad_norm": 12.5625, "learning_rate": 6.601435598794366e-06, "loss": 1.0780293941497803, "step": 7494 }, { "epoch": 1.3644306908164194, "grad_norm": 14.3125, "learning_rate": 6.599920677301224e-06, "loss": 0.8972785472869873, "step": 7496 }, { "epoch": 1.3647947574406116, "grad_norm": 9.3125, "learning_rate": 6.5984056677721516e-06, "loss": 1.251992106437683, "step": 7498 }, { "epoch": 1.3651588240648038, "grad_norm": 2.625, "learning_rate": 6.596890570429475e-06, "loss": 1.0147993564605713, "step": 7500 }, { "epoch": 1.365522890688996, "grad_norm": 10.4375, "learning_rate": 6.595375385495526e-06, "loss": 1.1783654689788818, "step": 7502 }, { "epoch": 1.3658869573131884, "grad_norm": 58.0, "learning_rate": 6.593860113192652e-06, "loss": 1.3592965602874756, "step": 7504 }, { "epoch": 1.3662510239373806, "grad_norm": 8.1875, "learning_rate": 6.592344753743214e-06, "loss": 1.3051929473876953, "step": 7506 }, { "epoch": 1.3666150905615728, "grad_norm": 17.0, "learning_rate": 6.590829307369582e-06, "loss": 1.3840627670288086, "step": 7508 }, { "epoch": 1.366979157185765, "grad_norm": 18.625, "learning_rate": 6.589313774294144e-06, "loss": 1.184469223022461, "step": 7510 }, { "epoch": 1.3673432238099572, "grad_norm": 29.625, "learning_rate": 6.5877981547392985e-06, "loss": 1.4012384414672852, "step": 7512 }, { "epoch": 1.3677072904341494, "grad_norm": 13.5625, "learning_rate": 6.586282448927456e-06, "loss": 1.3458597660064697, "step": 7514 }, { "epoch": 1.3680713570583416, "grad_norm": 18.375, "learning_rate": 6.584766657081041e-06, "loss": 0.9641464948654175, "step": 7516 }, { "epoch": 1.3684354236825338, "grad_norm": 11.75, "learning_rate": 6.583250779422485e-06, "loss": 1.3214313983917236, "step": 7518 }, { "epoch": 1.3687994903067262, "grad_norm": 7.625, "learning_rate": 6.581734816174244e-06, "loss": 1.255645513534546, "step": 7520 }, { "epoch": 1.3691635569309184, "grad_norm": 18.25, "learning_rate": 6.580218767558777e-06, "loss": 1.2830095291137695, "step": 7522 }, { "epoch": 1.3695276235551106, "grad_norm": 14.125, "learning_rate": 6.578702633798555e-06, "loss": 1.395407795906067, "step": 7524 }, { "epoch": 1.3698916901793028, "grad_norm": 14.375, "learning_rate": 6.5771864151160705e-06, "loss": 1.375747561454773, "step": 7526 }, { "epoch": 1.370255756803495, "grad_norm": 9.1875, "learning_rate": 6.575670111733814e-06, "loss": 1.302952527999878, "step": 7528 }, { "epoch": 1.3706198234276874, "grad_norm": 7.46875, "learning_rate": 6.574153723874304e-06, "loss": 1.146970272064209, "step": 7530 }, { "epoch": 1.3709838900518796, "grad_norm": 7.0625, "learning_rate": 6.572637251760061e-06, "loss": 1.2203412055969238, "step": 7532 }, { "epoch": 1.3713479566760718, "grad_norm": 10.1875, "learning_rate": 6.571120695613623e-06, "loss": 1.3703068494796753, "step": 7534 }, { "epoch": 1.371712023300264, "grad_norm": 15.8125, "learning_rate": 6.569604055657538e-06, "loss": 1.4244000911712646, "step": 7536 }, { "epoch": 1.3720760899244562, "grad_norm": 28.125, "learning_rate": 6.568087332114363e-06, "loss": 1.5603991746902466, "step": 7538 }, { "epoch": 1.3724401565486484, "grad_norm": 14.25, "learning_rate": 6.5665705252066765e-06, "loss": 1.2575956583023071, "step": 7540 }, { "epoch": 1.3728042231728406, "grad_norm": 15.5, "learning_rate": 6.5650536351570625e-06, "loss": 1.565503478050232, "step": 7542 }, { "epoch": 1.3731682897970328, "grad_norm": 10.8125, "learning_rate": 6.563536662188117e-06, "loss": 1.1888121366500854, "step": 7544 }, { "epoch": 1.373532356421225, "grad_norm": 15.125, "learning_rate": 6.562019606522449e-06, "loss": 0.9999662637710571, "step": 7546 }, { "epoch": 1.3738964230454174, "grad_norm": 7.25, "learning_rate": 6.560502468382682e-06, "loss": 1.159920334815979, "step": 7548 }, { "epoch": 1.3742604896696096, "grad_norm": 12.625, "learning_rate": 6.558985247991449e-06, "loss": 1.4574644565582275, "step": 7550 }, { "epoch": 1.3746245562938018, "grad_norm": 16.75, "learning_rate": 6.557467945571399e-06, "loss": 1.4080231189727783, "step": 7552 }, { "epoch": 1.374988622917994, "grad_norm": 15.375, "learning_rate": 6.555950561345184e-06, "loss": 1.7421507835388184, "step": 7554 }, { "epoch": 1.3753526895421861, "grad_norm": 12.75, "learning_rate": 6.554433095535479e-06, "loss": 1.697135090827942, "step": 7556 }, { "epoch": 1.3757167561663786, "grad_norm": 24.25, "learning_rate": 6.552915548364962e-06, "loss": 1.6388407945632935, "step": 7558 }, { "epoch": 1.3760808227905708, "grad_norm": 10.3125, "learning_rate": 6.551397920056331e-06, "loss": 1.2764458656311035, "step": 7560 }, { "epoch": 1.376444889414763, "grad_norm": 31.0, "learning_rate": 6.549880210832289e-06, "loss": 1.5978236198425293, "step": 7562 }, { "epoch": 1.3768089560389551, "grad_norm": 11.625, "learning_rate": 6.548362420915554e-06, "loss": 1.7277586460113525, "step": 7564 }, { "epoch": 1.3771730226631473, "grad_norm": 11.25, "learning_rate": 6.546844550528857e-06, "loss": 1.4749855995178223, "step": 7566 }, { "epoch": 1.3775370892873395, "grad_norm": 25.0, "learning_rate": 6.545326599894936e-06, "loss": 1.6118240356445312, "step": 7568 }, { "epoch": 1.3779011559115317, "grad_norm": 8.9375, "learning_rate": 6.543808569236549e-06, "loss": 0.887904167175293, "step": 7570 }, { "epoch": 1.378265222535724, "grad_norm": 9.75, "learning_rate": 6.542290458776459e-06, "loss": 0.5731221437454224, "step": 7572 }, { "epoch": 1.3786292891599163, "grad_norm": 3.15625, "learning_rate": 6.540772268737441e-06, "loss": 1.0332300662994385, "step": 7574 }, { "epoch": 1.3789933557841085, "grad_norm": 16.375, "learning_rate": 6.539253999342283e-06, "loss": 1.033430576324463, "step": 7576 }, { "epoch": 1.3793574224083007, "grad_norm": 15.625, "learning_rate": 6.537735650813788e-06, "loss": 1.3279283046722412, "step": 7578 }, { "epoch": 1.379721489032493, "grad_norm": 11.625, "learning_rate": 6.5362172233747635e-06, "loss": 1.3901052474975586, "step": 7580 }, { "epoch": 1.3800855556566851, "grad_norm": 16.125, "learning_rate": 6.534698717248036e-06, "loss": 1.350856900215149, "step": 7582 }, { "epoch": 1.3804496222808775, "grad_norm": 13.5625, "learning_rate": 6.533180132656438e-06, "loss": 0.8734549880027771, "step": 7584 }, { "epoch": 1.3808136889050697, "grad_norm": 5.875, "learning_rate": 6.531661469822817e-06, "loss": 1.246248722076416, "step": 7586 }, { "epoch": 1.381177755529262, "grad_norm": 11.1875, "learning_rate": 6.5301427289700284e-06, "loss": 1.71853768825531, "step": 7588 }, { "epoch": 1.3815418221534541, "grad_norm": 13.5, "learning_rate": 6.5286239103209435e-06, "loss": 1.510822057723999, "step": 7590 }, { "epoch": 1.3819058887776463, "grad_norm": 7.4375, "learning_rate": 6.5271050140984425e-06, "loss": 1.5353522300720215, "step": 7592 }, { "epoch": 1.3822699554018385, "grad_norm": 8.875, "learning_rate": 6.525586040525415e-06, "loss": 1.2981197834014893, "step": 7594 }, { "epoch": 1.3826340220260307, "grad_norm": 12.75, "learning_rate": 6.52406698982477e-06, "loss": 1.3658808469772339, "step": 7596 }, { "epoch": 1.382998088650223, "grad_norm": 8.3125, "learning_rate": 6.5225478622194146e-06, "loss": 1.2242591381072998, "step": 7598 }, { "epoch": 1.383362155274415, "grad_norm": 10.0625, "learning_rate": 6.52102865793228e-06, "loss": 0.9514259099960327, "step": 7600 }, { "epoch": 1.3837262218986075, "grad_norm": 13.4375, "learning_rate": 6.519509377186302e-06, "loss": 1.3797001838684082, "step": 7602 }, { "epoch": 1.3840902885227997, "grad_norm": 16.25, "learning_rate": 6.517990020204425e-06, "loss": 1.4543248414993286, "step": 7604 }, { "epoch": 1.3844543551469919, "grad_norm": 10.3125, "learning_rate": 6.516470587209616e-06, "loss": 1.024301290512085, "step": 7606 }, { "epoch": 1.384818421771184, "grad_norm": 8.875, "learning_rate": 6.514951078424838e-06, "loss": 1.0712511539459229, "step": 7608 }, { "epoch": 1.3851824883953763, "grad_norm": 28.375, "learning_rate": 6.513431494073077e-06, "loss": 1.6277577877044678, "step": 7610 }, { "epoch": 1.3855465550195687, "grad_norm": 25.875, "learning_rate": 6.511911834377326e-06, "loss": 1.533146619796753, "step": 7612 }, { "epoch": 1.3859106216437609, "grad_norm": 18.25, "learning_rate": 6.510392099560587e-06, "loss": 1.6026009321212769, "step": 7614 }, { "epoch": 1.386274688267953, "grad_norm": 17.125, "learning_rate": 6.508872289845878e-06, "loss": 1.763692855834961, "step": 7616 }, { "epoch": 1.3866387548921453, "grad_norm": 44.75, "learning_rate": 6.5073524054562185e-06, "loss": 1.4544414281845093, "step": 7618 }, { "epoch": 1.3870028215163375, "grad_norm": 9.875, "learning_rate": 6.505832446614653e-06, "loss": 1.4079960584640503, "step": 7620 }, { "epoch": 1.3873668881405297, "grad_norm": 11.8125, "learning_rate": 6.504312413544225e-06, "loss": 1.3743195533752441, "step": 7622 }, { "epoch": 1.3877309547647219, "grad_norm": 4.53125, "learning_rate": 6.502792306467993e-06, "loss": 1.0719960927963257, "step": 7624 }, { "epoch": 1.388095021388914, "grad_norm": 20.5, "learning_rate": 6.5012721256090306e-06, "loss": 1.5415546894073486, "step": 7626 }, { "epoch": 1.3884590880131065, "grad_norm": 18.75, "learning_rate": 6.499751871190412e-06, "loss": 1.4556267261505127, "step": 7628 }, { "epoch": 1.3888231546372987, "grad_norm": 12.4375, "learning_rate": 6.4982315434352314e-06, "loss": 1.4458072185516357, "step": 7630 }, { "epoch": 1.3891872212614909, "grad_norm": 15.4375, "learning_rate": 6.496711142566594e-06, "loss": 1.720502495765686, "step": 7632 }, { "epoch": 1.389551287885683, "grad_norm": 18.625, "learning_rate": 6.4951906688076075e-06, "loss": 1.0731414556503296, "step": 7634 }, { "epoch": 1.3899153545098752, "grad_norm": 17.5, "learning_rate": 6.4936701223813995e-06, "loss": 1.1338034868240356, "step": 7636 }, { "epoch": 1.3902794211340677, "grad_norm": 18.875, "learning_rate": 6.492149503511099e-06, "loss": 1.3427939414978027, "step": 7638 }, { "epoch": 1.3906434877582599, "grad_norm": 14.5, "learning_rate": 6.490628812419852e-06, "loss": 1.8454996347427368, "step": 7640 }, { "epoch": 1.391007554382452, "grad_norm": 10.3125, "learning_rate": 6.489108049330819e-06, "loss": 1.4115500450134277, "step": 7642 }, { "epoch": 1.3913716210066442, "grad_norm": 8.5, "learning_rate": 6.487587214467159e-06, "loss": 1.09733247756958, "step": 7644 }, { "epoch": 1.3917356876308364, "grad_norm": 11.8125, "learning_rate": 6.486066308052055e-06, "loss": 1.4169155359268188, "step": 7646 }, { "epoch": 1.3920997542550286, "grad_norm": 11.1875, "learning_rate": 6.484545330308686e-06, "loss": 1.3464170694351196, "step": 7648 }, { "epoch": 1.3924638208792208, "grad_norm": 8.1875, "learning_rate": 6.483024281460255e-06, "loss": 1.254568099975586, "step": 7650 }, { "epoch": 1.392827887503413, "grad_norm": 20.75, "learning_rate": 6.481503161729969e-06, "loss": 1.137978196144104, "step": 7652 }, { "epoch": 1.3931919541276052, "grad_norm": 18.625, "learning_rate": 6.479981971341045e-06, "loss": 1.923954963684082, "step": 7654 }, { "epoch": 1.3935560207517976, "grad_norm": 9.9375, "learning_rate": 6.478460710516712e-06, "loss": 1.641863226890564, "step": 7656 }, { "epoch": 1.3939200873759898, "grad_norm": 15.875, "learning_rate": 6.476939379480207e-06, "loss": 1.1812974214553833, "step": 7658 }, { "epoch": 1.394284154000182, "grad_norm": 18.125, "learning_rate": 6.4754179784547835e-06, "loss": 1.8159241676330566, "step": 7660 }, { "epoch": 1.3946482206243742, "grad_norm": 31.75, "learning_rate": 6.473896507663697e-06, "loss": 1.0422096252441406, "step": 7662 }, { "epoch": 1.3950122872485664, "grad_norm": 8.1875, "learning_rate": 6.472374967330217e-06, "loss": 1.3442343473434448, "step": 7664 }, { "epoch": 1.3953763538727588, "grad_norm": 16.5, "learning_rate": 6.470853357677629e-06, "loss": 1.4701565504074097, "step": 7666 }, { "epoch": 1.395740420496951, "grad_norm": 171.0, "learning_rate": 6.469331678929216e-06, "loss": 0.7476240396499634, "step": 7668 }, { "epoch": 1.3961044871211432, "grad_norm": 14.6875, "learning_rate": 6.467809931308281e-06, "loss": 0.70750492811203, "step": 7670 }, { "epoch": 1.3964685537453354, "grad_norm": 10.0, "learning_rate": 6.4662881150381356e-06, "loss": 1.3857115507125854, "step": 7672 }, { "epoch": 1.3968326203695276, "grad_norm": 17.25, "learning_rate": 6.464766230342099e-06, "loss": 1.4983041286468506, "step": 7674 }, { "epoch": 1.3971966869937198, "grad_norm": 13.0, "learning_rate": 6.463244277443501e-06, "loss": 1.8194019794464111, "step": 7676 }, { "epoch": 1.397560753617912, "grad_norm": 46.5, "learning_rate": 6.461722256565683e-06, "loss": 1.6128919124603271, "step": 7678 }, { "epoch": 1.3979248202421042, "grad_norm": 22.75, "learning_rate": 6.4602001679319944e-06, "loss": 0.8991611003875732, "step": 7680 }, { "epoch": 1.3982888868662966, "grad_norm": 8.875, "learning_rate": 6.458678011765798e-06, "loss": 1.4467920064926147, "step": 7682 }, { "epoch": 1.3986529534904888, "grad_norm": 8.625, "learning_rate": 6.457155788290459e-06, "loss": 1.2151093482971191, "step": 7684 }, { "epoch": 1.399017020114681, "grad_norm": 8.5625, "learning_rate": 6.455633497729365e-06, "loss": 1.107391595840454, "step": 7686 }, { "epoch": 1.3993810867388732, "grad_norm": 20.625, "learning_rate": 6.454111140305899e-06, "loss": 1.2695037126541138, "step": 7688 }, { "epoch": 1.3997451533630654, "grad_norm": 24.375, "learning_rate": 6.452588716243463e-06, "loss": 1.5658984184265137, "step": 7690 }, { "epoch": 1.4001092199872578, "grad_norm": 10.0, "learning_rate": 6.451066225765468e-06, "loss": 0.6073099374771118, "step": 7692 }, { "epoch": 1.40047328661145, "grad_norm": 9.3125, "learning_rate": 6.4495436690953324e-06, "loss": 1.4313074350357056, "step": 7694 }, { "epoch": 1.4008373532356422, "grad_norm": 9.25, "learning_rate": 6.448021046456487e-06, "loss": 1.3558878898620605, "step": 7696 }, { "epoch": 1.4012014198598344, "grad_norm": 16.25, "learning_rate": 6.4464983580723665e-06, "loss": 1.1899185180664062, "step": 7698 }, { "epoch": 1.4015654864840266, "grad_norm": 8.25, "learning_rate": 6.444975604166421e-06, "loss": 1.286263346672058, "step": 7700 }, { "epoch": 1.4019295531082188, "grad_norm": 12.875, "learning_rate": 6.44345278496211e-06, "loss": 1.10990571975708, "step": 7702 }, { "epoch": 1.402293619732411, "grad_norm": 59.0, "learning_rate": 6.4419299006829e-06, "loss": 1.305938482284546, "step": 7704 }, { "epoch": 1.4026576863566032, "grad_norm": 9.75, "learning_rate": 6.4404069515522686e-06, "loss": 1.478846788406372, "step": 7706 }, { "epoch": 1.4030217529807956, "grad_norm": 22.125, "learning_rate": 6.4388839377937e-06, "loss": 1.6847670078277588, "step": 7708 }, { "epoch": 1.4033858196049878, "grad_norm": 14.4375, "learning_rate": 6.437360859630692e-06, "loss": 1.8299840688705444, "step": 7710 }, { "epoch": 1.40374988622918, "grad_norm": 38.5, "learning_rate": 6.435837717286753e-06, "loss": 1.9381146430969238, "step": 7712 }, { "epoch": 1.4041139528533721, "grad_norm": 15.1875, "learning_rate": 6.434314510985393e-06, "loss": 1.411583662033081, "step": 7714 }, { "epoch": 1.4044780194775643, "grad_norm": 5.9375, "learning_rate": 6.432791240950141e-06, "loss": 1.4456018209457397, "step": 7716 }, { "epoch": 1.4048420861017568, "grad_norm": 9.3125, "learning_rate": 6.431267907404526e-06, "loss": 1.244000792503357, "step": 7718 }, { "epoch": 1.405206152725949, "grad_norm": 13.1875, "learning_rate": 6.429744510572093e-06, "loss": 1.2111845016479492, "step": 7720 }, { "epoch": 1.4055702193501411, "grad_norm": 12.625, "learning_rate": 6.428221050676398e-06, "loss": 1.2410328388214111, "step": 7722 }, { "epoch": 1.4059342859743333, "grad_norm": 25.25, "learning_rate": 6.426697527940997e-06, "loss": 1.1156731843948364, "step": 7724 }, { "epoch": 1.4062983525985255, "grad_norm": 9.125, "learning_rate": 6.4251739425894626e-06, "loss": 1.451101303100586, "step": 7726 }, { "epoch": 1.4066624192227177, "grad_norm": 12.1875, "learning_rate": 6.423650294845376e-06, "loss": 1.106182336807251, "step": 7728 }, { "epoch": 1.40702648584691, "grad_norm": 19.75, "learning_rate": 6.422126584932325e-06, "loss": 1.5805258750915527, "step": 7730 }, { "epoch": 1.4073905524711021, "grad_norm": 12.5, "learning_rate": 6.420602813073909e-06, "loss": 1.4265527725219727, "step": 7732 }, { "epoch": 1.4077546190952943, "grad_norm": 53.25, "learning_rate": 6.419078979493734e-06, "loss": 1.388393759727478, "step": 7734 }, { "epoch": 1.4081186857194867, "grad_norm": 15.4375, "learning_rate": 6.4175550844154175e-06, "loss": 1.3540140390396118, "step": 7736 }, { "epoch": 1.408482752343679, "grad_norm": 12.0625, "learning_rate": 6.416031128062585e-06, "loss": 1.3345158100128174, "step": 7738 }, { "epoch": 1.4088468189678711, "grad_norm": 39.25, "learning_rate": 6.4145071106588696e-06, "loss": 1.5851914882659912, "step": 7740 }, { "epoch": 1.4092108855920633, "grad_norm": 10.875, "learning_rate": 6.412983032427915e-06, "loss": 1.581083059310913, "step": 7742 }, { "epoch": 1.4095749522162555, "grad_norm": 3.390625, "learning_rate": 6.4114588935933764e-06, "loss": 1.36771559715271, "step": 7744 }, { "epoch": 1.409939018840448, "grad_norm": 10.0625, "learning_rate": 6.409934694378912e-06, "loss": 1.0774198770523071, "step": 7746 }, { "epoch": 1.4103030854646401, "grad_norm": 110.0, "learning_rate": 6.408410435008191e-06, "loss": 1.6050190925598145, "step": 7748 }, { "epoch": 1.4106671520888323, "grad_norm": 12.25, "learning_rate": 6.406886115704896e-06, "loss": 1.5230917930603027, "step": 7750 }, { "epoch": 1.4110312187130245, "grad_norm": 13.5625, "learning_rate": 6.40536173669271e-06, "loss": 1.3671441078186035, "step": 7752 }, { "epoch": 1.4113952853372167, "grad_norm": 11.5, "learning_rate": 6.403837298195333e-06, "loss": 1.7444974184036255, "step": 7754 }, { "epoch": 1.411759351961409, "grad_norm": 8.0, "learning_rate": 6.40231280043647e-06, "loss": 1.3938043117523193, "step": 7756 }, { "epoch": 1.412123418585601, "grad_norm": 13.875, "learning_rate": 6.400788243639833e-06, "loss": 1.1789401769638062, "step": 7758 }, { "epoch": 1.4124874852097933, "grad_norm": 11.8125, "learning_rate": 6.399263628029145e-06, "loss": 1.7180346250534058, "step": 7760 }, { "epoch": 1.4128515518339857, "grad_norm": 14.3125, "learning_rate": 6.397738953828139e-06, "loss": 1.3375813961029053, "step": 7762 }, { "epoch": 1.413215618458178, "grad_norm": 6.6875, "learning_rate": 6.396214221260553e-06, "loss": 1.2305355072021484, "step": 7764 }, { "epoch": 1.41357968508237, "grad_norm": 16.375, "learning_rate": 6.394689430550136e-06, "loss": 1.4293663501739502, "step": 7766 }, { "epoch": 1.4139437517065623, "grad_norm": 19.0, "learning_rate": 6.393164581920644e-06, "loss": 1.6117398738861084, "step": 7768 }, { "epoch": 1.4143078183307545, "grad_norm": 6.96875, "learning_rate": 6.391639675595842e-06, "loss": 1.616674780845642, "step": 7770 }, { "epoch": 1.414671884954947, "grad_norm": 22.25, "learning_rate": 6.3901147117995065e-06, "loss": 1.1837644577026367, "step": 7772 }, { "epoch": 1.415035951579139, "grad_norm": 14.0, "learning_rate": 6.388589690755418e-06, "loss": 1.2595970630645752, "step": 7774 }, { "epoch": 1.4154000182033313, "grad_norm": 14.3125, "learning_rate": 6.387064612687366e-06, "loss": 1.4133833646774292, "step": 7776 }, { "epoch": 1.4157640848275235, "grad_norm": 7.5, "learning_rate": 6.385539477819151e-06, "loss": 1.6039048433303833, "step": 7778 }, { "epoch": 1.4161281514517157, "grad_norm": 10.125, "learning_rate": 6.3840142863745806e-06, "loss": 1.2344555854797363, "step": 7780 }, { "epoch": 1.4164922180759079, "grad_norm": 14.875, "learning_rate": 6.3824890385774695e-06, "loss": 1.3861414194107056, "step": 7782 }, { "epoch": 1.4168562847001, "grad_norm": 6.1875, "learning_rate": 6.380963734651643e-06, "loss": 1.3523224592208862, "step": 7784 }, { "epoch": 1.4172203513242922, "grad_norm": 8.3125, "learning_rate": 6.379438374820932e-06, "loss": 1.0533229112625122, "step": 7786 }, { "epoch": 1.4175844179484844, "grad_norm": 16.125, "learning_rate": 6.377912959309176e-06, "loss": 1.3463441133499146, "step": 7788 }, { "epoch": 1.4179484845726769, "grad_norm": 6.625, "learning_rate": 6.376387488340225e-06, "loss": 1.2822659015655518, "step": 7790 }, { "epoch": 1.418312551196869, "grad_norm": 8.4375, "learning_rate": 6.374861962137937e-06, "loss": 1.2499428987503052, "step": 7792 }, { "epoch": 1.4186766178210612, "grad_norm": 8.375, "learning_rate": 6.373336380926175e-06, "loss": 1.30571448802948, "step": 7794 }, { "epoch": 1.4190406844452534, "grad_norm": 15.8125, "learning_rate": 6.37181074492881e-06, "loss": 1.4697508811950684, "step": 7796 }, { "epoch": 1.4194047510694456, "grad_norm": 70.0, "learning_rate": 6.370285054369728e-06, "loss": 1.6303260326385498, "step": 7798 }, { "epoch": 1.419768817693638, "grad_norm": 12.6875, "learning_rate": 6.368759309472814e-06, "loss": 1.372489333152771, "step": 7800 }, { "epoch": 1.4201328843178302, "grad_norm": 12.375, "learning_rate": 6.3672335104619654e-06, "loss": 1.3742971420288086, "step": 7802 }, { "epoch": 1.4204969509420224, "grad_norm": 9.8125, "learning_rate": 6.3657076575610865e-06, "loss": 1.2103904485702515, "step": 7804 }, { "epoch": 1.4208610175662146, "grad_norm": 4.59375, "learning_rate": 6.3641817509940915e-06, "loss": 0.9063478708267212, "step": 7806 }, { "epoch": 1.4212250841904068, "grad_norm": 4.09375, "learning_rate": 6.362655790984901e-06, "loss": 1.069659948348999, "step": 7808 }, { "epoch": 1.421589150814599, "grad_norm": 28.125, "learning_rate": 6.361129777757442e-06, "loss": 1.3138788938522339, "step": 7810 }, { "epoch": 1.4219532174387912, "grad_norm": 15.0625, "learning_rate": 6.3596037115356514e-06, "loss": 0.5693075656890869, "step": 7812 }, { "epoch": 1.4223172840629834, "grad_norm": 19.75, "learning_rate": 6.358077592543474e-06, "loss": 1.3636194467544556, "step": 7814 }, { "epoch": 1.4226813506871758, "grad_norm": 7.1875, "learning_rate": 6.356551421004862e-06, "loss": 1.2531205415725708, "step": 7816 }, { "epoch": 1.423045417311368, "grad_norm": 5.09375, "learning_rate": 6.355025197143773e-06, "loss": 0.9501316547393799, "step": 7818 }, { "epoch": 1.4234094839355602, "grad_norm": 18.125, "learning_rate": 6.3534989211841755e-06, "loss": 1.4885741472244263, "step": 7820 }, { "epoch": 1.4237735505597524, "grad_norm": 8.5625, "learning_rate": 6.351972593350044e-06, "loss": 1.7057766914367676, "step": 7822 }, { "epoch": 1.4241376171839446, "grad_norm": 9.0, "learning_rate": 6.35044621386536e-06, "loss": 1.0497283935546875, "step": 7824 }, { "epoch": 1.424501683808137, "grad_norm": 8.75, "learning_rate": 6.348919782954115e-06, "loss": 1.0681543350219727, "step": 7826 }, { "epoch": 1.4248657504323292, "grad_norm": 9.0, "learning_rate": 6.347393300840305e-06, "loss": 1.7147891521453857, "step": 7828 }, { "epoch": 1.4252298170565214, "grad_norm": 17.75, "learning_rate": 6.345866767747938e-06, "loss": 1.4910051822662354, "step": 7830 }, { "epoch": 1.4255938836807136, "grad_norm": 10.1875, "learning_rate": 6.344340183901023e-06, "loss": 1.5224055051803589, "step": 7832 }, { "epoch": 1.4259579503049058, "grad_norm": 33.25, "learning_rate": 6.342813549523581e-06, "loss": 0.6270468235015869, "step": 7834 }, { "epoch": 1.426322016929098, "grad_norm": 21.75, "learning_rate": 6.341286864839642e-06, "loss": 0.4331286549568176, "step": 7836 }, { "epoch": 1.4266860835532902, "grad_norm": 14.0, "learning_rate": 6.339760130073236e-06, "loss": 0.8436590433120728, "step": 7838 }, { "epoch": 1.4270501501774824, "grad_norm": 13.0625, "learning_rate": 6.33823334544841e-06, "loss": 1.3369219303131104, "step": 7840 }, { "epoch": 1.4274142168016746, "grad_norm": 14.0, "learning_rate": 6.3367065111892105e-06, "loss": 1.5874592065811157, "step": 7842 }, { "epoch": 1.427778283425867, "grad_norm": 10.4375, "learning_rate": 6.335179627519694e-06, "loss": 1.8331938982009888, "step": 7844 }, { "epoch": 1.4281423500500592, "grad_norm": 18.125, "learning_rate": 6.333652694663928e-06, "loss": 1.39878511428833, "step": 7846 }, { "epoch": 1.4285064166742514, "grad_norm": 15.625, "learning_rate": 6.33212571284598e-06, "loss": 1.4116853475570679, "step": 7848 }, { "epoch": 1.4288704832984436, "grad_norm": 6.5, "learning_rate": 6.330598682289928e-06, "loss": 1.092564582824707, "step": 7850 }, { "epoch": 1.4292345499226358, "grad_norm": 4.65625, "learning_rate": 6.329071603219861e-06, "loss": 0.9261064529418945, "step": 7852 }, { "epoch": 1.4295986165468282, "grad_norm": 7.65625, "learning_rate": 6.32754447585987e-06, "loss": 1.3433537483215332, "step": 7854 }, { "epoch": 1.4299626831710204, "grad_norm": 8.0625, "learning_rate": 6.326017300434053e-06, "loss": 1.3853176832199097, "step": 7856 }, { "epoch": 1.4303267497952126, "grad_norm": 6.5, "learning_rate": 6.32449007716652e-06, "loss": 1.2959727048873901, "step": 7858 }, { "epoch": 1.4306908164194048, "grad_norm": 13.3125, "learning_rate": 6.322962806281383e-06, "loss": 1.3257619142532349, "step": 7860 }, { "epoch": 1.431054883043597, "grad_norm": 10.8125, "learning_rate": 6.321435488002763e-06, "loss": 1.3447543382644653, "step": 7862 }, { "epoch": 1.4314189496677892, "grad_norm": 5.34375, "learning_rate": 6.319908122554788e-06, "loss": 1.2987921237945557, "step": 7864 }, { "epoch": 1.4317830162919813, "grad_norm": 31.5, "learning_rate": 6.318380710161591e-06, "loss": 1.4172015190124512, "step": 7866 }, { "epoch": 1.4321470829161735, "grad_norm": 8.25, "learning_rate": 6.3168532510473165e-06, "loss": 1.053973913192749, "step": 7868 }, { "epoch": 1.432511149540366, "grad_norm": 10.8125, "learning_rate": 6.31532574543611e-06, "loss": 0.394927054643631, "step": 7870 }, { "epoch": 1.4328752161645582, "grad_norm": 17.25, "learning_rate": 6.31379819355213e-06, "loss": 1.2818745374679565, "step": 7872 }, { "epoch": 1.4332392827887503, "grad_norm": 3.625, "learning_rate": 6.312270595619536e-06, "loss": 1.3657610416412354, "step": 7874 }, { "epoch": 1.4336033494129425, "grad_norm": 43.5, "learning_rate": 6.310742951862498e-06, "loss": 1.3637645244598389, "step": 7876 }, { "epoch": 1.4339674160371347, "grad_norm": 16.25, "learning_rate": 6.309215262505191e-06, "loss": 1.5117475986480713, "step": 7878 }, { "epoch": 1.4343314826613272, "grad_norm": 10.3125, "learning_rate": 6.307687527771798e-06, "loss": 1.3734525442123413, "step": 7880 }, { "epoch": 1.4346955492855193, "grad_norm": 16.0, "learning_rate": 6.306159747886505e-06, "loss": 1.3569352626800537, "step": 7882 }, { "epoch": 1.4350596159097115, "grad_norm": 12.0625, "learning_rate": 6.304631923073512e-06, "loss": 1.6434990167617798, "step": 7884 }, { "epoch": 1.4354236825339037, "grad_norm": 21.0, "learning_rate": 6.303104053557019e-06, "loss": 1.9740383625030518, "step": 7886 }, { "epoch": 1.435787749158096, "grad_norm": 10.1875, "learning_rate": 6.301576139561232e-06, "loss": 1.5621310472488403, "step": 7888 }, { "epoch": 1.4361518157822881, "grad_norm": 15.4375, "learning_rate": 6.300048181310372e-06, "loss": 1.3611433506011963, "step": 7890 }, { "epoch": 1.4365158824064803, "grad_norm": 10.5625, "learning_rate": 6.2985201790286555e-06, "loss": 1.4110527038574219, "step": 7892 }, { "epoch": 1.4368799490306725, "grad_norm": 18.125, "learning_rate": 6.2969921329403115e-06, "loss": 1.5412523746490479, "step": 7894 }, { "epoch": 1.4372440156548647, "grad_norm": 15.5, "learning_rate": 6.295464043269577e-06, "loss": 1.3198649883270264, "step": 7896 }, { "epoch": 1.4376080822790571, "grad_norm": 29.25, "learning_rate": 6.293935910240691e-06, "loss": 1.452551007270813, "step": 7898 }, { "epoch": 1.4379721489032493, "grad_norm": 17.375, "learning_rate": 6.292407734077902e-06, "loss": 1.649959683418274, "step": 7900 }, { "epoch": 1.4383362155274415, "grad_norm": 6.28125, "learning_rate": 6.290879515005464e-06, "loss": 1.087284803390503, "step": 7902 }, { "epoch": 1.4387002821516337, "grad_norm": 11.9375, "learning_rate": 6.289351253247634e-06, "loss": 1.2501131296157837, "step": 7904 }, { "epoch": 1.439064348775826, "grad_norm": 20.625, "learning_rate": 6.287822949028682e-06, "loss": 1.6574360132217407, "step": 7906 }, { "epoch": 1.4394284154000183, "grad_norm": 11.6875, "learning_rate": 6.286294602572876e-06, "loss": 1.2489105463027954, "step": 7908 }, { "epoch": 1.4397924820242105, "grad_norm": 11.1875, "learning_rate": 6.284766214104499e-06, "loss": 1.785330057144165, "step": 7910 }, { "epoch": 1.4401565486484027, "grad_norm": 8.125, "learning_rate": 6.283237783847836e-06, "loss": 1.2349014282226562, "step": 7912 }, { "epoch": 1.440520615272595, "grad_norm": 8.375, "learning_rate": 6.281709312027173e-06, "loss": 0.9802764058113098, "step": 7914 }, { "epoch": 1.440884681896787, "grad_norm": 27.875, "learning_rate": 6.280180798866811e-06, "loss": 2.0262279510498047, "step": 7916 }, { "epoch": 1.4412487485209793, "grad_norm": 7.8125, "learning_rate": 6.2786522445910525e-06, "loss": 1.2292743921279907, "step": 7918 }, { "epoch": 1.4416128151451715, "grad_norm": 9.0, "learning_rate": 6.277123649424207e-06, "loss": 1.2832567691802979, "step": 7920 }, { "epoch": 1.4419768817693637, "grad_norm": 6.3125, "learning_rate": 6.27559501359059e-06, "loss": 1.0234166383743286, "step": 7922 }, { "epoch": 1.442340948393556, "grad_norm": 15.625, "learning_rate": 6.274066337314519e-06, "loss": 1.7633954286575317, "step": 7924 }, { "epoch": 1.4427050150177483, "grad_norm": 6.84375, "learning_rate": 6.2725376208203245e-06, "loss": 1.1534686088562012, "step": 7926 }, { "epoch": 1.4430690816419405, "grad_norm": 24.125, "learning_rate": 6.27100886433234e-06, "loss": 1.1233798265457153, "step": 7928 }, { "epoch": 1.4434331482661327, "grad_norm": 10.8125, "learning_rate": 6.269480068074902e-06, "loss": 1.4721591472625732, "step": 7930 }, { "epoch": 1.4437972148903249, "grad_norm": 25.625, "learning_rate": 6.267951232272356e-06, "loss": 1.6715428829193115, "step": 7932 }, { "epoch": 1.4441612815145173, "grad_norm": 13.0625, "learning_rate": 6.266422357149051e-06, "loss": 1.6308305263519287, "step": 7934 }, { "epoch": 1.4445253481387095, "grad_norm": 5.78125, "learning_rate": 6.264893442929347e-06, "loss": 1.2765209674835205, "step": 7936 }, { "epoch": 1.4448894147629017, "grad_norm": 6.28125, "learning_rate": 6.263364489837604e-06, "loss": 1.2898404598236084, "step": 7938 }, { "epoch": 1.4452534813870939, "grad_norm": 11.25, "learning_rate": 6.261835498098189e-06, "loss": 1.2175884246826172, "step": 7940 }, { "epoch": 1.445617548011286, "grad_norm": 6.53125, "learning_rate": 6.260306467935475e-06, "loss": 1.2522536516189575, "step": 7942 }, { "epoch": 1.4459816146354783, "grad_norm": 8.625, "learning_rate": 6.258777399573844e-06, "loss": 1.265086054801941, "step": 7944 }, { "epoch": 1.4463456812596704, "grad_norm": 11.875, "learning_rate": 6.2572482932376755e-06, "loss": 2.0765380859375, "step": 7946 }, { "epoch": 1.4467097478838626, "grad_norm": 8.375, "learning_rate": 6.255719149151362e-06, "loss": 1.2645471096038818, "step": 7948 }, { "epoch": 1.447073814508055, "grad_norm": 8.875, "learning_rate": 6.2541899675393015e-06, "loss": 1.412146806716919, "step": 7950 }, { "epoch": 1.4474378811322473, "grad_norm": 11.5, "learning_rate": 6.252660748625894e-06, "loss": 1.317734956741333, "step": 7952 }, { "epoch": 1.4478019477564394, "grad_norm": 8.1875, "learning_rate": 6.2511314926355445e-06, "loss": 1.4104976654052734, "step": 7954 }, { "epoch": 1.4481660143806316, "grad_norm": 12.5625, "learning_rate": 6.249602199792667e-06, "loss": 1.13209068775177, "step": 7956 }, { "epoch": 1.4485300810048238, "grad_norm": 14.0, "learning_rate": 6.248072870321677e-06, "loss": 1.178959608078003, "step": 7958 }, { "epoch": 1.448894147629016, "grad_norm": 61.25, "learning_rate": 6.246543504447e-06, "loss": 1.770942211151123, "step": 7960 }, { "epoch": 1.4492582142532084, "grad_norm": 5.78125, "learning_rate": 6.245014102393062e-06, "loss": 1.1800146102905273, "step": 7962 }, { "epoch": 1.4496222808774006, "grad_norm": 7.65625, "learning_rate": 6.243484664384299e-06, "loss": 1.2909984588623047, "step": 7964 }, { "epoch": 1.4499863475015928, "grad_norm": 12.4375, "learning_rate": 6.241955190645146e-06, "loss": 1.340165138244629, "step": 7966 }, { "epoch": 1.450350414125785, "grad_norm": 11.125, "learning_rate": 6.240425681400051e-06, "loss": 1.3229106664657593, "step": 7968 }, { "epoch": 1.4507144807499772, "grad_norm": 17.875, "learning_rate": 6.2388961368734604e-06, "loss": 1.4175243377685547, "step": 7970 }, { "epoch": 1.4510785473741694, "grad_norm": 13.4375, "learning_rate": 6.2373665572898304e-06, "loss": 1.316265344619751, "step": 7972 }, { "epoch": 1.4514426139983616, "grad_norm": 7.28125, "learning_rate": 6.235836942873619e-06, "loss": 1.2253687381744385, "step": 7974 }, { "epoch": 1.4518066806225538, "grad_norm": 13.9375, "learning_rate": 6.2343072938492935e-06, "loss": 1.4171786308288574, "step": 7976 }, { "epoch": 1.4521707472467462, "grad_norm": 10.0625, "learning_rate": 6.232777610441322e-06, "loss": 1.399134635925293, "step": 7978 }, { "epoch": 1.4525348138709384, "grad_norm": 7.6875, "learning_rate": 6.231247892874179e-06, "loss": 1.1127716302871704, "step": 7980 }, { "epoch": 1.4528988804951306, "grad_norm": 12.0, "learning_rate": 6.229718141372345e-06, "loss": 1.4004936218261719, "step": 7982 }, { "epoch": 1.4532629471193228, "grad_norm": 4.59375, "learning_rate": 6.228188356160305e-06, "loss": 1.0497952699661255, "step": 7984 }, { "epoch": 1.453627013743515, "grad_norm": 11.125, "learning_rate": 6.226658537462548e-06, "loss": 1.4026964902877808, "step": 7986 }, { "epoch": 1.4539910803677074, "grad_norm": 8.5, "learning_rate": 6.22512868550357e-06, "loss": 0.9647002220153809, "step": 7988 }, { "epoch": 1.4543551469918996, "grad_norm": 33.5, "learning_rate": 6.223598800507868e-06, "loss": 0.4743680953979492, "step": 7990 }, { "epoch": 1.4547192136160918, "grad_norm": 12.0, "learning_rate": 6.22206888269995e-06, "loss": 1.285109281539917, "step": 7992 }, { "epoch": 1.455083280240284, "grad_norm": 17.625, "learning_rate": 6.220538932304323e-06, "loss": 1.426268458366394, "step": 7994 }, { "epoch": 1.4554473468644762, "grad_norm": 49.5, "learning_rate": 6.219008949545502e-06, "loss": 1.6398017406463623, "step": 7996 }, { "epoch": 1.4558114134886684, "grad_norm": 7.15625, "learning_rate": 6.217478934648005e-06, "loss": 1.1265734434127808, "step": 7998 }, { "epoch": 1.4561754801128606, "grad_norm": 11.0625, "learning_rate": 6.215948887836354e-06, "loss": 1.5812140703201294, "step": 8000 }, { "epoch": 1.4565395467370528, "grad_norm": 11.0, "learning_rate": 6.214418809335081e-06, "loss": 2.0047152042388916, "step": 8002 }, { "epoch": 1.4569036133612452, "grad_norm": 47.75, "learning_rate": 6.212888699368715e-06, "loss": 1.3158432245254517, "step": 8004 }, { "epoch": 1.4572676799854374, "grad_norm": 21.375, "learning_rate": 6.211358558161796e-06, "loss": 1.5836806297302246, "step": 8006 }, { "epoch": 1.4576317466096296, "grad_norm": 16.875, "learning_rate": 6.209828385938865e-06, "loss": 1.3055001497268677, "step": 8008 }, { "epoch": 1.4579958132338218, "grad_norm": 17.125, "learning_rate": 6.2082981829244694e-06, "loss": 1.2016761302947998, "step": 8010 }, { "epoch": 1.458359879858014, "grad_norm": 10.375, "learning_rate": 6.2067679493431586e-06, "loss": 1.1596183776855469, "step": 8012 }, { "epoch": 1.4587239464822064, "grad_norm": 6.5625, "learning_rate": 6.205237685419489e-06, "loss": 1.5556707382202148, "step": 8014 }, { "epoch": 1.4590880131063986, "grad_norm": 15.625, "learning_rate": 6.203707391378022e-06, "loss": 1.308623194694519, "step": 8016 }, { "epoch": 1.4594520797305908, "grad_norm": 10.9375, "learning_rate": 6.2021770674433205e-06, "loss": 1.185746192932129, "step": 8018 }, { "epoch": 1.459816146354783, "grad_norm": 16.625, "learning_rate": 6.200646713839954e-06, "loss": 1.3245813846588135, "step": 8020 }, { "epoch": 1.4601802129789752, "grad_norm": 16.25, "learning_rate": 6.199116330792496e-06, "loss": 1.0862973928451538, "step": 8022 }, { "epoch": 1.4605442796031674, "grad_norm": 11.5, "learning_rate": 6.197585918525522e-06, "loss": 1.0174875259399414, "step": 8024 }, { "epoch": 1.4609083462273595, "grad_norm": 19.125, "learning_rate": 6.196055477263616e-06, "loss": 1.508137822151184, "step": 8026 }, { "epoch": 1.4612724128515517, "grad_norm": 12.6875, "learning_rate": 6.194525007231362e-06, "loss": 1.5162885189056396, "step": 8028 }, { "epoch": 1.461636479475744, "grad_norm": 8.5625, "learning_rate": 6.192994508653352e-06, "loss": 1.4450267553329468, "step": 8030 }, { "epoch": 1.4620005460999363, "grad_norm": 4.90625, "learning_rate": 6.19146398175418e-06, "loss": 1.4267585277557373, "step": 8032 }, { "epoch": 1.4623646127241285, "grad_norm": 8.0625, "learning_rate": 6.189933426758446e-06, "loss": 1.2552216053009033, "step": 8034 }, { "epoch": 1.4627286793483207, "grad_norm": 13.1875, "learning_rate": 6.188402843890751e-06, "loss": 1.4184731245040894, "step": 8036 }, { "epoch": 1.463092745972513, "grad_norm": 8.4375, "learning_rate": 6.1868722333757e-06, "loss": 1.3659604787826538, "step": 8038 }, { "epoch": 1.4634568125967051, "grad_norm": 13.5625, "learning_rate": 6.1853415954379085e-06, "loss": 1.362208366394043, "step": 8040 }, { "epoch": 1.4638208792208975, "grad_norm": 17.375, "learning_rate": 6.183810930301988e-06, "loss": 1.3494126796722412, "step": 8042 }, { "epoch": 1.4641849458450897, "grad_norm": 13.0, "learning_rate": 6.182280238192558e-06, "loss": 1.3620030879974365, "step": 8044 }, { "epoch": 1.464549012469282, "grad_norm": 5.21875, "learning_rate": 6.180749519334242e-06, "loss": 1.3053460121154785, "step": 8046 }, { "epoch": 1.4649130790934741, "grad_norm": 9.875, "learning_rate": 6.179218773951667e-06, "loss": 1.2864794731140137, "step": 8048 }, { "epoch": 1.4652771457176663, "grad_norm": 8.9375, "learning_rate": 6.177688002269464e-06, "loss": 1.2111214399337769, "step": 8050 }, { "epoch": 1.4656412123418585, "grad_norm": 21.5, "learning_rate": 6.176157204512266e-06, "loss": 2.0363869667053223, "step": 8052 }, { "epoch": 1.4660052789660507, "grad_norm": 7.1875, "learning_rate": 6.174626380904711e-06, "loss": 1.0489566326141357, "step": 8054 }, { "epoch": 1.466369345590243, "grad_norm": 9.25, "learning_rate": 6.173095531671442e-06, "loss": 1.372649073600769, "step": 8056 }, { "epoch": 1.4667334122144353, "grad_norm": 7.1875, "learning_rate": 6.171564657037108e-06, "loss": 1.2606165409088135, "step": 8058 }, { "epoch": 1.4670974788386275, "grad_norm": 8.9375, "learning_rate": 6.170033757226353e-06, "loss": 1.0931357145309448, "step": 8060 }, { "epoch": 1.4674615454628197, "grad_norm": 15.8125, "learning_rate": 6.1685028324638365e-06, "loss": 1.344740867614746, "step": 8062 }, { "epoch": 1.467825612087012, "grad_norm": 6.5, "learning_rate": 6.166971882974209e-06, "loss": 1.2903242111206055, "step": 8064 }, { "epoch": 1.468189678711204, "grad_norm": 10.4375, "learning_rate": 6.1654409089821364e-06, "loss": 0.9969532489776611, "step": 8066 }, { "epoch": 1.4685537453353965, "grad_norm": 12.625, "learning_rate": 6.1639099107122795e-06, "loss": 1.8316736221313477, "step": 8068 }, { "epoch": 1.4689178119595887, "grad_norm": 26.0, "learning_rate": 6.162378888389308e-06, "loss": 1.5513944625854492, "step": 8070 }, { "epoch": 1.469281878583781, "grad_norm": 6.71875, "learning_rate": 6.1608478422378935e-06, "loss": 1.1155644655227661, "step": 8072 }, { "epoch": 1.469645945207973, "grad_norm": 6.75, "learning_rate": 6.159316772482709e-06, "loss": 1.168056607246399, "step": 8074 }, { "epoch": 1.4700100118321653, "grad_norm": 14.4375, "learning_rate": 6.157785679348434e-06, "loss": 1.2045965194702148, "step": 8076 }, { "epoch": 1.4703740784563575, "grad_norm": 17.75, "learning_rate": 6.156254563059749e-06, "loss": 1.296846628189087, "step": 8078 }, { "epoch": 1.4707381450805497, "grad_norm": 25.0, "learning_rate": 6.154723423841342e-06, "loss": 1.0326735973358154, "step": 8080 }, { "epoch": 1.4711022117047419, "grad_norm": 12.5625, "learning_rate": 6.153192261917899e-06, "loss": 1.5015560388565063, "step": 8082 }, { "epoch": 1.471466278328934, "grad_norm": 7.03125, "learning_rate": 6.151661077514113e-06, "loss": 1.3092989921569824, "step": 8084 }, { "epoch": 1.4718303449531265, "grad_norm": 29.375, "learning_rate": 6.150129870854677e-06, "loss": 1.1972593069076538, "step": 8086 }, { "epoch": 1.4721944115773187, "grad_norm": 6.5, "learning_rate": 6.148598642164292e-06, "loss": 1.0859806537628174, "step": 8088 }, { "epoch": 1.4725584782015109, "grad_norm": 7.4375, "learning_rate": 6.147067391667657e-06, "loss": 1.3672444820404053, "step": 8090 }, { "epoch": 1.472922544825703, "grad_norm": 12.5625, "learning_rate": 6.145536119589479e-06, "loss": 1.3686683177947998, "step": 8092 }, { "epoch": 1.4732866114498953, "grad_norm": 8.25, "learning_rate": 6.144004826154466e-06, "loss": 1.1499896049499512, "step": 8094 }, { "epoch": 1.4736506780740877, "grad_norm": 4.25, "learning_rate": 6.142473511587328e-06, "loss": 1.1893773078918457, "step": 8096 }, { "epoch": 1.4740147446982799, "grad_norm": 2.5625, "learning_rate": 6.140942176112779e-06, "loss": 0.8536308407783508, "step": 8098 }, { "epoch": 1.474378811322472, "grad_norm": 13.5625, "learning_rate": 6.139410819955538e-06, "loss": 0.30789417028427124, "step": 8100 }, { "epoch": 1.4747428779466643, "grad_norm": 13.9375, "learning_rate": 6.137879443340324e-06, "loss": 0.8554958701133728, "step": 8102 }, { "epoch": 1.4751069445708564, "grad_norm": 7.65625, "learning_rate": 6.136348046491859e-06, "loss": 1.2366546392440796, "step": 8104 }, { "epoch": 1.4754710111950486, "grad_norm": 31.625, "learning_rate": 6.134816629634872e-06, "loss": 1.4379687309265137, "step": 8106 }, { "epoch": 1.4758350778192408, "grad_norm": 10.3125, "learning_rate": 6.1332851929940904e-06, "loss": 1.5495352745056152, "step": 8108 }, { "epoch": 1.476199144443433, "grad_norm": 5.96875, "learning_rate": 6.131753736794248e-06, "loss": 1.2944271564483643, "step": 8110 }, { "epoch": 1.4765632110676254, "grad_norm": 4.1875, "learning_rate": 6.130222261260077e-06, "loss": 0.8558687567710876, "step": 8112 }, { "epoch": 1.4769272776918176, "grad_norm": 8.8125, "learning_rate": 6.128690766616317e-06, "loss": 1.3518931865692139, "step": 8114 }, { "epoch": 1.4772913443160098, "grad_norm": 12.8125, "learning_rate": 6.127159253087711e-06, "loss": 1.358198881149292, "step": 8116 }, { "epoch": 1.477655410940202, "grad_norm": 18.75, "learning_rate": 6.125627720898998e-06, "loss": 1.378767728805542, "step": 8118 }, { "epoch": 1.4780194775643942, "grad_norm": 20.625, "learning_rate": 6.124096170274925e-06, "loss": 1.7746188640594482, "step": 8120 }, { "epoch": 1.4783835441885866, "grad_norm": 10.625, "learning_rate": 6.122564601440244e-06, "loss": 1.2736667394638062, "step": 8122 }, { "epoch": 1.4787476108127788, "grad_norm": 58.75, "learning_rate": 6.121033014619704e-06, "loss": 1.10037100315094, "step": 8124 }, { "epoch": 1.479111677436971, "grad_norm": 9.8125, "learning_rate": 6.119501410038059e-06, "loss": 1.2981046438217163, "step": 8126 }, { "epoch": 1.4794757440611632, "grad_norm": 42.0, "learning_rate": 6.117969787920066e-06, "loss": 1.4311425685882568, "step": 8128 }, { "epoch": 1.4798398106853554, "grad_norm": 4.78125, "learning_rate": 6.116438148490487e-06, "loss": 0.9892849922180176, "step": 8130 }, { "epoch": 1.4802038773095476, "grad_norm": 4.59375, "learning_rate": 6.114906491974078e-06, "loss": 0.9240947961807251, "step": 8132 }, { "epoch": 1.4805679439337398, "grad_norm": 9.125, "learning_rate": 6.1133748185956095e-06, "loss": 1.1972553730010986, "step": 8134 }, { "epoch": 1.480932010557932, "grad_norm": 8.3125, "learning_rate": 6.111843128579846e-06, "loss": 1.4405138492584229, "step": 8136 }, { "epoch": 1.4812960771821242, "grad_norm": 17.125, "learning_rate": 6.110311422151556e-06, "loss": 1.4274497032165527, "step": 8138 }, { "epoch": 1.4816601438063166, "grad_norm": 9.75, "learning_rate": 6.108779699535512e-06, "loss": 1.306797742843628, "step": 8140 }, { "epoch": 1.4820242104305088, "grad_norm": 6.90625, "learning_rate": 6.107247960956486e-06, "loss": 1.5522130727767944, "step": 8142 }, { "epoch": 1.482388277054701, "grad_norm": 39.25, "learning_rate": 6.1057162066392595e-06, "loss": 1.1375278234481812, "step": 8144 }, { "epoch": 1.4827523436788932, "grad_norm": 8.125, "learning_rate": 6.104184436808607e-06, "loss": 1.2078139781951904, "step": 8146 }, { "epoch": 1.4831164103030854, "grad_norm": 14.9375, "learning_rate": 6.102652651689309e-06, "loss": 1.3695788383483887, "step": 8148 }, { "epoch": 1.4834804769272778, "grad_norm": 10.6875, "learning_rate": 6.1011208515061524e-06, "loss": 1.3216034173965454, "step": 8150 }, { "epoch": 1.48384454355147, "grad_norm": 21.375, "learning_rate": 6.09958903648392e-06, "loss": 1.2552473545074463, "step": 8152 }, { "epoch": 1.4842086101756622, "grad_norm": 17.375, "learning_rate": 6.098057206847401e-06, "loss": 1.4203650951385498, "step": 8154 }, { "epoch": 1.4845726767998544, "grad_norm": 16.125, "learning_rate": 6.096525362821382e-06, "loss": 1.2568223476409912, "step": 8156 }, { "epoch": 1.4849367434240466, "grad_norm": 14.9375, "learning_rate": 6.094993504630659e-06, "loss": 1.511087417602539, "step": 8158 }, { "epoch": 1.4853008100482388, "grad_norm": 51.5, "learning_rate": 6.093461632500024e-06, "loss": 1.5030088424682617, "step": 8160 }, { "epoch": 1.485664876672431, "grad_norm": 19.25, "learning_rate": 6.091929746654273e-06, "loss": 1.551119327545166, "step": 8162 }, { "epoch": 1.4860289432966232, "grad_norm": 14.9375, "learning_rate": 6.090397847318204e-06, "loss": 1.4594221115112305, "step": 8164 }, { "epoch": 1.4863930099208156, "grad_norm": 14.6875, "learning_rate": 6.088865934716617e-06, "loss": 1.4273381233215332, "step": 8166 }, { "epoch": 1.4867570765450078, "grad_norm": 11.0625, "learning_rate": 6.087334009074315e-06, "loss": 1.3513221740722656, "step": 8168 }, { "epoch": 1.4871211431692, "grad_norm": 9.375, "learning_rate": 6.0858020706161e-06, "loss": 1.3581629991531372, "step": 8170 }, { "epoch": 1.4874852097933922, "grad_norm": 12.625, "learning_rate": 6.0842701195667794e-06, "loss": 1.2398608922958374, "step": 8172 }, { "epoch": 1.4878492764175844, "grad_norm": 12.5, "learning_rate": 6.082738156151161e-06, "loss": 1.2210054397583008, "step": 8174 }, { "epoch": 1.4882133430417768, "grad_norm": 22.375, "learning_rate": 6.0812061805940525e-06, "loss": 0.874728798866272, "step": 8176 }, { "epoch": 1.488577409665969, "grad_norm": 7.40625, "learning_rate": 6.079674193120266e-06, "loss": 1.320002555847168, "step": 8178 }, { "epoch": 1.4889414762901612, "grad_norm": 13.875, "learning_rate": 6.078142193954615e-06, "loss": 1.4195284843444824, "step": 8180 }, { "epoch": 1.4893055429143534, "grad_norm": 33.25, "learning_rate": 6.076610183321914e-06, "loss": 1.6974737644195557, "step": 8182 }, { "epoch": 1.4896696095385455, "grad_norm": 14.5625, "learning_rate": 6.075078161446979e-06, "loss": 1.4675984382629395, "step": 8184 }, { "epoch": 1.4900336761627377, "grad_norm": 20.25, "learning_rate": 6.073546128554628e-06, "loss": 1.2003103494644165, "step": 8186 }, { "epoch": 1.49039774278693, "grad_norm": 19.875, "learning_rate": 6.072014084869682e-06, "loss": 0.804355263710022, "step": 8188 }, { "epoch": 1.4907618094111221, "grad_norm": 16.625, "learning_rate": 6.0704820306169584e-06, "loss": 1.2440123558044434, "step": 8190 }, { "epoch": 1.4911258760353143, "grad_norm": 9.9375, "learning_rate": 6.068949966021285e-06, "loss": 1.0877405405044556, "step": 8192 }, { "epoch": 1.4914899426595067, "grad_norm": 7.46875, "learning_rate": 6.067417891307481e-06, "loss": 1.4059909582138062, "step": 8194 }, { "epoch": 1.491854009283699, "grad_norm": 8.375, "learning_rate": 6.065885806700375e-06, "loss": 1.3072524070739746, "step": 8196 }, { "epoch": 1.4922180759078911, "grad_norm": 9.6875, "learning_rate": 6.064353712424795e-06, "loss": 1.1673481464385986, "step": 8198 }, { "epoch": 1.4925821425320833, "grad_norm": 10.625, "learning_rate": 6.062821608705568e-06, "loss": 1.4568266868591309, "step": 8200 }, { "epoch": 1.4929462091562755, "grad_norm": 42.5, "learning_rate": 6.061289495767525e-06, "loss": 1.3883720636367798, "step": 8202 }, { "epoch": 1.493310275780468, "grad_norm": 6.78125, "learning_rate": 6.0597573738354975e-06, "loss": 1.1405024528503418, "step": 8204 }, { "epoch": 1.4936743424046601, "grad_norm": 9.8125, "learning_rate": 6.058225243134315e-06, "loss": 1.4551286697387695, "step": 8206 }, { "epoch": 1.4940384090288523, "grad_norm": 20.0, "learning_rate": 6.056693103888816e-06, "loss": 1.2114101648330688, "step": 8208 }, { "epoch": 1.4944024756530445, "grad_norm": 10.375, "learning_rate": 6.055160956323834e-06, "loss": 0.6524553894996643, "step": 8210 }, { "epoch": 1.4947665422772367, "grad_norm": 22.0, "learning_rate": 6.0536288006642045e-06, "loss": 1.457849144935608, "step": 8212 }, { "epoch": 1.495130608901429, "grad_norm": 15.4375, "learning_rate": 6.052096637134766e-06, "loss": 1.4925663471221924, "step": 8214 }, { "epoch": 1.495494675525621, "grad_norm": 7.125, "learning_rate": 6.050564465960357e-06, "loss": 1.1473684310913086, "step": 8216 }, { "epoch": 1.4958587421498133, "grad_norm": 16.75, "learning_rate": 6.0490322873658165e-06, "loss": 1.2548805475234985, "step": 8218 }, { "epoch": 1.4962228087740057, "grad_norm": 10.0625, "learning_rate": 6.0475001015759896e-06, "loss": 1.2143735885620117, "step": 8220 }, { "epoch": 1.496586875398198, "grad_norm": 15.375, "learning_rate": 6.045967908815713e-06, "loss": 1.7000795602798462, "step": 8222 }, { "epoch": 1.49695094202239, "grad_norm": 15.9375, "learning_rate": 6.044435709309833e-06, "loss": 1.3217474222183228, "step": 8224 }, { "epoch": 1.4973150086465823, "grad_norm": 8.9375, "learning_rate": 6.042903503283191e-06, "loss": 1.2151466608047485, "step": 8226 }, { "epoch": 1.4976790752707745, "grad_norm": 8.75, "learning_rate": 6.041371290960635e-06, "loss": 1.0743054151535034, "step": 8228 }, { "epoch": 1.498043141894967, "grad_norm": 8.125, "learning_rate": 6.039839072567009e-06, "loss": 1.4411975145339966, "step": 8230 }, { "epoch": 1.498407208519159, "grad_norm": 8.8125, "learning_rate": 6.038306848327162e-06, "loss": 1.1911547183990479, "step": 8232 }, { "epoch": 1.4987712751433513, "grad_norm": 14.5, "learning_rate": 6.036774618465939e-06, "loss": 1.4692813158035278, "step": 8234 }, { "epoch": 1.4991353417675435, "grad_norm": 24.625, "learning_rate": 6.035242383208191e-06, "loss": 1.646880865097046, "step": 8236 }, { "epoch": 1.4994994083917357, "grad_norm": 12.5625, "learning_rate": 6.033710142778765e-06, "loss": 1.198011040687561, "step": 8238 }, { "epoch": 1.4998634750159279, "grad_norm": 16.25, "learning_rate": 6.032177897402511e-06, "loss": 1.742721676826477, "step": 8240 }, { "epoch": 1.50022754164012, "grad_norm": 19.0, "learning_rate": 6.030645647304283e-06, "loss": 1.8102439641952515, "step": 8242 }, { "epoch": 1.5005916082643123, "grad_norm": 11.5, "learning_rate": 6.029113392708931e-06, "loss": 1.396047592163086, "step": 8244 }, { "epoch": 1.5009556748885045, "grad_norm": 10.0625, "learning_rate": 6.027581133841305e-06, "loss": 1.1110010147094727, "step": 8246 }, { "epoch": 1.5013197415126969, "grad_norm": 29.375, "learning_rate": 6.02604887092626e-06, "loss": 1.4422245025634766, "step": 8248 }, { "epoch": 1.501683808136889, "grad_norm": 18.75, "learning_rate": 6.024516604188648e-06, "loss": 1.5606341361999512, "step": 8250 }, { "epoch": 1.5020478747610813, "grad_norm": 16.0, "learning_rate": 6.022984333853324e-06, "loss": 1.6321194171905518, "step": 8252 }, { "epoch": 1.5024119413852735, "grad_norm": 13.6875, "learning_rate": 6.021452060145143e-06, "loss": 1.39895761013031, "step": 8254 }, { "epoch": 1.5027760080094659, "grad_norm": 8.6875, "learning_rate": 6.0199197832889585e-06, "loss": 1.056370735168457, "step": 8256 }, { "epoch": 1.503140074633658, "grad_norm": 12.8125, "learning_rate": 6.0183875035096275e-06, "loss": 1.200910210609436, "step": 8258 }, { "epoch": 1.5035041412578503, "grad_norm": 7.59375, "learning_rate": 6.016855221032003e-06, "loss": 1.2097914218902588, "step": 8260 }, { "epoch": 1.5038682078820425, "grad_norm": 16.875, "learning_rate": 6.015322936080945e-06, "loss": 1.2440482378005981, "step": 8262 }, { "epoch": 1.5042322745062346, "grad_norm": 18.125, "learning_rate": 6.013790648881307e-06, "loss": 1.0889358520507812, "step": 8264 }, { "epoch": 1.5045963411304268, "grad_norm": 16.75, "learning_rate": 6.0122583596579475e-06, "loss": 1.4096873998641968, "step": 8266 }, { "epoch": 1.504960407754619, "grad_norm": 12.9375, "learning_rate": 6.010726068635724e-06, "loss": 1.7448532581329346, "step": 8268 }, { "epoch": 1.5053244743788112, "grad_norm": 13.8125, "learning_rate": 6.009193776039492e-06, "loss": 1.354616641998291, "step": 8270 }, { "epoch": 1.5056885410030034, "grad_norm": 9.9375, "learning_rate": 6.007661482094111e-06, "loss": 0.9735084176063538, "step": 8272 }, { "epoch": 1.5060526076271956, "grad_norm": 32.75, "learning_rate": 6.0061291870244395e-06, "loss": 1.3060473203659058, "step": 8274 }, { "epoch": 1.506416674251388, "grad_norm": 24.125, "learning_rate": 6.004596891055334e-06, "loss": 1.4042834043502808, "step": 8276 }, { "epoch": 1.5067807408755802, "grad_norm": 9.1875, "learning_rate": 6.003064594411654e-06, "loss": 1.3706773519515991, "step": 8278 }, { "epoch": 1.5071448074997724, "grad_norm": 7.09375, "learning_rate": 6.001532297318258e-06, "loss": 1.087712287902832, "step": 8280 }, { "epoch": 1.5075088741239648, "grad_norm": 8.625, "learning_rate": 6.000000000000001e-06, "loss": 1.2260565757751465, "step": 8282 }, { "epoch": 1.507872940748157, "grad_norm": 16.875, "learning_rate": 5.998467702681745e-06, "loss": 1.477408528327942, "step": 8284 }, { "epoch": 1.5082370073723492, "grad_norm": 9.3125, "learning_rate": 5.996935405588348e-06, "loss": 1.5100369453430176, "step": 8286 }, { "epoch": 1.5086010739965414, "grad_norm": 5.96875, "learning_rate": 5.995403108944667e-06, "loss": 1.24753999710083, "step": 8288 }, { "epoch": 1.5089651406207336, "grad_norm": 9.5625, "learning_rate": 5.993870812975563e-06, "loss": 1.4215635061264038, "step": 8290 }, { "epoch": 1.5093292072449258, "grad_norm": 8.9375, "learning_rate": 5.99233851790589e-06, "loss": 1.2854349613189697, "step": 8292 }, { "epoch": 1.509693273869118, "grad_norm": 7.34375, "learning_rate": 5.990806223960508e-06, "loss": 1.1699168682098389, "step": 8294 }, { "epoch": 1.5100573404933102, "grad_norm": 21.875, "learning_rate": 5.989273931364279e-06, "loss": 0.8238331079483032, "step": 8296 }, { "epoch": 1.5104214071175024, "grad_norm": 11.125, "learning_rate": 5.9877416403420545e-06, "loss": 1.2738925218582153, "step": 8298 }, { "epoch": 1.5107854737416946, "grad_norm": 14.4375, "learning_rate": 5.986209351118696e-06, "loss": 1.4510915279388428, "step": 8300 }, { "epoch": 1.511149540365887, "grad_norm": 25.0, "learning_rate": 5.984677063919058e-06, "loss": 1.7367358207702637, "step": 8302 }, { "epoch": 1.5115136069900792, "grad_norm": 9.0, "learning_rate": 5.983144778967998e-06, "loss": 1.017667531967163, "step": 8304 }, { "epoch": 1.5118776736142714, "grad_norm": 8.1875, "learning_rate": 5.9816124964903745e-06, "loss": 1.3267130851745605, "step": 8306 }, { "epoch": 1.5122417402384638, "grad_norm": 8.6875, "learning_rate": 5.980080216711043e-06, "loss": 0.8771546483039856, "step": 8308 }, { "epoch": 1.512605806862656, "grad_norm": 15.5, "learning_rate": 5.9785479398548595e-06, "loss": 0.9681273698806763, "step": 8310 }, { "epoch": 1.5129698734868482, "grad_norm": 15.6875, "learning_rate": 5.977015666146677e-06, "loss": 1.7618441581726074, "step": 8312 }, { "epoch": 1.5133339401110404, "grad_norm": 20.25, "learning_rate": 5.975483395811352e-06, "loss": 1.6499402523040771, "step": 8314 }, { "epoch": 1.5136980067352326, "grad_norm": 17.5, "learning_rate": 5.973951129073743e-06, "loss": 2.0083208084106445, "step": 8316 }, { "epoch": 1.5140620733594248, "grad_norm": 15.0, "learning_rate": 5.972418866158697e-06, "loss": 1.960012435913086, "step": 8318 }, { "epoch": 1.514426139983617, "grad_norm": 20.25, "learning_rate": 5.970886607291073e-06, "loss": 1.7332077026367188, "step": 8320 }, { "epoch": 1.5147902066078092, "grad_norm": 25.625, "learning_rate": 5.969354352695718e-06, "loss": 1.5385401248931885, "step": 8322 }, { "epoch": 1.5151542732320014, "grad_norm": 34.5, "learning_rate": 5.967822102597489e-06, "loss": 0.5154561400413513, "step": 8324 }, { "epoch": 1.5155183398561936, "grad_norm": 23.125, "learning_rate": 5.966289857221237e-06, "loss": 1.3367457389831543, "step": 8326 }, { "epoch": 1.5158824064803857, "grad_norm": 11.0, "learning_rate": 5.964757616791812e-06, "loss": 1.3716245889663696, "step": 8328 }, { "epoch": 1.5162464731045782, "grad_norm": 7.5, "learning_rate": 5.963225381534063e-06, "loss": 1.4495644569396973, "step": 8330 }, { "epoch": 1.5166105397287704, "grad_norm": 16.0, "learning_rate": 5.96169315167284e-06, "loss": 1.4499701261520386, "step": 8332 }, { "epoch": 1.5169746063529626, "grad_norm": 31.625, "learning_rate": 5.960160927432992e-06, "loss": 0.8963940143585205, "step": 8334 }, { "epoch": 1.517338672977155, "grad_norm": 12.4375, "learning_rate": 5.9586287090393666e-06, "loss": 1.534785509109497, "step": 8336 }, { "epoch": 1.5177027396013472, "grad_norm": 6.03125, "learning_rate": 5.957096496716811e-06, "loss": 1.3165816068649292, "step": 8338 }, { "epoch": 1.5180668062255394, "grad_norm": 7.375, "learning_rate": 5.955564290690171e-06, "loss": 1.3977460861206055, "step": 8340 }, { "epoch": 1.5184308728497315, "grad_norm": 11.1875, "learning_rate": 5.9540320911842895e-06, "loss": 0.9736803770065308, "step": 8342 }, { "epoch": 1.5187949394739237, "grad_norm": 20.125, "learning_rate": 5.9524998984240124e-06, "loss": 1.3769147396087646, "step": 8344 }, { "epoch": 1.519159006098116, "grad_norm": 41.0, "learning_rate": 5.950967712634185e-06, "loss": 1.4024406671524048, "step": 8346 }, { "epoch": 1.5195230727223081, "grad_norm": 5.15625, "learning_rate": 5.949435534039645e-06, "loss": 0.9985767602920532, "step": 8348 }, { "epoch": 1.5198871393465003, "grad_norm": 15.5625, "learning_rate": 5.947903362865237e-06, "loss": 1.3010436296463013, "step": 8350 }, { "epoch": 1.5202512059706925, "grad_norm": 21.75, "learning_rate": 5.9463711993357975e-06, "loss": 1.0750222206115723, "step": 8352 }, { "epoch": 1.5206152725948847, "grad_norm": 22.625, "learning_rate": 5.9448390436761674e-06, "loss": 1.5535142421722412, "step": 8354 }, { "epoch": 1.5209793392190771, "grad_norm": 14.8125, "learning_rate": 5.943306896111185e-06, "loss": 1.4182324409484863, "step": 8356 }, { "epoch": 1.5213434058432693, "grad_norm": 26.25, "learning_rate": 5.941774756865686e-06, "loss": 1.4964677095413208, "step": 8358 }, { "epoch": 1.5217074724674615, "grad_norm": 12.625, "learning_rate": 5.940242626164506e-06, "loss": 1.5679097175598145, "step": 8360 }, { "epoch": 1.522071539091654, "grad_norm": 13.625, "learning_rate": 5.938710504232476e-06, "loss": 1.6520633697509766, "step": 8362 }, { "epoch": 1.5224356057158461, "grad_norm": 22.625, "learning_rate": 5.937178391294433e-06, "loss": 1.4216349124908447, "step": 8364 }, { "epoch": 1.5227996723400383, "grad_norm": 17.25, "learning_rate": 5.935646287575208e-06, "loss": 1.4336011409759521, "step": 8366 }, { "epoch": 1.5231637389642305, "grad_norm": 8.3125, "learning_rate": 5.9341141932996275e-06, "loss": 1.4558292627334595, "step": 8368 }, { "epoch": 1.5235278055884227, "grad_norm": 36.25, "learning_rate": 5.932582108692522e-06, "loss": 1.4104833602905273, "step": 8370 }, { "epoch": 1.523891872212615, "grad_norm": 17.25, "learning_rate": 5.931050033978718e-06, "loss": 1.018526315689087, "step": 8372 }, { "epoch": 1.524255938836807, "grad_norm": 12.0625, "learning_rate": 5.929517969383043e-06, "loss": 1.8626515865325928, "step": 8374 }, { "epoch": 1.5246200054609993, "grad_norm": 7.75, "learning_rate": 5.927985915130321e-06, "loss": 1.7673389911651611, "step": 8376 }, { "epoch": 1.5249840720851915, "grad_norm": 14.375, "learning_rate": 5.926453871445373e-06, "loss": 1.1028656959533691, "step": 8378 }, { "epoch": 1.5253481387093837, "grad_norm": 29.625, "learning_rate": 5.924921838553024e-06, "loss": 1.423374056816101, "step": 8380 }, { "epoch": 1.525712205333576, "grad_norm": 22.25, "learning_rate": 5.9233898166780865e-06, "loss": 1.524986743927002, "step": 8382 }, { "epoch": 1.5260762719577683, "grad_norm": 28.625, "learning_rate": 5.921857806045385e-06, "loss": 1.250152826309204, "step": 8384 }, { "epoch": 1.5264403385819605, "grad_norm": 44.0, "learning_rate": 5.920325806879736e-06, "loss": 0.9956077933311462, "step": 8386 }, { "epoch": 1.5268044052061527, "grad_norm": 161.0, "learning_rate": 5.918793819405949e-06, "loss": 0.8774659037590027, "step": 8388 }, { "epoch": 1.527168471830345, "grad_norm": 5.6875, "learning_rate": 5.917261843848843e-06, "loss": 1.2939871549606323, "step": 8390 }, { "epoch": 1.5275325384545373, "grad_norm": 4.59375, "learning_rate": 5.915729880433223e-06, "loss": 1.26493501663208, "step": 8392 }, { "epoch": 1.5278966050787295, "grad_norm": 17.0, "learning_rate": 5.9141979293839e-06, "loss": 1.4402207136154175, "step": 8394 }, { "epoch": 1.5282606717029217, "grad_norm": 22.625, "learning_rate": 5.912665990925688e-06, "loss": 1.4345242977142334, "step": 8396 }, { "epoch": 1.5286247383271139, "grad_norm": 21.25, "learning_rate": 5.9111340652833844e-06, "loss": 1.6565146446228027, "step": 8398 }, { "epoch": 1.528988804951306, "grad_norm": 32.25, "learning_rate": 5.909602152681799e-06, "loss": 1.5919088125228882, "step": 8400 }, { "epoch": 1.5293528715754983, "grad_norm": 20.75, "learning_rate": 5.90807025334573e-06, "loss": 1.845825433731079, "step": 8402 }, { "epoch": 1.5297169381996905, "grad_norm": 10.9375, "learning_rate": 5.906538367499977e-06, "loss": 1.4942909479141235, "step": 8404 }, { "epoch": 1.5300810048238827, "grad_norm": 12.8125, "learning_rate": 5.905006495369343e-06, "loss": 1.358984351158142, "step": 8406 }, { "epoch": 1.5304450714480748, "grad_norm": 7.5625, "learning_rate": 5.903474637178619e-06, "loss": 1.354823350906372, "step": 8408 }, { "epoch": 1.5308091380722673, "grad_norm": 15.6875, "learning_rate": 5.901942793152603e-06, "loss": 1.3463715314865112, "step": 8410 }, { "epoch": 1.5311732046964595, "grad_norm": 14.3125, "learning_rate": 5.9004109635160814e-06, "loss": 1.397836446762085, "step": 8412 }, { "epoch": 1.5315372713206516, "grad_norm": 7.46875, "learning_rate": 5.898879148493848e-06, "loss": 1.4281895160675049, "step": 8414 }, { "epoch": 1.531901337944844, "grad_norm": 14.625, "learning_rate": 5.897347348310691e-06, "loss": 1.494858741760254, "step": 8416 }, { "epoch": 1.5322654045690363, "grad_norm": 13.125, "learning_rate": 5.895815563191396e-06, "loss": 1.4905179738998413, "step": 8418 }, { "epoch": 1.5326294711932285, "grad_norm": 2.828125, "learning_rate": 5.894283793360744e-06, "loss": 0.8018645644187927, "step": 8420 }, { "epoch": 1.5329935378174206, "grad_norm": 19.75, "learning_rate": 5.8927520390435145e-06, "loss": 0.3094245493412018, "step": 8422 }, { "epoch": 1.5333576044416128, "grad_norm": 49.0, "learning_rate": 5.89122030046449e-06, "loss": 0.5090566277503967, "step": 8424 }, { "epoch": 1.533721671065805, "grad_norm": 20.5, "learning_rate": 5.889688577848447e-06, "loss": 0.6525697708129883, "step": 8426 }, { "epoch": 1.5340857376899972, "grad_norm": 9.125, "learning_rate": 5.888156871420157e-06, "loss": 1.0942453145980835, "step": 8428 }, { "epoch": 1.5344498043141894, "grad_norm": 11.6875, "learning_rate": 5.886625181404393e-06, "loss": 1.3017857074737549, "step": 8430 }, { "epoch": 1.5348138709383816, "grad_norm": 18.375, "learning_rate": 5.885093508025923e-06, "loss": 1.3903698921203613, "step": 8432 }, { "epoch": 1.5351779375625738, "grad_norm": 11.6875, "learning_rate": 5.883561851509515e-06, "loss": 1.4669889211654663, "step": 8434 }, { "epoch": 1.5355420041867662, "grad_norm": 15.0625, "learning_rate": 5.882030212079936e-06, "loss": 1.559173583984375, "step": 8436 }, { "epoch": 1.5359060708109584, "grad_norm": 8.75, "learning_rate": 5.880498589961943e-06, "loss": 1.368179202079773, "step": 8438 }, { "epoch": 1.5362701374351506, "grad_norm": 9.375, "learning_rate": 5.878966985380299e-06, "loss": 1.5964009761810303, "step": 8440 }, { "epoch": 1.5366342040593428, "grad_norm": 8.5, "learning_rate": 5.877435398559759e-06, "loss": 1.2057523727416992, "step": 8442 }, { "epoch": 1.5369982706835352, "grad_norm": 14.5, "learning_rate": 5.875903829725076e-06, "loss": 1.5160874128341675, "step": 8444 }, { "epoch": 1.5373623373077274, "grad_norm": 8.5625, "learning_rate": 5.8743722791010036e-06, "loss": 1.3408002853393555, "step": 8446 }, { "epoch": 1.5377264039319196, "grad_norm": 40.25, "learning_rate": 5.872840746912292e-06, "loss": 1.550868034362793, "step": 8448 }, { "epoch": 1.5380904705561118, "grad_norm": 22.125, "learning_rate": 5.871309233383684e-06, "loss": 1.6083693504333496, "step": 8450 }, { "epoch": 1.538454537180304, "grad_norm": 6.71875, "learning_rate": 5.869777738739924e-06, "loss": 1.4470646381378174, "step": 8452 }, { "epoch": 1.5388186038044962, "grad_norm": 6.21875, "learning_rate": 5.868246263205753e-06, "loss": 1.283869743347168, "step": 8454 }, { "epoch": 1.5391826704286884, "grad_norm": 11.0, "learning_rate": 5.866714807005911e-06, "loss": 1.3350982666015625, "step": 8456 }, { "epoch": 1.5395467370528806, "grad_norm": 8.8125, "learning_rate": 5.86518337036513e-06, "loss": 1.3710887432098389, "step": 8458 }, { "epoch": 1.5399108036770728, "grad_norm": 13.3125, "learning_rate": 5.8636519535081435e-06, "loss": 1.0875675678253174, "step": 8460 }, { "epoch": 1.540274870301265, "grad_norm": 20.375, "learning_rate": 5.862120556659678e-06, "loss": 1.2588577270507812, "step": 8462 }, { "epoch": 1.5406389369254574, "grad_norm": 27.5, "learning_rate": 5.860589180044463e-06, "loss": 1.671203851699829, "step": 8464 }, { "epoch": 1.5410030035496496, "grad_norm": 13.6875, "learning_rate": 5.859057823887222e-06, "loss": 1.5473215579986572, "step": 8466 }, { "epoch": 1.5413670701738418, "grad_norm": 23.125, "learning_rate": 5.857526488412675e-06, "loss": 1.7330286502838135, "step": 8468 }, { "epoch": 1.5417311367980342, "grad_norm": 6.21875, "learning_rate": 5.855995173845537e-06, "loss": 1.4392908811569214, "step": 8470 }, { "epoch": 1.5420952034222264, "grad_norm": 12.5, "learning_rate": 5.854463880410523e-06, "loss": 1.416183590888977, "step": 8472 }, { "epoch": 1.5424592700464186, "grad_norm": 13.25, "learning_rate": 5.852932608332344e-06, "loss": 1.4640228748321533, "step": 8474 }, { "epoch": 1.5428233366706108, "grad_norm": 18.375, "learning_rate": 5.851401357835711e-06, "loss": 1.5139390230178833, "step": 8476 }, { "epoch": 1.543187403294803, "grad_norm": 30.125, "learning_rate": 5.8498701291453255e-06, "loss": 1.9712140560150146, "step": 8478 }, { "epoch": 1.5435514699189952, "grad_norm": 300.0, "learning_rate": 5.848338922485891e-06, "loss": 1.7092909812927246, "step": 8480 }, { "epoch": 1.5439155365431874, "grad_norm": 9.25, "learning_rate": 5.846807738082103e-06, "loss": 1.1048921346664429, "step": 8482 }, { "epoch": 1.5442796031673796, "grad_norm": 8.1875, "learning_rate": 5.84527657615866e-06, "loss": 1.4587063789367676, "step": 8484 }, { "epoch": 1.5446436697915717, "grad_norm": 8.4375, "learning_rate": 5.843745436940252e-06, "loss": 1.3519057035446167, "step": 8486 }, { "epoch": 1.545007736415764, "grad_norm": 11.875, "learning_rate": 5.842214320651569e-06, "loss": 1.1182470321655273, "step": 8488 }, { "epoch": 1.5453718030399564, "grad_norm": 11.125, "learning_rate": 5.840683227517294e-06, "loss": 0.3271283507347107, "step": 8490 }, { "epoch": 1.5457358696641486, "grad_norm": 9.6875, "learning_rate": 5.839152157762109e-06, "loss": 1.103482961654663, "step": 8492 }, { "epoch": 1.5460999362883407, "grad_norm": 16.0, "learning_rate": 5.837621111610693e-06, "loss": 1.3002212047576904, "step": 8494 }, { "epoch": 1.546464002912533, "grad_norm": 7.3125, "learning_rate": 5.8360900892877225e-06, "loss": 1.3260561227798462, "step": 8496 }, { "epoch": 1.5468280695367254, "grad_norm": 9.375, "learning_rate": 5.834559091017866e-06, "loss": 1.5359067916870117, "step": 8498 }, { "epoch": 1.5471921361609176, "grad_norm": 16.5, "learning_rate": 5.833028117025794e-06, "loss": 1.2190123796463013, "step": 8500 }, { "epoch": 1.5475562027851097, "grad_norm": 14.625, "learning_rate": 5.831497167536166e-06, "loss": 0.8445422649383545, "step": 8502 }, { "epoch": 1.547920269409302, "grad_norm": 6.46875, "learning_rate": 5.829966242773647e-06, "loss": 1.2735350131988525, "step": 8504 }, { "epoch": 1.5482843360334941, "grad_norm": 10.625, "learning_rate": 5.828435342962895e-06, "loss": 1.376702070236206, "step": 8506 }, { "epoch": 1.5486484026576863, "grad_norm": 6.0, "learning_rate": 5.826904468328558e-06, "loss": 1.2295076847076416, "step": 8508 }, { "epoch": 1.5490124692818785, "grad_norm": 20.5, "learning_rate": 5.8253736190952915e-06, "loss": 1.2841095924377441, "step": 8510 }, { "epoch": 1.5493765359060707, "grad_norm": 10.875, "learning_rate": 5.823842795487737e-06, "loss": 1.907101035118103, "step": 8512 }, { "epoch": 1.549740602530263, "grad_norm": 7.5625, "learning_rate": 5.822311997730538e-06, "loss": 1.339274525642395, "step": 8514 }, { "epoch": 1.550104669154455, "grad_norm": 7.71875, "learning_rate": 5.820781226048336e-06, "loss": 1.4191474914550781, "step": 8516 }, { "epoch": 1.5504687357786475, "grad_norm": 10.3125, "learning_rate": 5.819250480665759e-06, "loss": 1.308241844177246, "step": 8518 }, { "epoch": 1.5508328024028397, "grad_norm": 14.4375, "learning_rate": 5.817719761807445e-06, "loss": 1.6076023578643799, "step": 8520 }, { "epoch": 1.551196869027032, "grad_norm": 6.53125, "learning_rate": 5.816189069698015e-06, "loss": 1.2995057106018066, "step": 8522 }, { "epoch": 1.5515609356512243, "grad_norm": 8.25, "learning_rate": 5.814658404562093e-06, "loss": 1.04618501663208, "step": 8524 }, { "epoch": 1.5519250022754165, "grad_norm": 9.0625, "learning_rate": 5.813127766624301e-06, "loss": 1.06477952003479, "step": 8526 }, { "epoch": 1.5522890688996087, "grad_norm": 14.8125, "learning_rate": 5.811597156109252e-06, "loss": 1.6031912565231323, "step": 8528 }, { "epoch": 1.552653135523801, "grad_norm": 31.375, "learning_rate": 5.810066573241557e-06, "loss": 1.568178653717041, "step": 8530 }, { "epoch": 1.553017202147993, "grad_norm": 10.9375, "learning_rate": 5.80853601824582e-06, "loss": 1.361877679824829, "step": 8532 }, { "epoch": 1.5533812687721853, "grad_norm": 7.65625, "learning_rate": 5.807005491346649e-06, "loss": 1.37688410282135, "step": 8534 }, { "epoch": 1.5537453353963775, "grad_norm": 13.5625, "learning_rate": 5.8054749927686405e-06, "loss": 1.1899266242980957, "step": 8536 }, { "epoch": 1.5541094020205697, "grad_norm": 15.4375, "learning_rate": 5.803944522736387e-06, "loss": 0.8405632376670837, "step": 8538 }, { "epoch": 1.5544734686447619, "grad_norm": 14.625, "learning_rate": 5.802414081474481e-06, "loss": 1.0263118743896484, "step": 8540 }, { "epoch": 1.554837535268954, "grad_norm": 13.625, "learning_rate": 5.800883669207507e-06, "loss": 1.4692708253860474, "step": 8542 }, { "epoch": 1.5552016018931465, "grad_norm": 13.25, "learning_rate": 5.799353286160048e-06, "loss": 1.644458293914795, "step": 8544 }, { "epoch": 1.5555656685173387, "grad_norm": 34.75, "learning_rate": 5.797822932556681e-06, "loss": 1.3747681379318237, "step": 8546 }, { "epoch": 1.5559297351415309, "grad_norm": 11.5, "learning_rate": 5.796292608621978e-06, "loss": 1.0696145296096802, "step": 8548 }, { "epoch": 1.556293801765723, "grad_norm": 15.75, "learning_rate": 5.7947623145805135e-06, "loss": 0.958878755569458, "step": 8550 }, { "epoch": 1.5566578683899155, "grad_norm": 135.0, "learning_rate": 5.793232050656843e-06, "loss": 0.7434136867523193, "step": 8552 }, { "epoch": 1.5570219350141077, "grad_norm": 7.375, "learning_rate": 5.7917018170755326e-06, "loss": 0.9431828856468201, "step": 8554 }, { "epoch": 1.5573860016382999, "grad_norm": 27.0, "learning_rate": 5.790171614061136e-06, "loss": 0.9860231876373291, "step": 8556 }, { "epoch": 1.557750068262492, "grad_norm": 17.75, "learning_rate": 5.788641441838205e-06, "loss": 1.4978352785110474, "step": 8558 }, { "epoch": 1.5581141348866843, "grad_norm": 119.5, "learning_rate": 5.787111300631288e-06, "loss": 1.4909592866897583, "step": 8560 }, { "epoch": 1.5584782015108765, "grad_norm": 15.4375, "learning_rate": 5.785581190664922e-06, "loss": 1.4978018999099731, "step": 8562 }, { "epoch": 1.5588422681350687, "grad_norm": 10.375, "learning_rate": 5.784051112163647e-06, "loss": 1.31137216091156, "step": 8564 }, { "epoch": 1.5592063347592608, "grad_norm": 19.625, "learning_rate": 5.782521065351998e-06, "loss": 1.5296207666397095, "step": 8566 }, { "epoch": 1.559570401383453, "grad_norm": 11.8125, "learning_rate": 5.780991050454501e-06, "loss": 1.3518354892730713, "step": 8568 }, { "epoch": 1.5599344680076452, "grad_norm": 34.75, "learning_rate": 5.77946106769568e-06, "loss": 2.0326223373413086, "step": 8570 }, { "epoch": 1.5602985346318377, "grad_norm": 18.875, "learning_rate": 5.7779311173000516e-06, "loss": 1.4092392921447754, "step": 8572 }, { "epoch": 1.5606626012560298, "grad_norm": 17.5, "learning_rate": 5.776401199492132e-06, "loss": 1.399479627609253, "step": 8574 }, { "epoch": 1.561026667880222, "grad_norm": 12.125, "learning_rate": 5.774871314496433e-06, "loss": 1.3655784130096436, "step": 8576 }, { "epoch": 1.5613907345044145, "grad_norm": 9.5625, "learning_rate": 5.773341462537454e-06, "loss": 1.3360631465911865, "step": 8578 }, { "epoch": 1.5617548011286067, "grad_norm": 12.5625, "learning_rate": 5.771811643839698e-06, "loss": 0.7992429733276367, "step": 8580 }, { "epoch": 1.5621188677527988, "grad_norm": 6.75, "learning_rate": 5.770281858627658e-06, "loss": 0.42511245608329773, "step": 8582 }, { "epoch": 1.562482934376991, "grad_norm": 22.125, "learning_rate": 5.768752107125822e-06, "loss": 1.5596882104873657, "step": 8584 }, { "epoch": 1.5628470010011832, "grad_norm": 112.5, "learning_rate": 5.7672223895586795e-06, "loss": 1.231987476348877, "step": 8586 }, { "epoch": 1.5632110676253754, "grad_norm": 14.5625, "learning_rate": 5.765692706150709e-06, "loss": 1.411910057067871, "step": 8588 }, { "epoch": 1.5635751342495676, "grad_norm": 10.625, "learning_rate": 5.764163057126384e-06, "loss": 1.0222399234771729, "step": 8590 }, { "epoch": 1.5639392008737598, "grad_norm": 8.3125, "learning_rate": 5.762633442710172e-06, "loss": 1.4847803115844727, "step": 8592 }, { "epoch": 1.564303267497952, "grad_norm": 5.125, "learning_rate": 5.7611038631265416e-06, "loss": 1.3583101034164429, "step": 8594 }, { "epoch": 1.5646673341221442, "grad_norm": 7.125, "learning_rate": 5.759574318599952e-06, "loss": 1.162548542022705, "step": 8596 }, { "epoch": 1.5650314007463366, "grad_norm": 9.0, "learning_rate": 5.758044809354857e-06, "loss": 1.2633427381515503, "step": 8598 }, { "epoch": 1.5653954673705288, "grad_norm": 13.0, "learning_rate": 5.756515335615704e-06, "loss": 1.4692398309707642, "step": 8600 }, { "epoch": 1.565759533994721, "grad_norm": 11.625, "learning_rate": 5.75498589760694e-06, "loss": 1.229184865951538, "step": 8602 }, { "epoch": 1.5661236006189134, "grad_norm": 18.25, "learning_rate": 5.753456495553e-06, "loss": 1.597200870513916, "step": 8604 }, { "epoch": 1.5664876672431056, "grad_norm": 31.375, "learning_rate": 5.7519271296783256e-06, "loss": 1.7171881198883057, "step": 8606 }, { "epoch": 1.5668517338672978, "grad_norm": 11.375, "learning_rate": 5.750397800207335e-06, "loss": 1.0852664709091187, "step": 8608 }, { "epoch": 1.56721580049149, "grad_norm": 8.375, "learning_rate": 5.748868507364458e-06, "loss": 1.3956425189971924, "step": 8610 }, { "epoch": 1.5675798671156822, "grad_norm": 7.5, "learning_rate": 5.7473392513741075e-06, "loss": 1.3740757703781128, "step": 8612 }, { "epoch": 1.5679439337398744, "grad_norm": 9.125, "learning_rate": 5.745810032460699e-06, "loss": 0.9973607063293457, "step": 8614 }, { "epoch": 1.5683080003640666, "grad_norm": 5.0625, "learning_rate": 5.744280850848638e-06, "loss": 1.200483798980713, "step": 8616 }, { "epoch": 1.5686720669882588, "grad_norm": 8.0625, "learning_rate": 5.7427517067623265e-06, "loss": 1.2188284397125244, "step": 8618 }, { "epoch": 1.569036133612451, "grad_norm": 6.625, "learning_rate": 5.741222600426159e-06, "loss": 1.246840476989746, "step": 8620 }, { "epoch": 1.5694002002366432, "grad_norm": 7.96875, "learning_rate": 5.739693532064527e-06, "loss": 1.0373082160949707, "step": 8622 }, { "epoch": 1.5697642668608354, "grad_norm": 11.8125, "learning_rate": 5.7381645019018125e-06, "loss": 1.562772512435913, "step": 8624 }, { "epoch": 1.5701283334850278, "grad_norm": 21.0, "learning_rate": 5.736635510162398e-06, "loss": 0.974455714225769, "step": 8626 }, { "epoch": 1.57049240010922, "grad_norm": 9.875, "learning_rate": 5.735106557070655e-06, "loss": 1.198800802230835, "step": 8628 }, { "epoch": 1.5708564667334122, "grad_norm": 9.8125, "learning_rate": 5.73357764285095e-06, "loss": 1.1682682037353516, "step": 8630 }, { "epoch": 1.5712205333576046, "grad_norm": 21.0, "learning_rate": 5.732048767727647e-06, "loss": 1.9496302604675293, "step": 8632 }, { "epoch": 1.5715845999817968, "grad_norm": 13.375, "learning_rate": 5.730519931925101e-06, "loss": 1.9157482385635376, "step": 8634 }, { "epoch": 1.571948666605989, "grad_norm": 4.15625, "learning_rate": 5.728991135667663e-06, "loss": 1.082655906677246, "step": 8636 }, { "epoch": 1.5723127332301812, "grad_norm": 5.34375, "learning_rate": 5.727462379179677e-06, "loss": 1.136138677597046, "step": 8638 }, { "epoch": 1.5726767998543734, "grad_norm": 12.5625, "learning_rate": 5.7259336626854835e-06, "loss": 1.2212103605270386, "step": 8640 }, { "epoch": 1.5730408664785656, "grad_norm": 53.25, "learning_rate": 5.724404986409413e-06, "loss": 1.5884782075881958, "step": 8642 }, { "epoch": 1.5734049331027578, "grad_norm": 7.875, "learning_rate": 5.722876350575794e-06, "loss": 1.4193819761276245, "step": 8644 }, { "epoch": 1.57376899972695, "grad_norm": 7.125, "learning_rate": 5.721347755408948e-06, "loss": 1.177306890487671, "step": 8646 }, { "epoch": 1.5741330663511421, "grad_norm": 16.5, "learning_rate": 5.7198192011331895e-06, "loss": 0.8277719020843506, "step": 8648 }, { "epoch": 1.5744971329753343, "grad_norm": 23.125, "learning_rate": 5.71829068797283e-06, "loss": 0.9947350025177002, "step": 8650 }, { "epoch": 1.5748611995995268, "grad_norm": 16.125, "learning_rate": 5.716762216152167e-06, "loss": 1.2280057668685913, "step": 8652 }, { "epoch": 1.575225266223719, "grad_norm": 13.5625, "learning_rate": 5.715233785895502e-06, "loss": 1.5317602157592773, "step": 8654 }, { "epoch": 1.5755893328479111, "grad_norm": 47.0, "learning_rate": 5.713705397427125e-06, "loss": 1.4001306295394897, "step": 8656 }, { "epoch": 1.5759533994721036, "grad_norm": 52.75, "learning_rate": 5.712177050971321e-06, "loss": 0.5568578243255615, "step": 8658 }, { "epoch": 1.5763174660962957, "grad_norm": 9.125, "learning_rate": 5.71064874675237e-06, "loss": 1.4954227209091187, "step": 8660 }, { "epoch": 1.576681532720488, "grad_norm": 8.25, "learning_rate": 5.709120484994539e-06, "loss": 1.3732085227966309, "step": 8662 }, { "epoch": 1.5770455993446801, "grad_norm": 4.40625, "learning_rate": 5.707592265922099e-06, "loss": 1.2604023218154907, "step": 8664 }, { "epoch": 1.5774096659688723, "grad_norm": 8.0, "learning_rate": 5.706064089759311e-06, "loss": 1.3800157308578491, "step": 8666 }, { "epoch": 1.5777737325930645, "grad_norm": 13.5, "learning_rate": 5.704535956730425e-06, "loss": 1.3275694847106934, "step": 8668 }, { "epoch": 1.5781377992172567, "grad_norm": 15.4375, "learning_rate": 5.703007867059691e-06, "loss": 1.7590510845184326, "step": 8670 }, { "epoch": 1.578501865841449, "grad_norm": 17.125, "learning_rate": 5.701479820971347e-06, "loss": 1.1481068134307861, "step": 8672 }, { "epoch": 1.578865932465641, "grad_norm": 13.875, "learning_rate": 5.6999518186896305e-06, "loss": 1.5052275657653809, "step": 8674 }, { "epoch": 1.5792299990898333, "grad_norm": 6.5625, "learning_rate": 5.698423860438769e-06, "loss": 1.2088903188705444, "step": 8676 }, { "epoch": 1.5795940657140257, "grad_norm": 4.09375, "learning_rate": 5.696895946442984e-06, "loss": 1.2658741474151611, "step": 8678 }, { "epoch": 1.579958132338218, "grad_norm": 3.984375, "learning_rate": 5.695368076926491e-06, "loss": 1.0872126817703247, "step": 8680 }, { "epoch": 1.58032219896241, "grad_norm": 36.25, "learning_rate": 5.693840252113496e-06, "loss": 1.4419200420379639, "step": 8682 }, { "epoch": 1.5806862655866023, "grad_norm": 5.1875, "learning_rate": 5.6923124722282034e-06, "loss": 1.0466389656066895, "step": 8684 }, { "epoch": 1.5810503322107947, "grad_norm": 19.25, "learning_rate": 5.690784737494811e-06, "loss": 1.0109425783157349, "step": 8686 }, { "epoch": 1.581414398834987, "grad_norm": 14.125, "learning_rate": 5.689257048137504e-06, "loss": 0.5340254902839661, "step": 8688 }, { "epoch": 1.581778465459179, "grad_norm": 14.6875, "learning_rate": 5.687729404380466e-06, "loss": 1.5743764638900757, "step": 8690 }, { "epoch": 1.5821425320833713, "grad_norm": 6.78125, "learning_rate": 5.686201806447872e-06, "loss": 1.295348882675171, "step": 8692 }, { "epoch": 1.5825065987075635, "grad_norm": 16.125, "learning_rate": 5.68467425456389e-06, "loss": 1.126971960067749, "step": 8694 }, { "epoch": 1.5828706653317557, "grad_norm": 12.875, "learning_rate": 5.6831467489526855e-06, "loss": 1.5214215517044067, "step": 8696 }, { "epoch": 1.5832347319559479, "grad_norm": 8.8125, "learning_rate": 5.681619289838412e-06, "loss": 1.789697289466858, "step": 8698 }, { "epoch": 1.58359879858014, "grad_norm": 6.40625, "learning_rate": 5.680091877445215e-06, "loss": 1.026115894317627, "step": 8700 }, { "epoch": 1.5839628652043323, "grad_norm": 10.125, "learning_rate": 5.67856451199724e-06, "loss": 1.6779255867004395, "step": 8702 }, { "epoch": 1.5843269318285245, "grad_norm": 12.5, "learning_rate": 5.677037193718617e-06, "loss": 1.417406678199768, "step": 8704 }, { "epoch": 1.5846909984527169, "grad_norm": 14.4375, "learning_rate": 5.675509922833482e-06, "loss": 1.6378275156021118, "step": 8706 }, { "epoch": 1.585055065076909, "grad_norm": 9.375, "learning_rate": 5.673982699565948e-06, "loss": 1.3963499069213867, "step": 8708 }, { "epoch": 1.5854191317011013, "grad_norm": 7.3125, "learning_rate": 5.672455524140133e-06, "loss": 1.308735966682434, "step": 8710 }, { "epoch": 1.5857831983252937, "grad_norm": 12.75, "learning_rate": 5.67092839678014e-06, "loss": 1.4711189270019531, "step": 8712 }, { "epoch": 1.5861472649494859, "grad_norm": 11.0625, "learning_rate": 5.669401317710073e-06, "loss": 1.260784387588501, "step": 8714 }, { "epoch": 1.586511331573678, "grad_norm": 13.25, "learning_rate": 5.667874287154023e-06, "loss": 1.3194794654846191, "step": 8716 }, { "epoch": 1.5868753981978703, "grad_norm": 12.5, "learning_rate": 5.666347305336075e-06, "loss": 1.4339884519577026, "step": 8718 }, { "epoch": 1.5872394648220625, "grad_norm": 9.125, "learning_rate": 5.664820372480306e-06, "loss": 1.392461895942688, "step": 8720 }, { "epoch": 1.5876035314462547, "grad_norm": 15.75, "learning_rate": 5.6632934888107915e-06, "loss": 1.5660393238067627, "step": 8722 }, { "epoch": 1.5879675980704469, "grad_norm": 85.0, "learning_rate": 5.6617666545515905e-06, "loss": 1.7725472450256348, "step": 8724 }, { "epoch": 1.588331664694639, "grad_norm": 17.0, "learning_rate": 5.660239869926764e-06, "loss": 1.525301456451416, "step": 8726 }, { "epoch": 1.5886957313188312, "grad_norm": 9.5625, "learning_rate": 5.658713135160361e-06, "loss": 1.3020918369293213, "step": 8728 }, { "epoch": 1.5890597979430234, "grad_norm": 7.15625, "learning_rate": 5.657186450476419e-06, "loss": 1.112493872642517, "step": 8730 }, { "epoch": 1.5894238645672158, "grad_norm": 6.5, "learning_rate": 5.65565981609898e-06, "loss": 1.3058762550354004, "step": 8732 }, { "epoch": 1.589787931191408, "grad_norm": 6.34375, "learning_rate": 5.6541332322520635e-06, "loss": 1.2176767587661743, "step": 8734 }, { "epoch": 1.5901519978156002, "grad_norm": 9.9375, "learning_rate": 5.652606699159696e-06, "loss": 1.2101428508758545, "step": 8736 }, { "epoch": 1.5905160644397924, "grad_norm": 10.3125, "learning_rate": 5.651080217045887e-06, "loss": 0.7322897911071777, "step": 8738 }, { "epoch": 1.5908801310639848, "grad_norm": 6.09375, "learning_rate": 5.649553786134642e-06, "loss": 1.0011916160583496, "step": 8740 }, { "epoch": 1.591244197688177, "grad_norm": 12.125, "learning_rate": 5.6480274066499585e-06, "loss": 1.5173137187957764, "step": 8742 }, { "epoch": 1.5916082643123692, "grad_norm": 14.625, "learning_rate": 5.646501078815826e-06, "loss": 1.4697926044464111, "step": 8744 }, { "epoch": 1.5919723309365614, "grad_norm": 20.625, "learning_rate": 5.644974802856229e-06, "loss": 1.6070770025253296, "step": 8746 }, { "epoch": 1.5923363975607536, "grad_norm": 23.5, "learning_rate": 5.64344857899514e-06, "loss": 1.5925300121307373, "step": 8748 }, { "epoch": 1.5927004641849458, "grad_norm": 14.1875, "learning_rate": 5.641922407456527e-06, "loss": 1.7517449855804443, "step": 8750 }, { "epoch": 1.593064530809138, "grad_norm": 33.0, "learning_rate": 5.640396288464349e-06, "loss": 1.6902092695236206, "step": 8752 }, { "epoch": 1.5934285974333302, "grad_norm": 10.25, "learning_rate": 5.638870222242558e-06, "loss": 1.4073898792266846, "step": 8754 }, { "epoch": 1.5937926640575224, "grad_norm": 7.125, "learning_rate": 5.637344209015101e-06, "loss": 1.3730113506317139, "step": 8756 }, { "epoch": 1.5941567306817146, "grad_norm": 9.0, "learning_rate": 5.635818249005911e-06, "loss": 1.0621342658996582, "step": 8758 }, { "epoch": 1.594520797305907, "grad_norm": 17.25, "learning_rate": 5.634292342438916e-06, "loss": 0.5995445847511292, "step": 8760 }, { "epoch": 1.5948848639300992, "grad_norm": 7.1875, "learning_rate": 5.632766489538037e-06, "loss": 1.5728943347930908, "step": 8762 }, { "epoch": 1.5952489305542914, "grad_norm": 11.0, "learning_rate": 5.631240690527189e-06, "loss": 1.1080061197280884, "step": 8764 }, { "epoch": 1.5956129971784838, "grad_norm": 18.0, "learning_rate": 5.629714945630274e-06, "loss": 1.4351915121078491, "step": 8766 }, { "epoch": 1.595977063802676, "grad_norm": 14.3125, "learning_rate": 5.62818925507119e-06, "loss": 1.8252661228179932, "step": 8768 }, { "epoch": 1.5963411304268682, "grad_norm": 9.1875, "learning_rate": 5.626663619073827e-06, "loss": 0.8909593224525452, "step": 8770 }, { "epoch": 1.5967051970510604, "grad_norm": 28.125, "learning_rate": 5.625138037862065e-06, "loss": 1.1606981754302979, "step": 8772 }, { "epoch": 1.5970692636752526, "grad_norm": 12.9375, "learning_rate": 5.623612511659775e-06, "loss": 1.5173490047454834, "step": 8774 }, { "epoch": 1.5974333302994448, "grad_norm": 31.375, "learning_rate": 5.622087040690824e-06, "loss": 1.4508112668991089, "step": 8776 }, { "epoch": 1.597797396923637, "grad_norm": 20.25, "learning_rate": 5.6205616251790704e-06, "loss": 0.8631260991096497, "step": 8778 }, { "epoch": 1.5981614635478292, "grad_norm": 15.0625, "learning_rate": 5.619036265348359e-06, "loss": 1.7663922309875488, "step": 8780 }, { "epoch": 1.5985255301720214, "grad_norm": 10.0, "learning_rate": 5.617510961422532e-06, "loss": 1.3401504755020142, "step": 8782 }, { "epoch": 1.5988895967962136, "grad_norm": 40.5, "learning_rate": 5.615985713625421e-06, "loss": 1.2212750911712646, "step": 8784 }, { "epoch": 1.599253663420406, "grad_norm": 15.1875, "learning_rate": 5.614460522180852e-06, "loss": 1.282362937927246, "step": 8786 }, { "epoch": 1.5996177300445982, "grad_norm": 9.375, "learning_rate": 5.6129353873126354e-06, "loss": 1.5193630456924438, "step": 8788 }, { "epoch": 1.5999817966687904, "grad_norm": 7.25, "learning_rate": 5.611410309244585e-06, "loss": 1.2703156471252441, "step": 8790 }, { "epoch": 1.6003458632929826, "grad_norm": 23.125, "learning_rate": 5.609885288200496e-06, "loss": 1.7064123153686523, "step": 8792 }, { "epoch": 1.600709929917175, "grad_norm": 12.375, "learning_rate": 5.608360324404158e-06, "loss": 1.427061676979065, "step": 8794 }, { "epoch": 1.6010739965413672, "grad_norm": 9.5625, "learning_rate": 5.606835418079358e-06, "loss": 1.0867226123809814, "step": 8796 }, { "epoch": 1.6014380631655594, "grad_norm": 9.4375, "learning_rate": 5.605310569449867e-06, "loss": 1.4145636558532715, "step": 8798 }, { "epoch": 1.6018021297897516, "grad_norm": 9.4375, "learning_rate": 5.603785778739449e-06, "loss": 1.1019208431243896, "step": 8800 }, { "epoch": 1.6021661964139438, "grad_norm": 17.125, "learning_rate": 5.602261046171863e-06, "loss": 1.4459761381149292, "step": 8802 }, { "epoch": 1.602530263038136, "grad_norm": 10.5, "learning_rate": 5.600736371970855e-06, "loss": 1.4090943336486816, "step": 8804 }, { "epoch": 1.6028943296623281, "grad_norm": 25.25, "learning_rate": 5.599211756360169e-06, "loss": 1.6040135622024536, "step": 8806 }, { "epoch": 1.6032583962865203, "grad_norm": 8.875, "learning_rate": 5.597687199563533e-06, "loss": 1.7145119905471802, "step": 8808 }, { "epoch": 1.6036224629107125, "grad_norm": 3.109375, "learning_rate": 5.596162701804669e-06, "loss": 0.9624874591827393, "step": 8810 }, { "epoch": 1.6039865295349047, "grad_norm": 7.8125, "learning_rate": 5.594638263307292e-06, "loss": 0.9814913272857666, "step": 8812 }, { "epoch": 1.6043505961590971, "grad_norm": 25.625, "learning_rate": 5.593113884295106e-06, "loss": 1.1439611911773682, "step": 8814 }, { "epoch": 1.6047146627832893, "grad_norm": 13.0625, "learning_rate": 5.591589564991811e-06, "loss": 1.449749231338501, "step": 8816 }, { "epoch": 1.6050787294074815, "grad_norm": 18.75, "learning_rate": 5.590065305621091e-06, "loss": 1.4518253803253174, "step": 8818 }, { "epoch": 1.605442796031674, "grad_norm": 14.3125, "learning_rate": 5.5885411064066256e-06, "loss": 1.2628220319747925, "step": 8820 }, { "epoch": 1.6058068626558661, "grad_norm": 15.5, "learning_rate": 5.5870169675720855e-06, "loss": 1.7324600219726562, "step": 8822 }, { "epoch": 1.6061709292800583, "grad_norm": 17.375, "learning_rate": 5.585492889341131e-06, "loss": 1.5263853073120117, "step": 8824 }, { "epoch": 1.6065349959042505, "grad_norm": 23.75, "learning_rate": 5.583968871937418e-06, "loss": 1.8368717432022095, "step": 8826 }, { "epoch": 1.6068990625284427, "grad_norm": 13.3125, "learning_rate": 5.582444915584584e-06, "loss": 1.5513668060302734, "step": 8828 }, { "epoch": 1.607263129152635, "grad_norm": 10.625, "learning_rate": 5.580921020506268e-06, "loss": 1.3387057781219482, "step": 8830 }, { "epoch": 1.6076271957768271, "grad_norm": 26.125, "learning_rate": 5.579397186926093e-06, "loss": 1.421623945236206, "step": 8832 }, { "epoch": 1.6079912624010193, "grad_norm": 23.125, "learning_rate": 5.5778734150676765e-06, "loss": 1.663716435432434, "step": 8834 }, { "epoch": 1.6083553290252115, "grad_norm": 41.25, "learning_rate": 5.576349705154626e-06, "loss": 1.5509027242660522, "step": 8836 }, { "epoch": 1.6087193956494037, "grad_norm": 8.3125, "learning_rate": 5.5748260574105394e-06, "loss": 0.9681510925292969, "step": 8838 }, { "epoch": 1.609083462273596, "grad_norm": 11.1875, "learning_rate": 5.573302472059005e-06, "loss": 0.9092290997505188, "step": 8840 }, { "epoch": 1.6094475288977883, "grad_norm": 8.25, "learning_rate": 5.571778949323605e-06, "loss": 0.5999207496643066, "step": 8842 }, { "epoch": 1.6098115955219805, "grad_norm": 9.9375, "learning_rate": 5.570255489427907e-06, "loss": 1.4470223188400269, "step": 8844 }, { "epoch": 1.610175662146173, "grad_norm": 13.125, "learning_rate": 5.568732092595476e-06, "loss": 1.619198203086853, "step": 8846 }, { "epoch": 1.610539728770365, "grad_norm": 11.0, "learning_rate": 5.567208759049862e-06, "loss": 1.3654470443725586, "step": 8848 }, { "epoch": 1.6109037953945573, "grad_norm": 10.8125, "learning_rate": 5.565685489014608e-06, "loss": 1.2100518941879272, "step": 8850 }, { "epoch": 1.6112678620187495, "grad_norm": 12.0625, "learning_rate": 5.564162282713249e-06, "loss": 1.229964017868042, "step": 8852 }, { "epoch": 1.6116319286429417, "grad_norm": 15.375, "learning_rate": 5.562639140369308e-06, "loss": 1.3368597030639648, "step": 8854 }, { "epoch": 1.6119959952671339, "grad_norm": 10.25, "learning_rate": 5.561116062206302e-06, "loss": 1.3706207275390625, "step": 8856 }, { "epoch": 1.612360061891326, "grad_norm": 21.5, "learning_rate": 5.5595930484477334e-06, "loss": 1.4467570781707764, "step": 8858 }, { "epoch": 1.6127241285155183, "grad_norm": 7.0625, "learning_rate": 5.558070099317103e-06, "loss": 1.3247259855270386, "step": 8860 }, { "epoch": 1.6130881951397105, "grad_norm": 5.78125, "learning_rate": 5.556547215037893e-06, "loss": 1.3324511051177979, "step": 8862 }, { "epoch": 1.6134522617639027, "grad_norm": 14.375, "learning_rate": 5.55502439583358e-06, "loss": 1.5234766006469727, "step": 8864 }, { "epoch": 1.6138163283880949, "grad_norm": 11.0, "learning_rate": 5.553501641927636e-06, "loss": 1.4986146688461304, "step": 8866 }, { "epoch": 1.6141803950122873, "grad_norm": 17.25, "learning_rate": 5.551978953543515e-06, "loss": 1.4198793172836304, "step": 8868 }, { "epoch": 1.6145444616364795, "grad_norm": 85.0, "learning_rate": 5.5504563309046695e-06, "loss": 1.4635794162750244, "step": 8870 }, { "epoch": 1.6149085282606717, "grad_norm": 18.375, "learning_rate": 5.548933774234533e-06, "loss": 0.9434354305267334, "step": 8872 }, { "epoch": 1.615272594884864, "grad_norm": 7.75, "learning_rate": 5.5474112837565385e-06, "loss": 1.0740314722061157, "step": 8874 }, { "epoch": 1.6156366615090563, "grad_norm": 22.0, "learning_rate": 5.545888859694104e-06, "loss": 1.0221545696258545, "step": 8876 }, { "epoch": 1.6160007281332485, "grad_norm": 16.25, "learning_rate": 5.544366502270637e-06, "loss": 1.4753661155700684, "step": 8878 }, { "epoch": 1.6163647947574407, "grad_norm": 9.5625, "learning_rate": 5.542844211709542e-06, "loss": 1.646446943283081, "step": 8880 }, { "epoch": 1.6167288613816329, "grad_norm": 8.3125, "learning_rate": 5.541321988234205e-06, "loss": 1.248424768447876, "step": 8882 }, { "epoch": 1.617092928005825, "grad_norm": 11.875, "learning_rate": 5.539799832068007e-06, "loss": 1.565284252166748, "step": 8884 }, { "epoch": 1.6174569946300172, "grad_norm": 28.875, "learning_rate": 5.538277743434319e-06, "loss": 1.7744112014770508, "step": 8886 }, { "epoch": 1.6178210612542094, "grad_norm": 11.3125, "learning_rate": 5.536755722556499e-06, "loss": 1.1932414770126343, "step": 8888 }, { "epoch": 1.6181851278784016, "grad_norm": 8.375, "learning_rate": 5.535233769657904e-06, "loss": 0.6376725435256958, "step": 8890 }, { "epoch": 1.6185491945025938, "grad_norm": 15.25, "learning_rate": 5.5337118849618664e-06, "loss": 1.260134220123291, "step": 8892 }, { "epoch": 1.6189132611267862, "grad_norm": 10.0625, "learning_rate": 5.532190068691719e-06, "loss": 1.6569552421569824, "step": 8894 }, { "epoch": 1.6192773277509784, "grad_norm": 4.625, "learning_rate": 5.5306683210707866e-06, "loss": 0.8247285485267639, "step": 8896 }, { "epoch": 1.6196413943751706, "grad_norm": 3.984375, "learning_rate": 5.529146642322374e-06, "loss": 1.441962718963623, "step": 8898 }, { "epoch": 1.620005460999363, "grad_norm": 3.34375, "learning_rate": 5.527625032669783e-06, "loss": 0.9892392158508301, "step": 8900 }, { "epoch": 1.6203695276235552, "grad_norm": 12.25, "learning_rate": 5.526103492336306e-06, "loss": 1.0255471467971802, "step": 8902 }, { "epoch": 1.6207335942477474, "grad_norm": 6.59375, "learning_rate": 5.5245820215452185e-06, "loss": 1.160487174987793, "step": 8904 }, { "epoch": 1.6210976608719396, "grad_norm": 6.09375, "learning_rate": 5.523060620519795e-06, "loss": 1.2020061016082764, "step": 8906 }, { "epoch": 1.6214617274961318, "grad_norm": 16.875, "learning_rate": 5.521539289483291e-06, "loss": 1.1621804237365723, "step": 8908 }, { "epoch": 1.621825794120324, "grad_norm": 9.5625, "learning_rate": 5.520018028658958e-06, "loss": 1.438092589378357, "step": 8910 }, { "epoch": 1.6221898607445162, "grad_norm": 20.125, "learning_rate": 5.518496838270034e-06, "loss": 1.1545531749725342, "step": 8912 }, { "epoch": 1.6225539273687084, "grad_norm": 1.859375, "learning_rate": 5.5169757185397456e-06, "loss": 0.8762395977973938, "step": 8914 }, { "epoch": 1.6229179939929006, "grad_norm": 16.125, "learning_rate": 5.515454669691316e-06, "loss": 1.1666585206985474, "step": 8916 }, { "epoch": 1.6232820606170928, "grad_norm": 10.5625, "learning_rate": 5.513933691947948e-06, "loss": 1.4057941436767578, "step": 8918 }, { "epoch": 1.6236461272412852, "grad_norm": 6.8125, "learning_rate": 5.512412785532841e-06, "loss": 1.4671525955200195, "step": 8920 }, { "epoch": 1.6240101938654774, "grad_norm": 12.9375, "learning_rate": 5.510891950669184e-06, "loss": 1.3448476791381836, "step": 8922 }, { "epoch": 1.6243742604896696, "grad_norm": 56.0, "learning_rate": 5.509371187580148e-06, "loss": 1.086970329284668, "step": 8924 }, { "epoch": 1.6247383271138618, "grad_norm": 8.625, "learning_rate": 5.507850496488904e-06, "loss": 0.7681834101676941, "step": 8926 }, { "epoch": 1.6251023937380542, "grad_norm": 12.25, "learning_rate": 5.506329877618603e-06, "loss": 1.516718864440918, "step": 8928 }, { "epoch": 1.6254664603622464, "grad_norm": 21.25, "learning_rate": 5.504809331192394e-06, "loss": 1.3979264497756958, "step": 8930 }, { "epoch": 1.6258305269864386, "grad_norm": 10.25, "learning_rate": 5.503288857433409e-06, "loss": 1.8341989517211914, "step": 8932 }, { "epoch": 1.6261945936106308, "grad_norm": 17.375, "learning_rate": 5.501768456564769e-06, "loss": 1.3230714797973633, "step": 8934 }, { "epoch": 1.626558660234823, "grad_norm": 26.875, "learning_rate": 5.500248128809591e-06, "loss": 1.917144775390625, "step": 8936 }, { "epoch": 1.6269227268590152, "grad_norm": 8.1875, "learning_rate": 5.498727874390972e-06, "loss": 1.2894903421401978, "step": 8938 }, { "epoch": 1.6272867934832074, "grad_norm": 10.875, "learning_rate": 5.497207693532008e-06, "loss": 1.4657738208770752, "step": 8940 }, { "epoch": 1.6276508601073996, "grad_norm": 16.5, "learning_rate": 5.495687586455778e-06, "loss": 1.2809555530548096, "step": 8942 }, { "epoch": 1.6280149267315918, "grad_norm": 7.65625, "learning_rate": 5.4941675533853485e-06, "loss": 0.8499621152877808, "step": 8944 }, { "epoch": 1.628378993355784, "grad_norm": 54.5, "learning_rate": 5.492647594543783e-06, "loss": 1.1398581266403198, "step": 8946 }, { "epoch": 1.6287430599799764, "grad_norm": 7.71875, "learning_rate": 5.491127710154125e-06, "loss": 1.3814674615859985, "step": 8948 }, { "epoch": 1.6291071266041686, "grad_norm": 13.5, "learning_rate": 5.4896079004394155e-06, "loss": 1.2310429811477661, "step": 8950 }, { "epoch": 1.6294711932283608, "grad_norm": 32.25, "learning_rate": 5.488088165622676e-06, "loss": 1.471536636352539, "step": 8952 }, { "epoch": 1.6298352598525532, "grad_norm": 10.0, "learning_rate": 5.486568505926924e-06, "loss": 1.4782040119171143, "step": 8954 }, { "epoch": 1.6301993264767454, "grad_norm": 10.625, "learning_rate": 5.485048921575165e-06, "loss": 1.1005005836486816, "step": 8956 }, { "epoch": 1.6305633931009376, "grad_norm": 11.875, "learning_rate": 5.483529412790387e-06, "loss": 1.4505761861801147, "step": 8958 }, { "epoch": 1.6309274597251298, "grad_norm": 6.1875, "learning_rate": 5.482009979795576e-06, "loss": 1.2646241188049316, "step": 8960 }, { "epoch": 1.631291526349322, "grad_norm": 4.625, "learning_rate": 5.480490622813701e-06, "loss": 1.01548171043396, "step": 8962 }, { "epoch": 1.6316555929735141, "grad_norm": 8.1875, "learning_rate": 5.47897134206772e-06, "loss": 1.4203006029129028, "step": 8964 }, { "epoch": 1.6320196595977063, "grad_norm": 14.125, "learning_rate": 5.477452137780587e-06, "loss": 1.7511482238769531, "step": 8966 }, { "epoch": 1.6323837262218985, "grad_norm": 16.625, "learning_rate": 5.475933010175232e-06, "loss": 1.571290135383606, "step": 8968 }, { "epoch": 1.6327477928460907, "grad_norm": 9.9375, "learning_rate": 5.474413959474585e-06, "loss": 1.7354551553726196, "step": 8970 }, { "epoch": 1.633111859470283, "grad_norm": 29.25, "learning_rate": 5.47289498590156e-06, "loss": 1.4419825077056885, "step": 8972 }, { "epoch": 1.6334759260944753, "grad_norm": 15.8125, "learning_rate": 5.471376089679057e-06, "loss": 1.440701961517334, "step": 8974 }, { "epoch": 1.6338399927186675, "grad_norm": 13.625, "learning_rate": 5.4698572710299736e-06, "loss": 1.2962970733642578, "step": 8976 }, { "epoch": 1.6342040593428597, "grad_norm": 5.03125, "learning_rate": 5.468338530177185e-06, "loss": 1.3956835269927979, "step": 8978 }, { "epoch": 1.634568125967052, "grad_norm": 3.46875, "learning_rate": 5.466819867343564e-06, "loss": 0.9811263084411621, "step": 8980 }, { "epoch": 1.6349321925912443, "grad_norm": 5216.0, "learning_rate": 5.465301282751967e-06, "loss": 1.2953565120697021, "step": 8982 }, { "epoch": 1.6352962592154365, "grad_norm": 9.1875, "learning_rate": 5.463782776625237e-06, "loss": 0.8272882699966431, "step": 8984 }, { "epoch": 1.6356603258396287, "grad_norm": 10.5625, "learning_rate": 5.462264349186215e-06, "loss": 1.404762625694275, "step": 8986 }, { "epoch": 1.636024392463821, "grad_norm": 3.03125, "learning_rate": 5.460746000657717e-06, "loss": 1.0570034980773926, "step": 8988 }, { "epoch": 1.6363884590880131, "grad_norm": 13.0625, "learning_rate": 5.459227731262562e-06, "loss": 1.164574146270752, "step": 8990 }, { "epoch": 1.6367525257122053, "grad_norm": 8.25, "learning_rate": 5.457709541223544e-06, "loss": 1.4518458843231201, "step": 8992 }, { "epoch": 1.6371165923363975, "grad_norm": 124.5, "learning_rate": 5.456191430763451e-06, "loss": 1.3307973146438599, "step": 8994 }, { "epoch": 1.6374806589605897, "grad_norm": 13.75, "learning_rate": 5.454673400105065e-06, "loss": 1.1192717552185059, "step": 8996 }, { "epoch": 1.637844725584782, "grad_norm": 6.6875, "learning_rate": 5.4531554494711445e-06, "loss": 1.2777091264724731, "step": 8998 }, { "epoch": 1.638208792208974, "grad_norm": 10.125, "learning_rate": 5.451637579084448e-06, "loss": 1.3846396207809448, "step": 9000 }, { "epoch": 1.6385728588331665, "grad_norm": 13.8125, "learning_rate": 5.450119789167714e-06, "loss": 1.405811071395874, "step": 9002 }, { "epoch": 1.6389369254573587, "grad_norm": 7.03125, "learning_rate": 5.448602079943671e-06, "loss": 1.1963441371917725, "step": 9004 }, { "epoch": 1.639300992081551, "grad_norm": 11.1875, "learning_rate": 5.4470844516350404e-06, "loss": 1.3561813831329346, "step": 9006 }, { "epoch": 1.6396650587057433, "grad_norm": 12.0625, "learning_rate": 5.445566904464523e-06, "loss": 1.2439383268356323, "step": 9008 }, { "epoch": 1.6400291253299355, "grad_norm": 15.625, "learning_rate": 5.444049438654819e-06, "loss": 1.3630499839782715, "step": 9010 }, { "epoch": 1.6403931919541277, "grad_norm": 69.5, "learning_rate": 5.442532054428604e-06, "loss": 1.6038141250610352, "step": 9012 }, { "epoch": 1.6407572585783199, "grad_norm": 27.25, "learning_rate": 5.441014752008551e-06, "loss": 1.2607192993164062, "step": 9014 }, { "epoch": 1.641121325202512, "grad_norm": 21.75, "learning_rate": 5.439497531617319e-06, "loss": 1.7777246236801147, "step": 9016 }, { "epoch": 1.6414853918267043, "grad_norm": 8.9375, "learning_rate": 5.437980393477551e-06, "loss": 1.5362765789031982, "step": 9018 }, { "epoch": 1.6418494584508965, "grad_norm": 7.9375, "learning_rate": 5.436463337811886e-06, "loss": 1.2003294229507446, "step": 9020 }, { "epoch": 1.6422135250750887, "grad_norm": 6.28125, "learning_rate": 5.434946364842939e-06, "loss": 1.3044151067733765, "step": 9022 }, { "epoch": 1.6425775916992809, "grad_norm": 10.75, "learning_rate": 5.433429474793324e-06, "loss": 1.3526506423950195, "step": 9024 }, { "epoch": 1.642941658323473, "grad_norm": 7.96875, "learning_rate": 5.431912667885637e-06, "loss": 1.2049086093902588, "step": 9026 }, { "epoch": 1.6433057249476655, "grad_norm": 10.6875, "learning_rate": 5.430395944342463e-06, "loss": 1.1403303146362305, "step": 9028 }, { "epoch": 1.6436697915718577, "grad_norm": 11.0, "learning_rate": 5.4288793043863784e-06, "loss": 1.4107240438461304, "step": 9030 }, { "epoch": 1.6440338581960499, "grad_norm": 17.375, "learning_rate": 5.427362748239941e-06, "loss": 1.4551666975021362, "step": 9032 }, { "epoch": 1.644397924820242, "grad_norm": 6.3125, "learning_rate": 5.425846276125697e-06, "loss": 1.3451907634735107, "step": 9034 }, { "epoch": 1.6447619914444345, "grad_norm": 7.9375, "learning_rate": 5.424329888266188e-06, "loss": 1.4933338165283203, "step": 9036 }, { "epoch": 1.6451260580686267, "grad_norm": 9.3125, "learning_rate": 5.422813584883932e-06, "loss": 1.297314167022705, "step": 9038 }, { "epoch": 1.6454901246928189, "grad_norm": 22.875, "learning_rate": 5.421297366201446e-06, "loss": 1.2356611490249634, "step": 9040 }, { "epoch": 1.645854191317011, "grad_norm": 10.375, "learning_rate": 5.419781232441226e-06, "loss": 1.3946754932403564, "step": 9042 }, { "epoch": 1.6462182579412032, "grad_norm": 247.0, "learning_rate": 5.418265183825757e-06, "loss": 1.477485179901123, "step": 9044 }, { "epoch": 1.6465823245653954, "grad_norm": 8.8125, "learning_rate": 5.416749220577515e-06, "loss": 1.5828111171722412, "step": 9046 }, { "epoch": 1.6469463911895876, "grad_norm": 13.5, "learning_rate": 5.415233342918962e-06, "loss": 1.320845127105713, "step": 9048 }, { "epoch": 1.6473104578137798, "grad_norm": 5.9375, "learning_rate": 5.413717551072546e-06, "loss": 1.3015503883361816, "step": 9050 }, { "epoch": 1.647674524437972, "grad_norm": 29.625, "learning_rate": 5.412201845260703e-06, "loss": 1.0998867750167847, "step": 9052 }, { "epoch": 1.6480385910621642, "grad_norm": 24.625, "learning_rate": 5.410686225705857e-06, "loss": 1.0409722328186035, "step": 9054 }, { "epoch": 1.6484026576863566, "grad_norm": 12.8125, "learning_rate": 5.40917069263042e-06, "loss": 0.7813601493835449, "step": 9056 }, { "epoch": 1.6487667243105488, "grad_norm": 8.4375, "learning_rate": 5.407655246256789e-06, "loss": 1.0301543474197388, "step": 9058 }, { "epoch": 1.649130790934741, "grad_norm": 38.5, "learning_rate": 5.406139886807349e-06, "loss": 1.2839436531066895, "step": 9060 }, { "epoch": 1.6494948575589334, "grad_norm": 19.0, "learning_rate": 5.4046246145044755e-06, "loss": 1.2923681735992432, "step": 9062 }, { "epoch": 1.6498589241831256, "grad_norm": 10.1875, "learning_rate": 5.403109429570525e-06, "loss": 1.2729697227478027, "step": 9064 }, { "epoch": 1.6502229908073178, "grad_norm": 2.9375, "learning_rate": 5.401594332227849e-06, "loss": 1.033754587173462, "step": 9066 }, { "epoch": 1.65058705743151, "grad_norm": 13.25, "learning_rate": 5.400079322698777e-06, "loss": 0.9543336629867554, "step": 9068 }, { "epoch": 1.6509511240557022, "grad_norm": 16.25, "learning_rate": 5.398564401205637e-06, "loss": 1.4318230152130127, "step": 9070 }, { "epoch": 1.6513151906798944, "grad_norm": 8.0625, "learning_rate": 5.397049567970731e-06, "loss": 1.3613770008087158, "step": 9072 }, { "epoch": 1.6516792573040866, "grad_norm": 11.1875, "learning_rate": 5.395534823216358e-06, "loss": 1.47922682762146, "step": 9074 }, { "epoch": 1.6520433239282788, "grad_norm": 13.5625, "learning_rate": 5.3940201671648e-06, "loss": 1.669333577156067, "step": 9076 }, { "epoch": 1.652407390552471, "grad_norm": 16.75, "learning_rate": 5.3925056000383245e-06, "loss": 1.2753726243972778, "step": 9078 }, { "epoch": 1.6527714571766632, "grad_norm": 8.375, "learning_rate": 5.390991122059193e-06, "loss": 1.3380441665649414, "step": 9080 }, { "epoch": 1.6531355238008556, "grad_norm": 13.875, "learning_rate": 5.389476733449646e-06, "loss": 1.5330275297164917, "step": 9082 }, { "epoch": 1.6534995904250478, "grad_norm": 12.8125, "learning_rate": 5.387962434431913e-06, "loss": 1.524970293045044, "step": 9084 }, { "epoch": 1.65386365704924, "grad_norm": 9.375, "learning_rate": 5.386448225228213e-06, "loss": 1.6511247158050537, "step": 9086 }, { "epoch": 1.6542277236734324, "grad_norm": 15.6875, "learning_rate": 5.384934106060748e-06, "loss": 1.4060384035110474, "step": 9088 }, { "epoch": 1.6545917902976246, "grad_norm": 5.9375, "learning_rate": 5.383420077151713e-06, "loss": 0.9899530410766602, "step": 9090 }, { "epoch": 1.6549558569218168, "grad_norm": 13.0625, "learning_rate": 5.3819061387232816e-06, "loss": 1.388381004333496, "step": 9092 }, { "epoch": 1.655319923546009, "grad_norm": 6.6875, "learning_rate": 5.3803922909976184e-06, "loss": 1.1267688274383545, "step": 9094 }, { "epoch": 1.6556839901702012, "grad_norm": 12.0, "learning_rate": 5.378878534196877e-06, "loss": 1.3740040063858032, "step": 9096 }, { "epoch": 1.6560480567943934, "grad_norm": 18.625, "learning_rate": 5.377364868543193e-06, "loss": 1.334236741065979, "step": 9098 }, { "epoch": 1.6564121234185856, "grad_norm": 23.75, "learning_rate": 5.375851294258692e-06, "loss": 1.568598747253418, "step": 9100 }, { "epoch": 1.6567761900427778, "grad_norm": 20.625, "learning_rate": 5.374337811565482e-06, "loss": 1.7845431566238403, "step": 9102 }, { "epoch": 1.65714025666697, "grad_norm": 8.5625, "learning_rate": 5.372824420685663e-06, "loss": 1.3297598361968994, "step": 9104 }, { "epoch": 1.6575043232911622, "grad_norm": 4.25, "learning_rate": 5.37131112184132e-06, "loss": 1.066178321838379, "step": 9106 }, { "epoch": 1.6578683899153543, "grad_norm": 6.75, "learning_rate": 5.369797915254522e-06, "loss": 0.8099057674407959, "step": 9108 }, { "epoch": 1.6582324565395468, "grad_norm": 8.6875, "learning_rate": 5.3682848011473254e-06, "loss": 1.3451005220413208, "step": 9110 }, { "epoch": 1.658596523163739, "grad_norm": 19.0, "learning_rate": 5.366771779741775e-06, "loss": 1.580949068069458, "step": 9112 }, { "epoch": 1.6589605897879312, "grad_norm": 29.5, "learning_rate": 5.365258851259898e-06, "loss": 1.4729431867599487, "step": 9114 }, { "epoch": 1.6593246564121236, "grad_norm": 5.4375, "learning_rate": 5.363746015923713e-06, "loss": 0.19191277027130127, "step": 9116 }, { "epoch": 1.6596887230363158, "grad_norm": 35.25, "learning_rate": 5.362233273955221e-06, "loss": 1.3085311651229858, "step": 9118 }, { "epoch": 1.660052789660508, "grad_norm": 10.125, "learning_rate": 5.360720625576412e-06, "loss": 1.7102875709533691, "step": 9120 }, { "epoch": 1.6604168562847001, "grad_norm": 17.125, "learning_rate": 5.359208071009261e-06, "loss": 1.4008978605270386, "step": 9122 }, { "epoch": 1.6607809229088923, "grad_norm": 13.4375, "learning_rate": 5.357695610475727e-06, "loss": 1.5351812839508057, "step": 9124 }, { "epoch": 1.6611449895330845, "grad_norm": 32.25, "learning_rate": 5.356183244197761e-06, "loss": 1.5203293561935425, "step": 9126 }, { "epoch": 1.6615090561572767, "grad_norm": 13.5625, "learning_rate": 5.354670972397293e-06, "loss": 1.6168484687805176, "step": 9128 }, { "epoch": 1.661873122781469, "grad_norm": 17.25, "learning_rate": 5.353158795296247e-06, "loss": 1.8464418649673462, "step": 9130 }, { "epoch": 1.6622371894056611, "grad_norm": 16.625, "learning_rate": 5.351646713116525e-06, "loss": 1.4194422960281372, "step": 9132 }, { "epoch": 1.6626012560298533, "grad_norm": 12.0, "learning_rate": 5.350134726080018e-06, "loss": 1.9352693557739258, "step": 9134 }, { "epoch": 1.6629653226540457, "grad_norm": 24.375, "learning_rate": 5.34862283440861e-06, "loss": 1.3733594417572021, "step": 9136 }, { "epoch": 1.663329389278238, "grad_norm": 8.9375, "learning_rate": 5.347111038324158e-06, "loss": 1.1910452842712402, "step": 9138 }, { "epoch": 1.6636934559024301, "grad_norm": 8.3125, "learning_rate": 5.34559933804852e-06, "loss": 0.8649009466171265, "step": 9140 }, { "epoch": 1.6640575225266225, "grad_norm": 20.625, "learning_rate": 5.344087733803522e-06, "loss": 1.5014541149139404, "step": 9142 }, { "epoch": 1.6644215891508147, "grad_norm": 17.625, "learning_rate": 5.3425762258109925e-06, "loss": 1.9736855030059814, "step": 9144 }, { "epoch": 1.664785655775007, "grad_norm": 11.1875, "learning_rate": 5.341064814292739e-06, "loss": 1.4210644960403442, "step": 9146 }, { "epoch": 1.6651497223991991, "grad_norm": 10.125, "learning_rate": 5.339553499470551e-06, "loss": 1.3818964958190918, "step": 9148 }, { "epoch": 1.6655137890233913, "grad_norm": 10.625, "learning_rate": 5.3380422815662135e-06, "loss": 1.3692893981933594, "step": 9150 }, { "epoch": 1.6658778556475835, "grad_norm": 12.0, "learning_rate": 5.3365311608014855e-06, "loss": 1.957620620727539, "step": 9152 }, { "epoch": 1.6662419222717757, "grad_norm": 23.25, "learning_rate": 5.335020137398121e-06, "loss": 1.610182762145996, "step": 9154 }, { "epoch": 1.666605988895968, "grad_norm": 4.09375, "learning_rate": 5.33350921157786e-06, "loss": 0.9809678792953491, "step": 9156 }, { "epoch": 1.66697005552016, "grad_norm": 11.6875, "learning_rate": 5.331998383562418e-06, "loss": 1.2095112800598145, "step": 9158 }, { "epoch": 1.6673341221443523, "grad_norm": 14.0625, "learning_rate": 5.330487653573507e-06, "loss": 1.2562638521194458, "step": 9160 }, { "epoch": 1.6676981887685447, "grad_norm": 24.125, "learning_rate": 5.328977021832819e-06, "loss": 1.8170607089996338, "step": 9162 }, { "epoch": 1.668062255392737, "grad_norm": 16.625, "learning_rate": 5.327466488562034e-06, "loss": 1.8475511074066162, "step": 9164 }, { "epoch": 1.668426322016929, "grad_norm": 18.875, "learning_rate": 5.325956053982817e-06, "loss": 1.1698594093322754, "step": 9166 }, { "epoch": 1.6687903886411213, "grad_norm": 19.75, "learning_rate": 5.324445718316815e-06, "loss": 1.3467518091201782, "step": 9168 }, { "epoch": 1.6691544552653137, "grad_norm": 8.6875, "learning_rate": 5.322935481785669e-06, "loss": 1.2459766864776611, "step": 9170 }, { "epoch": 1.669518521889506, "grad_norm": 26.75, "learning_rate": 5.321425344610995e-06, "loss": 1.4066283702850342, "step": 9172 }, { "epoch": 1.669882588513698, "grad_norm": 10.5, "learning_rate": 5.319915307014402e-06, "loss": 1.3927595615386963, "step": 9174 }, { "epoch": 1.6702466551378903, "grad_norm": 8.0625, "learning_rate": 5.318405369217483e-06, "loss": 1.5326435565948486, "step": 9176 }, { "epoch": 1.6706107217620825, "grad_norm": 38.25, "learning_rate": 5.316895531441812e-06, "loss": 1.6073873043060303, "step": 9178 }, { "epoch": 1.6709747883862747, "grad_norm": 24.0, "learning_rate": 5.315385793908956e-06, "loss": 2.1608691215515137, "step": 9180 }, { "epoch": 1.6713388550104669, "grad_norm": 12.0, "learning_rate": 5.313876156840459e-06, "loss": 1.6498310565948486, "step": 9182 }, { "epoch": 1.671702921634659, "grad_norm": 42.5, "learning_rate": 5.312366620457854e-06, "loss": 1.7011055946350098, "step": 9184 }, { "epoch": 1.6720669882588513, "grad_norm": 10.0625, "learning_rate": 5.3108571849826615e-06, "loss": 0.814986526966095, "step": 9186 }, { "epoch": 1.6724310548830434, "grad_norm": 24.25, "learning_rate": 5.309347850636384e-06, "loss": 1.368643045425415, "step": 9188 }, { "epoch": 1.6727951215072359, "grad_norm": 22.75, "learning_rate": 5.307838617640512e-06, "loss": 0.6063592433929443, "step": 9190 }, { "epoch": 1.673159188131428, "grad_norm": 12.6875, "learning_rate": 5.306329486216516e-06, "loss": 1.4647926092147827, "step": 9192 }, { "epoch": 1.6735232547556202, "grad_norm": 10.9375, "learning_rate": 5.304820456585856e-06, "loss": 1.5418574810028076, "step": 9194 }, { "epoch": 1.6738873213798127, "grad_norm": 20.625, "learning_rate": 5.303311528969979e-06, "loss": 1.108280897140503, "step": 9196 }, { "epoch": 1.6742513880040049, "grad_norm": 24.5, "learning_rate": 5.301802703590311e-06, "loss": 0.5406228303909302, "step": 9198 }, { "epoch": 1.674615454628197, "grad_norm": 35.0, "learning_rate": 5.300293980668266e-06, "loss": 1.4860377311706543, "step": 9200 }, { "epoch": 1.6749795212523892, "grad_norm": 12.375, "learning_rate": 5.298785360425245e-06, "loss": 1.735909104347229, "step": 9202 }, { "epoch": 1.6753435878765814, "grad_norm": 5.78125, "learning_rate": 5.297276843082628e-06, "loss": 1.5042681694030762, "step": 9204 }, { "epoch": 1.6757076545007736, "grad_norm": 12.375, "learning_rate": 5.2957684288617895e-06, "loss": 1.05649995803833, "step": 9206 }, { "epoch": 1.6760717211249658, "grad_norm": 22.0, "learning_rate": 5.294260117984077e-06, "loss": 1.1301065683364868, "step": 9208 }, { "epoch": 1.676435787749158, "grad_norm": 18.75, "learning_rate": 5.292751910670835e-06, "loss": 0.9675709009170532, "step": 9210 }, { "epoch": 1.6767998543733502, "grad_norm": 10.625, "learning_rate": 5.291243807143383e-06, "loss": 1.2153215408325195, "step": 9212 }, { "epoch": 1.6771639209975424, "grad_norm": 12.125, "learning_rate": 5.289735807623028e-06, "loss": 1.546874761581421, "step": 9214 }, { "epoch": 1.6775279876217348, "grad_norm": 16.875, "learning_rate": 5.288227912331068e-06, "loss": 2.194091558456421, "step": 9216 }, { "epoch": 1.677892054245927, "grad_norm": 3.75, "learning_rate": 5.286720121488773e-06, "loss": 1.0411674976348877, "step": 9218 }, { "epoch": 1.6782561208701192, "grad_norm": 7.40625, "learning_rate": 5.285212435317413e-06, "loss": 1.4277840852737427, "step": 9220 }, { "epoch": 1.6786201874943114, "grad_norm": 10.9375, "learning_rate": 5.283704854038231e-06, "loss": 1.3571604490280151, "step": 9222 }, { "epoch": 1.6789842541185038, "grad_norm": 13.875, "learning_rate": 5.282197377872458e-06, "loss": 1.5189456939697266, "step": 9224 }, { "epoch": 1.679348320742696, "grad_norm": 11.125, "learning_rate": 5.2806900070413115e-06, "loss": 1.382796287536621, "step": 9226 }, { "epoch": 1.6797123873668882, "grad_norm": 6.625, "learning_rate": 5.279182741765989e-06, "loss": 1.0974713563919067, "step": 9228 }, { "epoch": 1.6800764539910804, "grad_norm": 8.6875, "learning_rate": 5.27767558226768e-06, "loss": 1.148158311843872, "step": 9230 }, { "epoch": 1.6804405206152726, "grad_norm": 3.75, "learning_rate": 5.276168528767554e-06, "loss": 0.6674026250839233, "step": 9232 }, { "epoch": 1.6808045872394648, "grad_norm": 25.375, "learning_rate": 5.2746615814867584e-06, "loss": 1.2732802629470825, "step": 9234 }, { "epoch": 1.681168653863657, "grad_norm": 9.3125, "learning_rate": 5.27315474064644e-06, "loss": 0.7327215075492859, "step": 9236 }, { "epoch": 1.6815327204878492, "grad_norm": 15.3125, "learning_rate": 5.271648006467716e-06, "loss": 1.437905192375183, "step": 9238 }, { "epoch": 1.6818967871120414, "grad_norm": 6.25, "learning_rate": 5.270141379171696e-06, "loss": 1.0680959224700928, "step": 9240 }, { "epoch": 1.6822608537362336, "grad_norm": 23.0, "learning_rate": 5.268634858979469e-06, "loss": 1.5189906358718872, "step": 9242 }, { "epoch": 1.682624920360426, "grad_norm": 10.375, "learning_rate": 5.267128446112113e-06, "loss": 0.9952925443649292, "step": 9244 }, { "epoch": 1.6829889869846182, "grad_norm": 8.375, "learning_rate": 5.2656221407906895e-06, "loss": 1.3214839696884155, "step": 9246 }, { "epoch": 1.6833530536088104, "grad_norm": 13.8125, "learning_rate": 5.264115943236238e-06, "loss": 1.367179274559021, "step": 9248 }, { "epoch": 1.6837171202330028, "grad_norm": 18.25, "learning_rate": 5.262609853669793e-06, "loss": 1.3042641878128052, "step": 9250 }, { "epoch": 1.684081186857195, "grad_norm": 12.25, "learning_rate": 5.261103872312361e-06, "loss": 1.419074535369873, "step": 9252 }, { "epoch": 1.6844452534813872, "grad_norm": 31.75, "learning_rate": 5.259597999384941e-06, "loss": 1.2369577884674072, "step": 9254 }, { "epoch": 1.6848093201055794, "grad_norm": 9.5625, "learning_rate": 5.258092235108516e-06, "loss": 1.074294924736023, "step": 9256 }, { "epoch": 1.6851733867297716, "grad_norm": 6.84375, "learning_rate": 5.256586579704046e-06, "loss": 1.322725772857666, "step": 9258 }, { "epoch": 1.6855374533539638, "grad_norm": 5.28125, "learning_rate": 5.255081033392486e-06, "loss": 1.184746503829956, "step": 9260 }, { "epoch": 1.685901519978156, "grad_norm": 8.5625, "learning_rate": 5.253575596394763e-06, "loss": 1.1378341913223267, "step": 9262 }, { "epoch": 1.6862655866023482, "grad_norm": 10.875, "learning_rate": 5.2520702689317975e-06, "loss": 1.686555027961731, "step": 9264 }, { "epoch": 1.6866296532265403, "grad_norm": 7.625, "learning_rate": 5.250565051224488e-06, "loss": 1.6420520544052124, "step": 9266 }, { "epoch": 1.6869937198507325, "grad_norm": 5.5, "learning_rate": 5.24905994349372e-06, "loss": 1.1952345371246338, "step": 9268 }, { "epoch": 1.687357786474925, "grad_norm": 7.65625, "learning_rate": 5.2475549459603625e-06, "loss": 0.9739567041397095, "step": 9270 }, { "epoch": 1.6877218530991172, "grad_norm": 29.75, "learning_rate": 5.246050058845266e-06, "loss": 1.4336153268814087, "step": 9272 }, { "epoch": 1.6880859197233093, "grad_norm": 15.0, "learning_rate": 5.24454528236927e-06, "loss": 1.7244280576705933, "step": 9274 }, { "epoch": 1.6884499863475015, "grad_norm": 9.375, "learning_rate": 5.243040616753192e-06, "loss": 1.275808334350586, "step": 9276 }, { "epoch": 1.688814052971694, "grad_norm": 6.75, "learning_rate": 5.2415360622178334e-06, "loss": 1.4948639869689941, "step": 9278 }, { "epoch": 1.6891781195958862, "grad_norm": 9.125, "learning_rate": 5.240031618983987e-06, "loss": 1.428116798400879, "step": 9280 }, { "epoch": 1.6895421862200783, "grad_norm": 16.125, "learning_rate": 5.238527287272419e-06, "loss": 1.5895593166351318, "step": 9282 }, { "epoch": 1.6899062528442705, "grad_norm": 12.25, "learning_rate": 5.237023067303886e-06, "loss": 1.3284916877746582, "step": 9284 }, { "epoch": 1.6902703194684627, "grad_norm": 5.03125, "learning_rate": 5.235518959299126e-06, "loss": 1.3085006475448608, "step": 9286 }, { "epoch": 1.690634386092655, "grad_norm": 7.0625, "learning_rate": 5.2340149634788594e-06, "loss": 1.1924309730529785, "step": 9288 }, { "epoch": 1.6909984527168471, "grad_norm": 13.0, "learning_rate": 5.232511080063793e-06, "loss": 1.3618104457855225, "step": 9290 }, { "epoch": 1.6913625193410393, "grad_norm": 3.9375, "learning_rate": 5.231007309274616e-06, "loss": 1.1392757892608643, "step": 9292 }, { "epoch": 1.6917265859652315, "grad_norm": 18.0, "learning_rate": 5.229503651332e-06, "loss": 1.2429265975952148, "step": 9294 }, { "epoch": 1.6920906525894237, "grad_norm": 111.5, "learning_rate": 5.228000106456601e-06, "loss": 0.9867711067199707, "step": 9296 }, { "epoch": 1.6924547192136161, "grad_norm": 23.25, "learning_rate": 5.226496674869055e-06, "loss": 1.2803643941879272, "step": 9298 }, { "epoch": 1.6928187858378083, "grad_norm": 8.75, "learning_rate": 5.224993356789991e-06, "loss": 1.7405083179473877, "step": 9300 }, { "epoch": 1.6931828524620005, "grad_norm": 29.625, "learning_rate": 5.22349015244001e-06, "loss": 1.165168046951294, "step": 9302 }, { "epoch": 1.693546919086193, "grad_norm": 15.3125, "learning_rate": 5.2219870620397e-06, "loss": 0.8452050089836121, "step": 9304 }, { "epoch": 1.6939109857103851, "grad_norm": 12.875, "learning_rate": 5.220484085809637e-06, "loss": 1.5507532358169556, "step": 9306 }, { "epoch": 1.6942750523345773, "grad_norm": 4.46875, "learning_rate": 5.218981223970374e-06, "loss": 1.3271949291229248, "step": 9308 }, { "epoch": 1.6946391189587695, "grad_norm": 7.34375, "learning_rate": 5.217478476742455e-06, "loss": 1.477545976638794, "step": 9310 }, { "epoch": 1.6950031855829617, "grad_norm": 8.8125, "learning_rate": 5.215975844346395e-06, "loss": 1.3406667709350586, "step": 9312 }, { "epoch": 1.695367252207154, "grad_norm": 12.3125, "learning_rate": 5.214473327002703e-06, "loss": 1.5259292125701904, "step": 9314 }, { "epoch": 1.695731318831346, "grad_norm": 33.5, "learning_rate": 5.212970924931868e-06, "loss": 1.3747682571411133, "step": 9316 }, { "epoch": 1.6960953854555383, "grad_norm": 22.25, "learning_rate": 5.211468638354358e-06, "loss": 1.6125006675720215, "step": 9318 }, { "epoch": 1.6964594520797305, "grad_norm": 9.5, "learning_rate": 5.2099664674906325e-06, "loss": 1.3490309715270996, "step": 9320 }, { "epoch": 1.6968235187039227, "grad_norm": 37.25, "learning_rate": 5.208464412561124e-06, "loss": 1.115883469581604, "step": 9322 }, { "epoch": 1.697187585328115, "grad_norm": 25.375, "learning_rate": 5.206962473786254e-06, "loss": 0.8189382553100586, "step": 9324 }, { "epoch": 1.6975516519523073, "grad_norm": 27.0, "learning_rate": 5.205460651386432e-06, "loss": 1.2329597473144531, "step": 9326 }, { "epoch": 1.6979157185764995, "grad_norm": 19.125, "learning_rate": 5.2039589455820346e-06, "loss": 1.3114888668060303, "step": 9328 }, { "epoch": 1.6982797852006917, "grad_norm": 6.71875, "learning_rate": 5.202457356593438e-06, "loss": 1.1543605327606201, "step": 9330 }, { "epoch": 1.698643851824884, "grad_norm": 13.75, "learning_rate": 5.2009558846409925e-06, "loss": 0.8102421760559082, "step": 9332 }, { "epoch": 1.6990079184490763, "grad_norm": 54.0, "learning_rate": 5.199454529945031e-06, "loss": 0.7338652014732361, "step": 9334 }, { "epoch": 1.6993719850732685, "grad_norm": 5.5, "learning_rate": 5.197953292725875e-06, "loss": 1.1436655521392822, "step": 9336 }, { "epoch": 1.6997360516974607, "grad_norm": 23.75, "learning_rate": 5.19645217320382e-06, "loss": 1.6214869022369385, "step": 9338 }, { "epoch": 1.7001001183216529, "grad_norm": 7.71875, "learning_rate": 5.194951171599154e-06, "loss": 1.4811253547668457, "step": 9340 }, { "epoch": 1.700464184945845, "grad_norm": 13.3125, "learning_rate": 5.193450288132141e-06, "loss": 1.2000652551651, "step": 9342 }, { "epoch": 1.7008282515700373, "grad_norm": 12.0625, "learning_rate": 5.191949523023029e-06, "loss": 1.1856586933135986, "step": 9344 }, { "epoch": 1.7011923181942294, "grad_norm": 10.6875, "learning_rate": 5.190448876492051e-06, "loss": 1.1105928421020508, "step": 9346 }, { "epoch": 1.7015563848184216, "grad_norm": 7.9375, "learning_rate": 5.188948348759418e-06, "loss": 1.2888965606689453, "step": 9348 }, { "epoch": 1.7019204514426138, "grad_norm": 17.25, "learning_rate": 5.187447940045329e-06, "loss": 1.5396370887756348, "step": 9350 }, { "epoch": 1.7022845180668063, "grad_norm": 24.125, "learning_rate": 5.185947650569961e-06, "loss": 1.5175641775131226, "step": 9352 }, { "epoch": 1.7026485846909984, "grad_norm": 16.25, "learning_rate": 5.184447480553476e-06, "loss": 1.5482220649719238, "step": 9354 }, { "epoch": 1.7030126513151906, "grad_norm": 11.1875, "learning_rate": 5.182947430216019e-06, "loss": 1.1901549100875854, "step": 9356 }, { "epoch": 1.703376717939383, "grad_norm": 8.0625, "learning_rate": 5.181447499777714e-06, "loss": 1.4369940757751465, "step": 9358 }, { "epoch": 1.7037407845635753, "grad_norm": 23.0, "learning_rate": 5.179947689458673e-06, "loss": 1.5072280168533325, "step": 9360 }, { "epoch": 1.7041048511877674, "grad_norm": 11.375, "learning_rate": 5.178447999478985e-06, "loss": 1.5146267414093018, "step": 9362 }, { "epoch": 1.7044689178119596, "grad_norm": 18.875, "learning_rate": 5.1769484300587215e-06, "loss": 1.6368751525878906, "step": 9364 }, { "epoch": 1.7048329844361518, "grad_norm": 5.78125, "learning_rate": 5.175448981417943e-06, "loss": 0.9928371906280518, "step": 9366 }, { "epoch": 1.705197051060344, "grad_norm": 6.65625, "learning_rate": 5.173949653776683e-06, "loss": 1.5561423301696777, "step": 9368 }, { "epoch": 1.7055611176845362, "grad_norm": 11.6875, "learning_rate": 5.172450447354966e-06, "loss": 1.412014365196228, "step": 9370 }, { "epoch": 1.7059251843087284, "grad_norm": 7.8125, "learning_rate": 5.170951362372789e-06, "loss": 1.345872402191162, "step": 9372 }, { "epoch": 1.7062892509329206, "grad_norm": 7.125, "learning_rate": 5.169452399050141e-06, "loss": 1.2993645668029785, "step": 9374 }, { "epoch": 1.7066533175571128, "grad_norm": 6.09375, "learning_rate": 5.167953557606988e-06, "loss": 1.3439033031463623, "step": 9376 }, { "epoch": 1.7070173841813052, "grad_norm": 15.0625, "learning_rate": 5.166454838263278e-06, "loss": 1.2377959489822388, "step": 9378 }, { "epoch": 1.7073814508054974, "grad_norm": 7.90625, "learning_rate": 5.164956241238943e-06, "loss": 1.341631293296814, "step": 9380 }, { "epoch": 1.7077455174296896, "grad_norm": 14.8125, "learning_rate": 5.163457766753894e-06, "loss": 1.4095263481140137, "step": 9382 }, { "epoch": 1.708109584053882, "grad_norm": 29.875, "learning_rate": 5.161959415028028e-06, "loss": 1.945432186126709, "step": 9384 }, { "epoch": 1.7084736506780742, "grad_norm": 17.125, "learning_rate": 5.160461186281224e-06, "loss": 1.323392629623413, "step": 9386 }, { "epoch": 1.7088377173022664, "grad_norm": 6.34375, "learning_rate": 5.158963080733335e-06, "loss": 1.144814372062683, "step": 9388 }, { "epoch": 1.7092017839264586, "grad_norm": 6.8125, "learning_rate": 5.1574650986042085e-06, "loss": 1.1991299390792847, "step": 9390 }, { "epoch": 1.7095658505506508, "grad_norm": 7.90625, "learning_rate": 5.155967240113662e-06, "loss": 1.3379712104797363, "step": 9392 }, { "epoch": 1.709929917174843, "grad_norm": 19.625, "learning_rate": 5.154469505481503e-06, "loss": 1.4230942726135254, "step": 9394 }, { "epoch": 1.7102939837990352, "grad_norm": 7.84375, "learning_rate": 5.152971894927518e-06, "loss": 1.030795931816101, "step": 9396 }, { "epoch": 1.7106580504232274, "grad_norm": 12.0625, "learning_rate": 5.151474408671475e-06, "loss": 0.7345353364944458, "step": 9398 }, { "epoch": 1.7110221170474196, "grad_norm": 13.6875, "learning_rate": 5.1499770469331235e-06, "loss": 1.1238645315170288, "step": 9400 }, { "epoch": 1.7113861836716118, "grad_norm": 13.0, "learning_rate": 5.148479809932195e-06, "loss": 1.5702509880065918, "step": 9402 }, { "epoch": 1.711750250295804, "grad_norm": 16.125, "learning_rate": 5.146982697888403e-06, "loss": 1.2398544549942017, "step": 9404 }, { "epoch": 1.7121143169199964, "grad_norm": 20.875, "learning_rate": 5.145485711021445e-06, "loss": 1.4667586088180542, "step": 9406 }, { "epoch": 1.7124783835441886, "grad_norm": 13.625, "learning_rate": 5.143988849550994e-06, "loss": 1.22396981716156, "step": 9408 }, { "epoch": 1.7128424501683808, "grad_norm": 13.8125, "learning_rate": 5.142492113696711e-06, "loss": 1.5100560188293457, "step": 9410 }, { "epoch": 1.7132065167925732, "grad_norm": 10.4375, "learning_rate": 5.140995503678235e-06, "loss": 1.011368989944458, "step": 9412 }, { "epoch": 1.7135705834167654, "grad_norm": 11.9375, "learning_rate": 5.139499019715188e-06, "loss": 1.2850549221038818, "step": 9414 }, { "epoch": 1.7139346500409576, "grad_norm": 28.625, "learning_rate": 5.138002662027173e-06, "loss": 0.9053092002868652, "step": 9416 }, { "epoch": 1.7142987166651498, "grad_norm": 12.75, "learning_rate": 5.136506430833772e-06, "loss": 1.3963650465011597, "step": 9418 }, { "epoch": 1.714662783289342, "grad_norm": 8.3125, "learning_rate": 5.135010326354555e-06, "loss": 1.2343649864196777, "step": 9420 }, { "epoch": 1.7150268499135342, "grad_norm": 10.625, "learning_rate": 5.1335143488090656e-06, "loss": 1.386676549911499, "step": 9422 }, { "epoch": 1.7153909165377264, "grad_norm": 7.625, "learning_rate": 5.132018498416833e-06, "loss": 1.2937157154083252, "step": 9424 }, { "epoch": 1.7157549831619185, "grad_norm": 14.5625, "learning_rate": 5.130522775397371e-06, "loss": 0.9588303565979004, "step": 9426 }, { "epoch": 1.7161190497861107, "grad_norm": 16.0, "learning_rate": 5.129027179970165e-06, "loss": 1.663581132888794, "step": 9428 }, { "epoch": 1.716483116410303, "grad_norm": 13.875, "learning_rate": 5.1275317123546945e-06, "loss": 1.381098747253418, "step": 9430 }, { "epoch": 1.7168471830344953, "grad_norm": 10.75, "learning_rate": 5.126036372770407e-06, "loss": 1.3656188249588013, "step": 9432 }, { "epoch": 1.7172112496586875, "grad_norm": 5.53125, "learning_rate": 5.124541161436738e-06, "loss": 0.9090326428413391, "step": 9434 }, { "epoch": 1.7175753162828797, "grad_norm": 13.75, "learning_rate": 5.123046078573109e-06, "loss": 0.985514760017395, "step": 9436 }, { "epoch": 1.7179393829070722, "grad_norm": 15.9375, "learning_rate": 5.121551124398912e-06, "loss": 0.6309006214141846, "step": 9438 }, { "epoch": 1.7183034495312643, "grad_norm": 16.5, "learning_rate": 5.120056299133529e-06, "loss": 1.5923762321472168, "step": 9440 }, { "epoch": 1.7186675161554565, "grad_norm": 20.625, "learning_rate": 5.118561602996317e-06, "loss": 1.670379400253296, "step": 9442 }, { "epoch": 1.7190315827796487, "grad_norm": 11.25, "learning_rate": 5.1170670362066175e-06, "loss": 1.2797925472259521, "step": 9444 }, { "epoch": 1.719395649403841, "grad_norm": 6.8125, "learning_rate": 5.115572598983753e-06, "loss": 1.2918821573257446, "step": 9446 }, { "epoch": 1.7197597160280331, "grad_norm": 10.25, "learning_rate": 5.114078291547024e-06, "loss": 1.371572732925415, "step": 9448 }, { "epoch": 1.7201237826522253, "grad_norm": 6.75, "learning_rate": 5.112584114115717e-06, "loss": 1.7317709922790527, "step": 9450 }, { "epoch": 1.7204878492764175, "grad_norm": 4.34375, "learning_rate": 5.1110900669090945e-06, "loss": 1.1518534421920776, "step": 9452 }, { "epoch": 1.7208519159006097, "grad_norm": 9.25, "learning_rate": 5.109596150146401e-06, "loss": 1.1583870649337769, "step": 9454 }, { "epoch": 1.721215982524802, "grad_norm": 30.0, "learning_rate": 5.1081023640468654e-06, "loss": 1.1486270427703857, "step": 9456 }, { "epoch": 1.7215800491489943, "grad_norm": 6.1875, "learning_rate": 5.106608708829693e-06, "loss": 1.5493502616882324, "step": 9458 }, { "epoch": 1.7219441157731865, "grad_norm": 23.625, "learning_rate": 5.105115184714071e-06, "loss": 1.1435967683792114, "step": 9460 }, { "epoch": 1.7223081823973787, "grad_norm": 27.625, "learning_rate": 5.10362179191917e-06, "loss": 1.833583950996399, "step": 9462 }, { "epoch": 1.722672249021571, "grad_norm": 9.125, "learning_rate": 5.102128530664136e-06, "loss": 1.5355030298233032, "step": 9464 }, { "epoch": 1.7230363156457633, "grad_norm": 27.75, "learning_rate": 5.1006354011681055e-06, "loss": 1.1746819019317627, "step": 9466 }, { "epoch": 1.7234003822699555, "grad_norm": 8.9375, "learning_rate": 5.0991424036501814e-06, "loss": 0.8762860894203186, "step": 9468 }, { "epoch": 1.7237644488941477, "grad_norm": 7.78125, "learning_rate": 5.097649538329461e-06, "loss": 1.2093828916549683, "step": 9470 }, { "epoch": 1.72412851551834, "grad_norm": 6.53125, "learning_rate": 5.096156805425014e-06, "loss": 1.2469215393066406, "step": 9472 }, { "epoch": 1.724492582142532, "grad_norm": 16.125, "learning_rate": 5.094664205155891e-06, "loss": 1.2745331525802612, "step": 9474 }, { "epoch": 1.7248566487667243, "grad_norm": 8.9375, "learning_rate": 5.093171737741129e-06, "loss": 1.4616479873657227, "step": 9476 }, { "epoch": 1.7252207153909165, "grad_norm": 7.3125, "learning_rate": 5.0916794033997375e-06, "loss": 1.0668138265609741, "step": 9478 }, { "epoch": 1.7255847820151087, "grad_norm": 49.0, "learning_rate": 5.090187202350714e-06, "loss": 1.4169104099273682, "step": 9480 }, { "epoch": 1.7259488486393009, "grad_norm": 5.84375, "learning_rate": 5.088695134813031e-06, "loss": 1.3103148937225342, "step": 9482 }, { "epoch": 1.726312915263493, "grad_norm": 14.4375, "learning_rate": 5.087203201005642e-06, "loss": 1.83831787109375, "step": 9484 }, { "epoch": 1.7266769818876855, "grad_norm": 11.5, "learning_rate": 5.085711401147486e-06, "loss": 1.5398776531219482, "step": 9486 }, { "epoch": 1.7270410485118777, "grad_norm": 61.5, "learning_rate": 5.084219735457476e-06, "loss": 1.3492802381515503, "step": 9488 }, { "epoch": 1.7274051151360699, "grad_norm": 13.8125, "learning_rate": 5.08272820415451e-06, "loss": 1.3110461235046387, "step": 9490 }, { "epoch": 1.7277691817602623, "grad_norm": 22.5, "learning_rate": 5.081236807457461e-06, "loss": 1.5548452138900757, "step": 9492 }, { "epoch": 1.7281332483844545, "grad_norm": 21.875, "learning_rate": 5.0797455455851865e-06, "loss": 1.5569431781768799, "step": 9494 }, { "epoch": 1.7284973150086467, "grad_norm": 16.5, "learning_rate": 5.0782544187565255e-06, "loss": 1.5821748971939087, "step": 9496 }, { "epoch": 1.7288613816328389, "grad_norm": 14.375, "learning_rate": 5.076763427190291e-06, "loss": 1.7407346963882446, "step": 9498 }, { "epoch": 1.729225448257031, "grad_norm": 7.53125, "learning_rate": 5.0752725711052846e-06, "loss": 1.4714140892028809, "step": 9500 }, { "epoch": 1.7295895148812233, "grad_norm": 7.5, "learning_rate": 5.073781850720278e-06, "loss": 1.117492437362671, "step": 9502 }, { "epoch": 1.7299535815054154, "grad_norm": 6.3125, "learning_rate": 5.072291266254033e-06, "loss": 1.2635581493377686, "step": 9504 }, { "epoch": 1.7303176481296076, "grad_norm": 350.0, "learning_rate": 5.070800817925286e-06, "loss": 1.3402974605560303, "step": 9506 }, { "epoch": 1.7306817147537998, "grad_norm": 12.125, "learning_rate": 5.069310505952749e-06, "loss": 1.6758350133895874, "step": 9508 }, { "epoch": 1.731045781377992, "grad_norm": 19.75, "learning_rate": 5.0678203305551274e-06, "loss": 1.5974210500717163, "step": 9510 }, { "epoch": 1.7314098480021844, "grad_norm": 27.75, "learning_rate": 5.0663302919510935e-06, "loss": 0.6250400543212891, "step": 9512 }, { "epoch": 1.7317739146263766, "grad_norm": 30.125, "learning_rate": 5.064840390359305e-06, "loss": 0.9814783334732056, "step": 9514 }, { "epoch": 1.7321379812505688, "grad_norm": 21.875, "learning_rate": 5.0633506259984e-06, "loss": 1.5342494249343872, "step": 9516 }, { "epoch": 1.732502047874761, "grad_norm": 16.375, "learning_rate": 5.061860999086994e-06, "loss": 1.9756970405578613, "step": 9518 }, { "epoch": 1.7328661144989534, "grad_norm": 22.875, "learning_rate": 5.060371509843684e-06, "loss": 1.25270414352417, "step": 9520 }, { "epoch": 1.7332301811231456, "grad_norm": 15.3125, "learning_rate": 5.058882158487049e-06, "loss": 1.0654053688049316, "step": 9522 }, { "epoch": 1.7335942477473378, "grad_norm": 7.75, "learning_rate": 5.057392945235642e-06, "loss": 1.390311598777771, "step": 9524 }, { "epoch": 1.73395831437153, "grad_norm": 7.75, "learning_rate": 5.055903870308001e-06, "loss": 0.8419263362884521, "step": 9526 }, { "epoch": 1.7343223809957222, "grad_norm": 8.125, "learning_rate": 5.0544149339226375e-06, "loss": 1.2089424133300781, "step": 9528 }, { "epoch": 1.7346864476199144, "grad_norm": 8.9375, "learning_rate": 5.052926136298055e-06, "loss": 0.4906526803970337, "step": 9530 }, { "epoch": 1.7350505142441066, "grad_norm": 9.9375, "learning_rate": 5.051437477652721e-06, "loss": 1.4194902181625366, "step": 9532 }, { "epoch": 1.7354145808682988, "grad_norm": 15.875, "learning_rate": 5.049948958205093e-06, "loss": 1.4018341302871704, "step": 9534 }, { "epoch": 1.735778647492491, "grad_norm": 9.875, "learning_rate": 5.048460578173607e-06, "loss": 1.3147599697113037, "step": 9536 }, { "epoch": 1.7361427141166832, "grad_norm": 7.96875, "learning_rate": 5.046972337776673e-06, "loss": 1.4243731498718262, "step": 9538 }, { "epoch": 1.7365067807408756, "grad_norm": 10.25, "learning_rate": 5.045484237232687e-06, "loss": 1.4286456108093262, "step": 9540 }, { "epoch": 1.7368708473650678, "grad_norm": 9.5625, "learning_rate": 5.0439962767600214e-06, "loss": 1.455309271812439, "step": 9542 }, { "epoch": 1.73723491398926, "grad_norm": 17.375, "learning_rate": 5.0425084565770266e-06, "loss": 1.3779274225234985, "step": 9544 }, { "epoch": 1.7375989806134524, "grad_norm": 22.25, "learning_rate": 5.041020776902037e-06, "loss": 1.2114263772964478, "step": 9546 }, { "epoch": 1.7379630472376446, "grad_norm": 16.625, "learning_rate": 5.03953323795336e-06, "loss": 1.4143348932266235, "step": 9548 }, { "epoch": 1.7383271138618368, "grad_norm": 35.0, "learning_rate": 5.038045839949291e-06, "loss": 1.9574635028839111, "step": 9550 }, { "epoch": 1.738691180486029, "grad_norm": 8.125, "learning_rate": 5.036558583108093e-06, "loss": 1.0166016817092896, "step": 9552 }, { "epoch": 1.7390552471102212, "grad_norm": 14.625, "learning_rate": 5.03507146764802e-06, "loss": 1.1967015266418457, "step": 9554 }, { "epoch": 1.7394193137344134, "grad_norm": 14.6875, "learning_rate": 5.0335844937873e-06, "loss": 1.814778447151184, "step": 9556 }, { "epoch": 1.7397833803586056, "grad_norm": 13.8125, "learning_rate": 5.032097661744135e-06, "loss": 1.4534716606140137, "step": 9558 }, { "epoch": 1.7401474469827978, "grad_norm": 50.5, "learning_rate": 5.03061097173672e-06, "loss": 1.4580074548721313, "step": 9560 }, { "epoch": 1.74051151360699, "grad_norm": 10.75, "learning_rate": 5.029124423983215e-06, "loss": 1.4458574056625366, "step": 9562 }, { "epoch": 1.7408755802311822, "grad_norm": 6.75, "learning_rate": 5.027638018701764e-06, "loss": 1.387359857559204, "step": 9564 }, { "epoch": 1.7412396468553746, "grad_norm": 37.5, "learning_rate": 5.026151756110496e-06, "loss": 1.1520726680755615, "step": 9566 }, { "epoch": 1.7416037134795668, "grad_norm": 14.9375, "learning_rate": 5.024665636427509e-06, "loss": 1.3024561405181885, "step": 9568 }, { "epoch": 1.741967780103759, "grad_norm": 12.875, "learning_rate": 5.023179659870889e-06, "loss": 1.454302191734314, "step": 9570 }, { "epoch": 1.7423318467279512, "grad_norm": 21.125, "learning_rate": 5.0216938266586934e-06, "loss": 1.5647664070129395, "step": 9572 }, { "epoch": 1.7426959133521436, "grad_norm": 8.75, "learning_rate": 5.020208137008965e-06, "loss": 1.5302326679229736, "step": 9574 }, { "epoch": 1.7430599799763358, "grad_norm": 15.3125, "learning_rate": 5.018722591139722e-06, "loss": 1.4510908126831055, "step": 9576 }, { "epoch": 1.743424046600528, "grad_norm": 11.3125, "learning_rate": 5.017237189268961e-06, "loss": 0.5014241933822632, "step": 9578 }, { "epoch": 1.7437881132247202, "grad_norm": 13.4375, "learning_rate": 5.015751931614661e-06, "loss": 1.3752236366271973, "step": 9580 }, { "epoch": 1.7441521798489124, "grad_norm": 40.25, "learning_rate": 5.014266818394775e-06, "loss": 1.66645085811615, "step": 9582 }, { "epoch": 1.7445162464731045, "grad_norm": 6.03125, "learning_rate": 5.01278184982724e-06, "loss": 1.3075840473175049, "step": 9584 }, { "epoch": 1.7448803130972967, "grad_norm": 15.375, "learning_rate": 5.011297026129967e-06, "loss": 1.4868955612182617, "step": 9586 }, { "epoch": 1.745244379721489, "grad_norm": 13.375, "learning_rate": 5.009812347520846e-06, "loss": 1.9733105897903442, "step": 9588 }, { "epoch": 1.7456084463456811, "grad_norm": 26.625, "learning_rate": 5.008327814217755e-06, "loss": 1.2591856718063354, "step": 9590 }, { "epoch": 1.7459725129698733, "grad_norm": 9.25, "learning_rate": 5.006843426438534e-06, "loss": 1.5363489389419556, "step": 9592 }, { "epoch": 1.7463365795940657, "grad_norm": 10.3125, "learning_rate": 5.005359184401017e-06, "loss": 1.3313227891921997, "step": 9594 }, { "epoch": 1.746700646218258, "grad_norm": 18.625, "learning_rate": 5.003875088323009e-06, "loss": 1.4109573364257812, "step": 9596 }, { "epoch": 1.7470647128424501, "grad_norm": 14.25, "learning_rate": 5.0023911384222955e-06, "loss": 1.4049272537231445, "step": 9598 }, { "epoch": 1.7474287794666425, "grad_norm": 10.375, "learning_rate": 5.000907334916638e-06, "loss": 1.351060390472412, "step": 9600 }, { "epoch": 1.7477928460908347, "grad_norm": 6.25, "learning_rate": 4.999423678023782e-06, "loss": 1.3122403621673584, "step": 9602 }, { "epoch": 1.748156912715027, "grad_norm": 3.84375, "learning_rate": 4.997940167961444e-06, "loss": 0.9075431227684021, "step": 9604 }, { "epoch": 1.7485209793392191, "grad_norm": 23.625, "learning_rate": 4.9964568049473275e-06, "loss": 1.1615616083145142, "step": 9606 }, { "epoch": 1.7488850459634113, "grad_norm": 20.625, "learning_rate": 4.9949735891991055e-06, "loss": 0.8063016533851624, "step": 9608 }, { "epoch": 1.7492491125876035, "grad_norm": 10.9375, "learning_rate": 4.993490520934438e-06, "loss": 1.422484278678894, "step": 9610 }, { "epoch": 1.7496131792117957, "grad_norm": 35.25, "learning_rate": 4.9920076003709575e-06, "loss": 1.580983281135559, "step": 9612 }, { "epoch": 1.749977245835988, "grad_norm": 10.4375, "learning_rate": 4.990524827726275e-06, "loss": 1.4442027807235718, "step": 9614 }, { "epoch": 1.75034131246018, "grad_norm": 29.375, "learning_rate": 4.9890422032179855e-06, "loss": 1.4181630611419678, "step": 9616 }, { "epoch": 1.7507053790843723, "grad_norm": 9.5, "learning_rate": 4.987559727063653e-06, "loss": 1.2040762901306152, "step": 9618 }, { "epoch": 1.7510694457085647, "grad_norm": 13.0, "learning_rate": 4.9860773994808295e-06, "loss": 1.7428909540176392, "step": 9620 }, { "epoch": 1.751433512332757, "grad_norm": 16.375, "learning_rate": 4.984595220687038e-06, "loss": 1.8364250659942627, "step": 9622 }, { "epoch": 1.751797578956949, "grad_norm": 6.96875, "learning_rate": 4.983113190899782e-06, "loss": 1.2444138526916504, "step": 9624 }, { "epoch": 1.7521616455811415, "grad_norm": 7.34375, "learning_rate": 4.981631310336546e-06, "loss": 0.8715922832489014, "step": 9626 }, { "epoch": 1.7525257122053337, "grad_norm": 6.03125, "learning_rate": 4.980149579214786e-06, "loss": 1.0471537113189697, "step": 9628 }, { "epoch": 1.752889778829526, "grad_norm": 15.375, "learning_rate": 4.9786679977519435e-06, "loss": 1.3787966966629028, "step": 9630 }, { "epoch": 1.753253845453718, "grad_norm": 9.125, "learning_rate": 4.9771865661654325e-06, "loss": 1.387390375137329, "step": 9632 }, { "epoch": 1.7536179120779103, "grad_norm": 4.625, "learning_rate": 4.975705284672647e-06, "loss": 1.1727123260498047, "step": 9634 }, { "epoch": 1.7539819787021025, "grad_norm": 12.5, "learning_rate": 4.974224153490963e-06, "loss": 1.7207012176513672, "step": 9636 }, { "epoch": 1.7543460453262947, "grad_norm": 10.6875, "learning_rate": 4.972743172837724e-06, "loss": 1.40591299533844, "step": 9638 }, { "epoch": 1.7547101119504869, "grad_norm": 12.5625, "learning_rate": 4.971262342930263e-06, "loss": 1.2562315464019775, "step": 9640 }, { "epoch": 1.755074178574679, "grad_norm": 13.375, "learning_rate": 4.969781663985884e-06, "loss": 1.6212990283966064, "step": 9642 }, { "epoch": 1.7554382451988713, "grad_norm": 11.0, "learning_rate": 4.9683011362218695e-06, "loss": 0.8833602666854858, "step": 9644 }, { "epoch": 1.7558023118230635, "grad_norm": 10.625, "learning_rate": 4.966820759855484e-06, "loss": 1.2150349617004395, "step": 9646 }, { "epoch": 1.7561663784472559, "grad_norm": 26.375, "learning_rate": 4.965340535103964e-06, "loss": 1.778388500213623, "step": 9648 }, { "epoch": 1.756530445071448, "grad_norm": 6.3125, "learning_rate": 4.963860462184527e-06, "loss": 1.2072433233261108, "step": 9650 }, { "epoch": 1.7568945116956403, "grad_norm": 8.5625, "learning_rate": 4.962380541314369e-06, "loss": 1.3482211828231812, "step": 9652 }, { "epoch": 1.7572585783198327, "grad_norm": 6.875, "learning_rate": 4.96090077271066e-06, "loss": 1.3736768960952759, "step": 9654 }, { "epoch": 1.7576226449440249, "grad_norm": 13.0625, "learning_rate": 4.9594211565905535e-06, "loss": 1.7501568794250488, "step": 9656 }, { "epoch": 1.757986711568217, "grad_norm": 16.25, "learning_rate": 4.957941693171173e-06, "loss": 1.4537973403930664, "step": 9658 }, { "epoch": 1.7583507781924093, "grad_norm": 13.625, "learning_rate": 4.956462382669627e-06, "loss": 1.5427837371826172, "step": 9660 }, { "epoch": 1.7587148448166015, "grad_norm": 45.0, "learning_rate": 4.954983225302998e-06, "loss": 1.3104393482208252, "step": 9662 }, { "epoch": 1.7590789114407936, "grad_norm": 6.0, "learning_rate": 4.953504221288344e-06, "loss": 1.290662407875061, "step": 9664 }, { "epoch": 1.7594429780649858, "grad_norm": 18.375, "learning_rate": 4.952025370842706e-06, "loss": 1.1948498487472534, "step": 9666 }, { "epoch": 1.759807044689178, "grad_norm": 17.75, "learning_rate": 4.950546674183096e-06, "loss": 1.5208001136779785, "step": 9668 }, { "epoch": 1.7601711113133702, "grad_norm": 16.5, "learning_rate": 4.94906813152651e-06, "loss": 1.3499042987823486, "step": 9670 }, { "epoch": 1.7605351779375624, "grad_norm": 9.5625, "learning_rate": 4.947589743089916e-06, "loss": 1.2112205028533936, "step": 9672 }, { "epoch": 1.7608992445617548, "grad_norm": 11.0625, "learning_rate": 4.946111509090262e-06, "loss": 1.1928448677062988, "step": 9674 }, { "epoch": 1.761263311185947, "grad_norm": 14.125, "learning_rate": 4.944633429744474e-06, "loss": 1.924996256828308, "step": 9676 }, { "epoch": 1.7616273778101392, "grad_norm": 22.875, "learning_rate": 4.9431555052694516e-06, "loss": 1.5360736846923828, "step": 9678 }, { "epoch": 1.7619914444343316, "grad_norm": 15.625, "learning_rate": 4.941677735882078e-06, "loss": 1.4673950672149658, "step": 9680 }, { "epoch": 1.7623555110585238, "grad_norm": 7.46875, "learning_rate": 4.940200121799206e-06, "loss": 1.2095304727554321, "step": 9682 }, { "epoch": 1.762719577682716, "grad_norm": 8.375, "learning_rate": 4.93872266323767e-06, "loss": 1.2931064367294312, "step": 9684 }, { "epoch": 1.7630836443069082, "grad_norm": 7.4375, "learning_rate": 4.937245360414285e-06, "loss": 1.0370876789093018, "step": 9686 }, { "epoch": 1.7634477109311004, "grad_norm": 15.375, "learning_rate": 4.935768213545834e-06, "loss": 1.4221354722976685, "step": 9688 }, { "epoch": 1.7638117775552926, "grad_norm": 8.375, "learning_rate": 4.934291222849086e-06, "loss": 1.0306072235107422, "step": 9690 }, { "epoch": 1.7641758441794848, "grad_norm": 8.9375, "learning_rate": 4.932814388540783e-06, "loss": 1.6003035306930542, "step": 9692 }, { "epoch": 1.764539910803677, "grad_norm": 10.1875, "learning_rate": 4.9313377108376405e-06, "loss": 1.466977834701538, "step": 9694 }, { "epoch": 1.7649039774278692, "grad_norm": 15.6875, "learning_rate": 4.929861189956362e-06, "loss": 1.6034886837005615, "step": 9696 }, { "epoch": 1.7652680440520614, "grad_norm": 18.125, "learning_rate": 4.928384826113613e-06, "loss": 1.4333608150482178, "step": 9698 }, { "epoch": 1.7656321106762538, "grad_norm": 15.4375, "learning_rate": 4.926908619526051e-06, "loss": 1.4016716480255127, "step": 9700 }, { "epoch": 1.765996177300446, "grad_norm": 6.5, "learning_rate": 4.925432570410299e-06, "loss": 1.491098403930664, "step": 9702 }, { "epoch": 1.7663602439246382, "grad_norm": 11.1875, "learning_rate": 4.923956678982962e-06, "loss": 1.1578155755996704, "step": 9704 }, { "epoch": 1.7667243105488304, "grad_norm": 8.5625, "learning_rate": 4.922480945460623e-06, "loss": 1.3627545833587646, "step": 9706 }, { "epoch": 1.7670883771730228, "grad_norm": 8.9375, "learning_rate": 4.921005370059836e-06, "loss": 1.1596611738204956, "step": 9708 }, { "epoch": 1.767452443797215, "grad_norm": 15.25, "learning_rate": 4.91952995299714e-06, "loss": 1.0563263893127441, "step": 9710 }, { "epoch": 1.7678165104214072, "grad_norm": 7.125, "learning_rate": 4.918054694489045e-06, "loss": 1.2106642723083496, "step": 9712 }, { "epoch": 1.7681805770455994, "grad_norm": 6.25, "learning_rate": 4.916579594752037e-06, "loss": 1.1421802043914795, "step": 9714 }, { "epoch": 1.7685446436697916, "grad_norm": 10.1875, "learning_rate": 4.915104654002585e-06, "loss": 1.6308321952819824, "step": 9716 }, { "epoch": 1.7689087102939838, "grad_norm": 27.5, "learning_rate": 4.9136298724571265e-06, "loss": 1.9257757663726807, "step": 9718 }, { "epoch": 1.769272776918176, "grad_norm": 7.375, "learning_rate": 4.912155250332082e-06, "loss": 1.3204705715179443, "step": 9720 }, { "epoch": 1.7696368435423682, "grad_norm": 17.125, "learning_rate": 4.9106807878438465e-06, "loss": 1.5168133974075317, "step": 9722 }, { "epoch": 1.7700009101665604, "grad_norm": 22.875, "learning_rate": 4.9092064852087895e-06, "loss": 1.393210768699646, "step": 9724 }, { "epoch": 1.7703649767907526, "grad_norm": 10.5, "learning_rate": 4.9077323426432625e-06, "loss": 1.0466089248657227, "step": 9726 }, { "epoch": 1.770729043414945, "grad_norm": 13.0, "learning_rate": 4.906258360363585e-06, "loss": 0.4767671823501587, "step": 9728 }, { "epoch": 1.7710931100391372, "grad_norm": 27.625, "learning_rate": 4.904784538586063e-06, "loss": 1.5078160762786865, "step": 9730 }, { "epoch": 1.7714571766633294, "grad_norm": 10.0625, "learning_rate": 4.90331087752697e-06, "loss": 1.4488803148269653, "step": 9732 }, { "epoch": 1.7718212432875218, "grad_norm": 18.0, "learning_rate": 4.90183737740256e-06, "loss": 1.5841262340545654, "step": 9734 }, { "epoch": 1.772185309911714, "grad_norm": 5.5, "learning_rate": 4.900364038429067e-06, "loss": 1.2088572978973389, "step": 9736 }, { "epoch": 1.7725493765359062, "grad_norm": 18.875, "learning_rate": 4.898890860822693e-06, "loss": 1.2688897848129272, "step": 9738 }, { "epoch": 1.7729134431600984, "grad_norm": 10.1875, "learning_rate": 4.897417844799624e-06, "loss": 1.2133854627609253, "step": 9740 }, { "epoch": 1.7732775097842906, "grad_norm": 19.5, "learning_rate": 4.895944990576018e-06, "loss": 1.4112749099731445, "step": 9742 }, { "epoch": 1.7736415764084827, "grad_norm": 24.75, "learning_rate": 4.894472298368009e-06, "loss": 1.697887897491455, "step": 9744 }, { "epoch": 1.774005643032675, "grad_norm": 8.3125, "learning_rate": 4.892999768391711e-06, "loss": 1.4441301822662354, "step": 9746 }, { "epoch": 1.7743697096568671, "grad_norm": 14.3125, "learning_rate": 4.8915274008632095e-06, "loss": 1.5689034461975098, "step": 9748 }, { "epoch": 1.7747337762810593, "grad_norm": 11.75, "learning_rate": 4.890055195998571e-06, "loss": 1.5739667415618896, "step": 9750 }, { "epoch": 1.7750978429052515, "grad_norm": 7.375, "learning_rate": 4.888583154013834e-06, "loss": 1.1880637407302856, "step": 9752 }, { "epoch": 1.775461909529444, "grad_norm": 9.625, "learning_rate": 4.887111275125014e-06, "loss": 1.3103233575820923, "step": 9754 }, { "epoch": 1.7758259761536361, "grad_norm": 22.625, "learning_rate": 4.885639559548105e-06, "loss": 1.568418264389038, "step": 9756 }, { "epoch": 1.7761900427778283, "grad_norm": 17.5, "learning_rate": 4.884168007499075e-06, "loss": 1.8974366188049316, "step": 9758 }, { "epoch": 1.7765541094020205, "grad_norm": 6.8125, "learning_rate": 4.882696619193868e-06, "loss": 1.309972882270813, "step": 9760 }, { "epoch": 1.776918176026213, "grad_norm": 86.5, "learning_rate": 4.881225394848404e-06, "loss": 1.1577789783477783, "step": 9762 }, { "epoch": 1.7772822426504051, "grad_norm": 7.78125, "learning_rate": 4.879754334678577e-06, "loss": 1.3325051069259644, "step": 9764 }, { "epoch": 1.7776463092745973, "grad_norm": 12.3125, "learning_rate": 4.8782834389002645e-06, "loss": 1.4475016593933105, "step": 9766 }, { "epoch": 1.7780103758987895, "grad_norm": 16.5, "learning_rate": 4.876812707729309e-06, "loss": 1.1812524795532227, "step": 9768 }, { "epoch": 1.7783744425229817, "grad_norm": 6.375, "learning_rate": 4.875342141381538e-06, "loss": 0.9161214828491211, "step": 9770 }, { "epoch": 1.778738509147174, "grad_norm": 7.96875, "learning_rate": 4.87387174007275e-06, "loss": 1.3399105072021484, "step": 9772 }, { "epoch": 1.779102575771366, "grad_norm": 8.5, "learning_rate": 4.872401504018719e-06, "loss": 1.2644909620285034, "step": 9774 }, { "epoch": 1.7794666423955583, "grad_norm": 16.875, "learning_rate": 4.8709314334352e-06, "loss": 1.1939635276794434, "step": 9776 }, { "epoch": 1.7798307090197505, "grad_norm": 16.375, "learning_rate": 4.869461528537916e-06, "loss": 1.680476427078247, "step": 9778 }, { "epoch": 1.7801947756439427, "grad_norm": 6.375, "learning_rate": 4.867991789542571e-06, "loss": 1.2742371559143066, "step": 9780 }, { "epoch": 1.780558842268135, "grad_norm": 7.0, "learning_rate": 4.866522216664844e-06, "loss": 1.0444464683532715, "step": 9782 }, { "epoch": 1.7809229088923273, "grad_norm": 7.6875, "learning_rate": 4.865052810120386e-06, "loss": 1.1968847513198853, "step": 9784 }, { "epoch": 1.7812869755165195, "grad_norm": 14.8125, "learning_rate": 4.8635835701248304e-06, "loss": 1.2499253749847412, "step": 9786 }, { "epoch": 1.781651042140712, "grad_norm": 15.1875, "learning_rate": 4.8621144968937795e-06, "loss": 1.7331328392028809, "step": 9788 }, { "epoch": 1.782015108764904, "grad_norm": 5.125, "learning_rate": 4.860645590642816e-06, "loss": 1.2850385904312134, "step": 9790 }, { "epoch": 1.7823791753890963, "grad_norm": 11.5, "learning_rate": 4.859176851587494e-06, "loss": 1.3192908763885498, "step": 9792 }, { "epoch": 1.7827432420132885, "grad_norm": 9.5, "learning_rate": 4.857708279943345e-06, "loss": 1.550360918045044, "step": 9794 }, { "epoch": 1.7831073086374807, "grad_norm": 9.6875, "learning_rate": 4.856239875925878e-06, "loss": 1.3419959545135498, "step": 9796 }, { "epoch": 1.7834713752616729, "grad_norm": 21.5, "learning_rate": 4.854771639750573e-06, "loss": 1.4597018957138062, "step": 9798 }, { "epoch": 1.783835441885865, "grad_norm": 19.75, "learning_rate": 4.853303571632889e-06, "loss": 1.4212193489074707, "step": 9800 }, { "epoch": 1.7841995085100573, "grad_norm": 11.3125, "learning_rate": 4.851835671788258e-06, "loss": 1.2252379655838013, "step": 9802 }, { "epoch": 1.7845635751342495, "grad_norm": 108.0, "learning_rate": 4.85036794043209e-06, "loss": 1.308140516281128, "step": 9804 }, { "epoch": 1.7849276417584417, "grad_norm": 12.875, "learning_rate": 4.848900377779768e-06, "loss": 1.1591253280639648, "step": 9806 }, { "epoch": 1.785291708382634, "grad_norm": 17.875, "learning_rate": 4.847432984046649e-06, "loss": 1.3936431407928467, "step": 9808 }, { "epoch": 1.7856557750068263, "grad_norm": 4.78125, "learning_rate": 4.84596575944807e-06, "loss": 1.1082038879394531, "step": 9810 }, { "epoch": 1.7860198416310185, "grad_norm": 19.25, "learning_rate": 4.844498704199338e-06, "loss": 1.299839735031128, "step": 9812 }, { "epoch": 1.7863839082552107, "grad_norm": 14.625, "learning_rate": 4.843031818515738e-06, "loss": 1.3926259279251099, "step": 9814 }, { "epoch": 1.786747974879403, "grad_norm": 17.25, "learning_rate": 4.841565102612531e-06, "loss": 1.6030869483947754, "step": 9816 }, { "epoch": 1.7871120415035953, "grad_norm": 21.375, "learning_rate": 4.84009855670495e-06, "loss": 1.7322543859481812, "step": 9818 }, { "epoch": 1.7874761081277875, "grad_norm": 4.25, "learning_rate": 4.838632181008206e-06, "loss": 0.9729434251785278, "step": 9820 }, { "epoch": 1.7878401747519796, "grad_norm": 5.875, "learning_rate": 4.837165975737481e-06, "loss": 1.184375286102295, "step": 9822 }, { "epoch": 1.7882042413761718, "grad_norm": 16.25, "learning_rate": 4.835699941107938e-06, "loss": 1.1851658821105957, "step": 9824 }, { "epoch": 1.788568308000364, "grad_norm": 26.125, "learning_rate": 4.83423407733471e-06, "loss": 0.8616297245025635, "step": 9826 }, { "epoch": 1.7889323746245562, "grad_norm": 20.625, "learning_rate": 4.832768384632906e-06, "loss": 0.9894980192184448, "step": 9828 }, { "epoch": 1.7892964412487484, "grad_norm": 15.8125, "learning_rate": 4.831302863217613e-06, "loss": 1.2195518016815186, "step": 9830 }, { "epoch": 1.7896605078729406, "grad_norm": 10.5625, "learning_rate": 4.829837513303886e-06, "loss": 1.5083246231079102, "step": 9832 }, { "epoch": 1.7900245744971328, "grad_norm": 11.375, "learning_rate": 4.828372335106762e-06, "loss": 0.9479171633720398, "step": 9834 }, { "epoch": 1.7903886411213252, "grad_norm": 11.0, "learning_rate": 4.826907328841251e-06, "loss": 1.514286756515503, "step": 9836 }, { "epoch": 1.7907527077455174, "grad_norm": 16.875, "learning_rate": 4.825442494722334e-06, "loss": 1.240665316581726, "step": 9838 }, { "epoch": 1.7911167743697096, "grad_norm": 11.8125, "learning_rate": 4.823977832964972e-06, "loss": 0.9979418516159058, "step": 9840 }, { "epoch": 1.791480840993902, "grad_norm": 15.4375, "learning_rate": 4.8225133437840965e-06, "loss": 1.7140535116195679, "step": 9842 }, { "epoch": 1.7918449076180942, "grad_norm": 3.046875, "learning_rate": 4.821049027394615e-06, "loss": 0.9580082893371582, "step": 9844 }, { "epoch": 1.7922089742422864, "grad_norm": 10.6875, "learning_rate": 4.819584884011413e-06, "loss": 1.4786930084228516, "step": 9846 }, { "epoch": 1.7925730408664786, "grad_norm": 8.3125, "learning_rate": 4.818120913849344e-06, "loss": 1.4283615350723267, "step": 9848 }, { "epoch": 1.7929371074906708, "grad_norm": 7.15625, "learning_rate": 4.816657117123243e-06, "loss": 1.3162473440170288, "step": 9850 }, { "epoch": 1.793301174114863, "grad_norm": 8.8125, "learning_rate": 4.815193494047911e-06, "loss": 1.4530799388885498, "step": 9852 }, { "epoch": 1.7936652407390552, "grad_norm": 6.6875, "learning_rate": 4.813730044838134e-06, "loss": 1.4405879974365234, "step": 9854 }, { "epoch": 1.7940293073632474, "grad_norm": 12.125, "learning_rate": 4.8122667697086664e-06, "loss": 1.2693815231323242, "step": 9856 }, { "epoch": 1.7943933739874396, "grad_norm": 12.5, "learning_rate": 4.8108036688742345e-06, "loss": 1.287493109703064, "step": 9858 }, { "epoch": 1.7947574406116318, "grad_norm": 62.5, "learning_rate": 4.809340742549548e-06, "loss": 1.3426483869552612, "step": 9860 }, { "epoch": 1.7951215072358242, "grad_norm": 5.75, "learning_rate": 4.807877990949279e-06, "loss": 1.3801523447036743, "step": 9862 }, { "epoch": 1.7954855738600164, "grad_norm": 11.25, "learning_rate": 4.806415414288085e-06, "loss": 1.3038803339004517, "step": 9864 }, { "epoch": 1.7958496404842086, "grad_norm": 7.125, "learning_rate": 4.8049530127805925e-06, "loss": 1.341579794883728, "step": 9866 }, { "epoch": 1.796213707108401, "grad_norm": 5.3125, "learning_rate": 4.8034907866414005e-06, "loss": 1.26124107837677, "step": 9868 }, { "epoch": 1.7965777737325932, "grad_norm": 21.0, "learning_rate": 4.802028736085089e-06, "loss": 1.4446187019348145, "step": 9870 }, { "epoch": 1.7969418403567854, "grad_norm": 15.375, "learning_rate": 4.800566861326203e-06, "loss": 1.2436654567718506, "step": 9872 }, { "epoch": 1.7973059069809776, "grad_norm": 18.0, "learning_rate": 4.799105162579269e-06, "loss": 1.4584167003631592, "step": 9874 }, { "epoch": 1.7976699736051698, "grad_norm": 10.3125, "learning_rate": 4.797643640058789e-06, "loss": 1.368586778640747, "step": 9876 }, { "epoch": 1.798034040229362, "grad_norm": 13.25, "learning_rate": 4.7961822939792285e-06, "loss": 1.2624218463897705, "step": 9878 }, { "epoch": 1.7983981068535542, "grad_norm": 24.5, "learning_rate": 4.79472112455504e-06, "loss": 1.330539584159851, "step": 9880 }, { "epoch": 1.7987621734777464, "grad_norm": 15.375, "learning_rate": 4.7932601320006405e-06, "loss": 1.5191045999526978, "step": 9882 }, { "epoch": 1.7991262401019386, "grad_norm": 9.8125, "learning_rate": 4.7917993165304265e-06, "loss": 1.120861530303955, "step": 9884 }, { "epoch": 1.7994903067261308, "grad_norm": 12.4375, "learning_rate": 4.790338678358767e-06, "loss": 0.6580009460449219, "step": 9886 }, { "epoch": 1.799854373350323, "grad_norm": 17.625, "learning_rate": 4.788878217700003e-06, "loss": 1.2356891632080078, "step": 9888 }, { "epoch": 1.8002184399745154, "grad_norm": 10.25, "learning_rate": 4.787417934768455e-06, "loss": 1.4245901107788086, "step": 9890 }, { "epoch": 1.8005825065987076, "grad_norm": 6.0, "learning_rate": 4.785957829778407e-06, "loss": 1.2386894226074219, "step": 9892 }, { "epoch": 1.8009465732228997, "grad_norm": 8.25, "learning_rate": 4.78449790294413e-06, "loss": 1.229024887084961, "step": 9894 }, { "epoch": 1.8013106398470922, "grad_norm": 14.125, "learning_rate": 4.78303815447986e-06, "loss": 1.4715592861175537, "step": 9896 }, { "epoch": 1.8016747064712844, "grad_norm": 14.5, "learning_rate": 4.781578584599807e-06, "loss": 1.2632544040679932, "step": 9898 }, { "epoch": 1.8020387730954766, "grad_norm": 14.25, "learning_rate": 4.78011919351816e-06, "loss": 1.2378745079040527, "step": 9900 }, { "epoch": 1.8024028397196687, "grad_norm": 15.125, "learning_rate": 4.778659981449077e-06, "loss": 0.8463464975357056, "step": 9902 }, { "epoch": 1.802766906343861, "grad_norm": 10.1875, "learning_rate": 4.777200948606693e-06, "loss": 1.6600208282470703, "step": 9904 }, { "epoch": 1.8031309729680531, "grad_norm": 8.9375, "learning_rate": 4.775742095205114e-06, "loss": 1.0083280801773071, "step": 9906 }, { "epoch": 1.8034950395922453, "grad_norm": 2.640625, "learning_rate": 4.77428342145842e-06, "loss": 0.6644299030303955, "step": 9908 }, { "epoch": 1.8038591062164375, "grad_norm": 6.3125, "learning_rate": 4.772824927580668e-06, "loss": 1.184407353401184, "step": 9910 }, { "epoch": 1.8042231728406297, "grad_norm": 25.125, "learning_rate": 4.771366613785884e-06, "loss": 1.1789664030075073, "step": 9912 }, { "epoch": 1.804587239464822, "grad_norm": 3.6875, "learning_rate": 4.769908480288069e-06, "loss": 1.1635903120040894, "step": 9914 }, { "epoch": 1.8049513060890143, "grad_norm": 9.5, "learning_rate": 4.768450527301202e-06, "loss": 1.1157206296920776, "step": 9916 }, { "epoch": 1.8053153727132065, "grad_norm": 7.78125, "learning_rate": 4.7669927550392264e-06, "loss": 1.4344539642333984, "step": 9918 }, { "epoch": 1.8056794393373987, "grad_norm": 13.375, "learning_rate": 4.76553516371607e-06, "loss": 1.2899246215820312, "step": 9920 }, { "epoch": 1.8060435059615911, "grad_norm": 6.0, "learning_rate": 4.764077753545622e-06, "loss": 1.222097635269165, "step": 9922 }, { "epoch": 1.8064075725857833, "grad_norm": 14.125, "learning_rate": 4.762620524741756e-06, "loss": 1.786362886428833, "step": 9924 }, { "epoch": 1.8067716392099755, "grad_norm": 7.40625, "learning_rate": 4.761163477518315e-06, "loss": 1.2622867822647095, "step": 9926 }, { "epoch": 1.8071357058341677, "grad_norm": 15.75, "learning_rate": 4.759706612089112e-06, "loss": 1.1997376680374146, "step": 9928 }, { "epoch": 1.80749977245836, "grad_norm": 9.375, "learning_rate": 4.758249928667938e-06, "loss": 1.3268487453460693, "step": 9930 }, { "epoch": 1.807863839082552, "grad_norm": 13.625, "learning_rate": 4.756793427468553e-06, "loss": 1.278355360031128, "step": 9932 }, { "epoch": 1.8082279057067443, "grad_norm": 3.25, "learning_rate": 4.755337108704695e-06, "loss": 1.045915126800537, "step": 9934 }, { "epoch": 1.8085919723309365, "grad_norm": 17.25, "learning_rate": 4.753880972590073e-06, "loss": 1.578112244606018, "step": 9936 }, { "epoch": 1.8089560389551287, "grad_norm": 21.0, "learning_rate": 4.752425019338367e-06, "loss": 1.659615397453308, "step": 9938 }, { "epoch": 1.8093201055793209, "grad_norm": 9.0625, "learning_rate": 4.750969249163234e-06, "loss": 1.2165932655334473, "step": 9940 }, { "epoch": 1.8096841722035133, "grad_norm": 9.1875, "learning_rate": 4.749513662278301e-06, "loss": 1.5094040632247925, "step": 9942 }, { "epoch": 1.8100482388277055, "grad_norm": 6.125, "learning_rate": 4.748058258897172e-06, "loss": 0.9273290634155273, "step": 9944 }, { "epoch": 1.8104123054518977, "grad_norm": 12.0625, "learning_rate": 4.746603039233419e-06, "loss": 1.0045676231384277, "step": 9946 }, { "epoch": 1.8107763720760899, "grad_norm": 27.25, "learning_rate": 4.745148003500589e-06, "loss": 0.7226732969284058, "step": 9948 }, { "epoch": 1.8111404387002823, "grad_norm": 10.625, "learning_rate": 4.7436931519122065e-06, "loss": 0.9850039482116699, "step": 9950 }, { "epoch": 1.8115045053244745, "grad_norm": 22.125, "learning_rate": 4.74223848468176e-06, "loss": 1.3006930351257324, "step": 9952 }, { "epoch": 1.8118685719486667, "grad_norm": 19.0, "learning_rate": 4.740784002022721e-06, "loss": 1.9448378086090088, "step": 9954 }, { "epoch": 1.8122326385728589, "grad_norm": 21.625, "learning_rate": 4.739329704148525e-06, "loss": 1.9606869220733643, "step": 9956 }, { "epoch": 1.812596705197051, "grad_norm": 13.6875, "learning_rate": 4.737875591272586e-06, "loss": 1.9865272045135498, "step": 9958 }, { "epoch": 1.8129607718212433, "grad_norm": 9.3125, "learning_rate": 4.7364216636082895e-06, "loss": 1.4596893787384033, "step": 9960 }, { "epoch": 1.8133248384454355, "grad_norm": 16.25, "learning_rate": 4.7349679213689925e-06, "loss": 1.4548094272613525, "step": 9962 }, { "epoch": 1.8136889050696277, "grad_norm": 25.125, "learning_rate": 4.7335143647680265e-06, "loss": 1.3416130542755127, "step": 9964 }, { "epoch": 1.8140529716938198, "grad_norm": 11.125, "learning_rate": 4.732060994018696e-06, "loss": 1.4447228908538818, "step": 9966 }, { "epoch": 1.814417038318012, "grad_norm": 11.5625, "learning_rate": 4.730607809334275e-06, "loss": 1.4781229496002197, "step": 9968 }, { "epoch": 1.8147811049422045, "grad_norm": 44.5, "learning_rate": 4.729154810928014e-06, "loss": 1.0811924934387207, "step": 9970 }, { "epoch": 1.8151451715663967, "grad_norm": 11.3125, "learning_rate": 4.727701999013133e-06, "loss": 1.3054535388946533, "step": 9972 }, { "epoch": 1.8155092381905888, "grad_norm": 7.3125, "learning_rate": 4.726249373802829e-06, "loss": 1.839009404182434, "step": 9974 }, { "epoch": 1.8158733048147813, "grad_norm": 5.40625, "learning_rate": 4.7247969355102675e-06, "loss": 1.3314069509506226, "step": 9976 }, { "epoch": 1.8162373714389735, "grad_norm": 9.9375, "learning_rate": 4.7233446843485854e-06, "loss": 1.6068639755249023, "step": 9978 }, { "epoch": 1.8166014380631657, "grad_norm": 8.375, "learning_rate": 4.7218926205309e-06, "loss": 1.2942819595336914, "step": 9980 }, { "epoch": 1.8169655046873578, "grad_norm": 15.0625, "learning_rate": 4.720440744270291e-06, "loss": 1.0803054571151733, "step": 9982 }, { "epoch": 1.81732957131155, "grad_norm": 23.875, "learning_rate": 4.718989055779817e-06, "loss": 0.6113383769989014, "step": 9984 }, { "epoch": 1.8176936379357422, "grad_norm": 25.875, "learning_rate": 4.717537555272509e-06, "loss": 1.4741604328155518, "step": 9986 }, { "epoch": 1.8180577045599344, "grad_norm": 17.75, "learning_rate": 4.716086242961367e-06, "loss": 1.5401732921600342, "step": 9988 }, { "epoch": 1.8184217711841266, "grad_norm": 41.5, "learning_rate": 4.714635119059366e-06, "loss": 1.5616130828857422, "step": 9990 }, { "epoch": 1.8187858378083188, "grad_norm": 12.3125, "learning_rate": 4.71318418377945e-06, "loss": 1.656862497329712, "step": 9992 }, { "epoch": 1.819149904432511, "grad_norm": 15.8125, "learning_rate": 4.711733437334541e-06, "loss": 1.5075008869171143, "step": 9994 }, { "epoch": 1.8195139710567034, "grad_norm": 9.8125, "learning_rate": 4.7102828799375315e-06, "loss": 1.3021337985992432, "step": 9996 }, { "epoch": 1.8198780376808956, "grad_norm": 10.5, "learning_rate": 4.708832511801279e-06, "loss": 1.0407510995864868, "step": 9998 }, { "epoch": 1.8202421043050878, "grad_norm": 14.5, "learning_rate": 4.707382333138626e-06, "loss": 1.476670265197754, "step": 10000 }, { "epoch": 1.82060617092928, "grad_norm": 15.25, "learning_rate": 4.705932344162374e-06, "loss": 1.5759352445602417, "step": 10002 }, { "epoch": 1.8209702375534724, "grad_norm": 33.5, "learning_rate": 4.704482545085307e-06, "loss": 0.4886060357093811, "step": 10004 }, { "epoch": 1.8213343041776646, "grad_norm": 45.5, "learning_rate": 4.7030329361201785e-06, "loss": 1.4418752193450928, "step": 10006 }, { "epoch": 1.8216983708018568, "grad_norm": 11.25, "learning_rate": 4.701583517479708e-06, "loss": 1.2543349266052246, "step": 10008 }, { "epoch": 1.822062437426049, "grad_norm": 40.25, "learning_rate": 4.700134289376597e-06, "loss": 1.3194719552993774, "step": 10010 }, { "epoch": 1.8224265040502412, "grad_norm": 12.9375, "learning_rate": 4.698685252023508e-06, "loss": 1.386330485343933, "step": 10012 }, { "epoch": 1.8227905706744334, "grad_norm": 26.5, "learning_rate": 4.6972364056330855e-06, "loss": 1.2390315532684326, "step": 10014 }, { "epoch": 1.8231546372986256, "grad_norm": 9.1875, "learning_rate": 4.695787750417942e-06, "loss": 0.41816461086273193, "step": 10016 }, { "epoch": 1.8235187039228178, "grad_norm": 8.5, "learning_rate": 4.694339286590659e-06, "loss": 1.2061243057250977, "step": 10018 }, { "epoch": 1.82388277054701, "grad_norm": 12.0625, "learning_rate": 4.692891014363796e-06, "loss": 1.2117953300476074, "step": 10020 }, { "epoch": 1.8242468371712022, "grad_norm": 10.0, "learning_rate": 4.6914429339498774e-06, "loss": 1.5956456661224365, "step": 10022 }, { "epoch": 1.8246109037953946, "grad_norm": 12.3125, "learning_rate": 4.689995045561406e-06, "loss": 1.4610309600830078, "step": 10024 }, { "epoch": 1.8249749704195868, "grad_norm": 8.8125, "learning_rate": 4.688547349410854e-06, "loss": 1.142910361289978, "step": 10026 }, { "epoch": 1.825339037043779, "grad_norm": 8.5, "learning_rate": 4.687099845710661e-06, "loss": 1.2656497955322266, "step": 10028 }, { "epoch": 1.8257031036679714, "grad_norm": 11.6875, "learning_rate": 4.685652534673248e-06, "loss": 1.1357722282409668, "step": 10030 }, { "epoch": 1.8260671702921636, "grad_norm": 20.625, "learning_rate": 4.6842054165109965e-06, "loss": 1.333534598350525, "step": 10032 }, { "epoch": 1.8264312369163558, "grad_norm": 14.3125, "learning_rate": 4.6827584914362675e-06, "loss": 1.4362547397613525, "step": 10034 }, { "epoch": 1.826795303540548, "grad_norm": 7.78125, "learning_rate": 4.681311759661394e-06, "loss": 1.5378859043121338, "step": 10036 }, { "epoch": 1.8271593701647402, "grad_norm": 6.5625, "learning_rate": 4.679865221398674e-06, "loss": 1.2298191785812378, "step": 10038 }, { "epoch": 1.8275234367889324, "grad_norm": 10.875, "learning_rate": 4.678418876860383e-06, "loss": 1.3100981712341309, "step": 10040 }, { "epoch": 1.8278875034131246, "grad_norm": 9.3125, "learning_rate": 4.676972726258766e-06, "loss": 1.1052302122116089, "step": 10042 }, { "epoch": 1.8282515700373168, "grad_norm": 5.15625, "learning_rate": 4.675526769806039e-06, "loss": 0.9375574588775635, "step": 10044 }, { "epoch": 1.828615636661509, "grad_norm": 16.625, "learning_rate": 4.674081007714392e-06, "loss": 1.1996800899505615, "step": 10046 }, { "epoch": 1.8289797032857011, "grad_norm": 18.125, "learning_rate": 4.672635440195982e-06, "loss": 1.739487648010254, "step": 10048 }, { "epoch": 1.8293437699098936, "grad_norm": 12.5, "learning_rate": 4.671190067462944e-06, "loss": 1.3939199447631836, "step": 10050 }, { "epoch": 1.8297078365340858, "grad_norm": 34.0, "learning_rate": 4.669744889727377e-06, "loss": 1.3368499279022217, "step": 10052 }, { "epoch": 1.830071903158278, "grad_norm": 11.25, "learning_rate": 4.6682999072013554e-06, "loss": 1.3886959552764893, "step": 10054 }, { "epoch": 1.8304359697824701, "grad_norm": 10.0625, "learning_rate": 4.6668551200969285e-06, "loss": 1.3839491605758667, "step": 10056 }, { "epoch": 1.8308000364066626, "grad_norm": 6.5625, "learning_rate": 4.665410528626107e-06, "loss": 0.9638664722442627, "step": 10058 }, { "epoch": 1.8311641030308548, "grad_norm": 43.25, "learning_rate": 4.663966133000884e-06, "loss": 1.2880927324295044, "step": 10060 }, { "epoch": 1.831528169655047, "grad_norm": 13.5625, "learning_rate": 4.662521933433215e-06, "loss": 1.6466302871704102, "step": 10062 }, { "epoch": 1.8318922362792391, "grad_norm": 8.125, "learning_rate": 4.661077930135033e-06, "loss": 1.3043192625045776, "step": 10064 }, { "epoch": 1.8322563029034313, "grad_norm": 8.75, "learning_rate": 4.659634123318238e-06, "loss": 1.303576946258545, "step": 10066 }, { "epoch": 1.8326203695276235, "grad_norm": 11.375, "learning_rate": 4.658190513194703e-06, "loss": 1.4081577062606812, "step": 10068 }, { "epoch": 1.8329844361518157, "grad_norm": 11.75, "learning_rate": 4.656747099976273e-06, "loss": 1.198384404182434, "step": 10070 }, { "epoch": 1.833348502776008, "grad_norm": 18.0, "learning_rate": 4.655303883874761e-06, "loss": 1.3355695009231567, "step": 10072 }, { "epoch": 1.8337125694002, "grad_norm": 13.875, "learning_rate": 4.653860865101956e-06, "loss": 1.8721139430999756, "step": 10074 }, { "epoch": 1.8340766360243923, "grad_norm": 22.625, "learning_rate": 4.652418043869614e-06, "loss": 1.429003119468689, "step": 10076 }, { "epoch": 1.8344407026485847, "grad_norm": 30.0, "learning_rate": 4.650975420389461e-06, "loss": 1.5270614624023438, "step": 10078 }, { "epoch": 1.834804769272777, "grad_norm": 12.9375, "learning_rate": 4.6495329948732e-06, "loss": 1.2187944650650024, "step": 10080 }, { "epoch": 1.835168835896969, "grad_norm": 53.75, "learning_rate": 4.648090767532496e-06, "loss": 1.854748010635376, "step": 10082 }, { "epoch": 1.8355329025211615, "grad_norm": 19.125, "learning_rate": 4.646648738578996e-06, "loss": 1.834840178489685, "step": 10084 }, { "epoch": 1.8358969691453537, "grad_norm": 4.3125, "learning_rate": 4.645206908224309e-06, "loss": 1.055801510810852, "step": 10086 }, { "epoch": 1.836261035769546, "grad_norm": 7.84375, "learning_rate": 4.643765276680016e-06, "loss": 1.058341145515442, "step": 10088 }, { "epoch": 1.836625102393738, "grad_norm": 10.25, "learning_rate": 4.642323844157674e-06, "loss": 0.9838607311248779, "step": 10090 }, { "epoch": 1.8369891690179303, "grad_norm": 10.625, "learning_rate": 4.6408826108688035e-06, "loss": 1.3639460802078247, "step": 10092 }, { "epoch": 1.8373532356421225, "grad_norm": 20.875, "learning_rate": 4.639441577024903e-06, "loss": 1.926166296005249, "step": 10094 }, { "epoch": 1.8377173022663147, "grad_norm": 20.375, "learning_rate": 4.638000742837438e-06, "loss": 1.8123565912246704, "step": 10096 }, { "epoch": 1.8380813688905069, "grad_norm": 34.75, "learning_rate": 4.636560108517842e-06, "loss": 1.4493234157562256, "step": 10098 }, { "epoch": 1.838445435514699, "grad_norm": 20.125, "learning_rate": 4.635119674277528e-06, "loss": 1.2812522649765015, "step": 10100 }, { "epoch": 1.8388095021388913, "grad_norm": 10.0, "learning_rate": 4.633679440327867e-06, "loss": 1.3218059539794922, "step": 10102 }, { "epoch": 1.8391735687630837, "grad_norm": 22.875, "learning_rate": 4.632239406880212e-06, "loss": 1.4651618003845215, "step": 10104 }, { "epoch": 1.8395376353872759, "grad_norm": 22.0, "learning_rate": 4.630799574145883e-06, "loss": 1.6552772521972656, "step": 10106 }, { "epoch": 1.839901702011468, "grad_norm": 4.875, "learning_rate": 4.629359942336164e-06, "loss": 1.0609335899353027, "step": 10108 }, { "epoch": 1.8402657686356603, "grad_norm": 1.9375, "learning_rate": 4.627920511662323e-06, "loss": 1.0964921712875366, "step": 10110 }, { "epoch": 1.8406298352598527, "grad_norm": 26.75, "learning_rate": 4.626481282335582e-06, "loss": 1.1380648612976074, "step": 10112 }, { "epoch": 1.8409939018840449, "grad_norm": 17.625, "learning_rate": 4.6250422545671495e-06, "loss": 1.6771175861358643, "step": 10114 }, { "epoch": 1.841357968508237, "grad_norm": 8.6875, "learning_rate": 4.623603428568193e-06, "loss": 1.4338878393173218, "step": 10116 }, { "epoch": 1.8417220351324293, "grad_norm": 6.71875, "learning_rate": 4.622164804549855e-06, "loss": 1.366791844367981, "step": 10118 }, { "epoch": 1.8420861017566215, "grad_norm": 18.75, "learning_rate": 4.620726382723248e-06, "loss": 1.72682523727417, "step": 10120 }, { "epoch": 1.8424501683808137, "grad_norm": 13.5625, "learning_rate": 4.619288163299455e-06, "loss": 1.548140287399292, "step": 10122 }, { "epoch": 1.8428142350050059, "grad_norm": 39.75, "learning_rate": 4.617850146489529e-06, "loss": 1.6242822408676147, "step": 10124 }, { "epoch": 1.843178301629198, "grad_norm": 15.0625, "learning_rate": 4.616412332504493e-06, "loss": 1.9507834911346436, "step": 10126 }, { "epoch": 1.8435423682533902, "grad_norm": 68.0, "learning_rate": 4.6149747215553385e-06, "loss": 1.2795758247375488, "step": 10128 }, { "epoch": 1.8439064348775824, "grad_norm": 77.5, "learning_rate": 4.613537313853032e-06, "loss": 0.890384316444397, "step": 10130 }, { "epoch": 1.8442705015017749, "grad_norm": 8.125, "learning_rate": 4.612100109608503e-06, "loss": 1.4209778308868408, "step": 10132 }, { "epoch": 1.844634568125967, "grad_norm": 9.5, "learning_rate": 4.61066310903266e-06, "loss": 1.2322556972503662, "step": 10134 }, { "epoch": 1.8449986347501592, "grad_norm": 14.5625, "learning_rate": 4.6092263123363775e-06, "loss": 1.2881290912628174, "step": 10136 }, { "epoch": 1.8453627013743517, "grad_norm": 6.0625, "learning_rate": 4.607789719730494e-06, "loss": 1.3625489473342896, "step": 10138 }, { "epoch": 1.8457267679985438, "grad_norm": 12.3125, "learning_rate": 4.60635333142583e-06, "loss": 1.4665266275405884, "step": 10140 }, { "epoch": 1.846090834622736, "grad_norm": 9.625, "learning_rate": 4.604917147633163e-06, "loss": 1.5087851285934448, "step": 10142 }, { "epoch": 1.8464549012469282, "grad_norm": 205.0, "learning_rate": 4.603481168563253e-06, "loss": 0.543316125869751, "step": 10144 }, { "epoch": 1.8468189678711204, "grad_norm": 15.25, "learning_rate": 4.602045394426823e-06, "loss": 1.400770902633667, "step": 10146 }, { "epoch": 1.8471830344953126, "grad_norm": 11.4375, "learning_rate": 4.600609825434564e-06, "loss": 1.1860394477844238, "step": 10148 }, { "epoch": 1.8475471011195048, "grad_norm": 7.03125, "learning_rate": 4.599174461797143e-06, "loss": 1.3179806470870972, "step": 10150 }, { "epoch": 1.847911167743697, "grad_norm": 6.96875, "learning_rate": 4.597739303725192e-06, "loss": 1.2298710346221924, "step": 10152 }, { "epoch": 1.8482752343678892, "grad_norm": 16.375, "learning_rate": 4.596304351429315e-06, "loss": 1.504056453704834, "step": 10154 }, { "epoch": 1.8486393009920814, "grad_norm": 12.875, "learning_rate": 4.594869605120088e-06, "loss": 1.791691541671753, "step": 10156 }, { "epoch": 1.8490033676162738, "grad_norm": 3.9375, "learning_rate": 4.59343506500805e-06, "loss": 0.8267419338226318, "step": 10158 }, { "epoch": 1.849367434240466, "grad_norm": 6.03125, "learning_rate": 4.592000731303716e-06, "loss": 0.9686307311058044, "step": 10160 }, { "epoch": 1.8497315008646582, "grad_norm": 23.0, "learning_rate": 4.590566604217568e-06, "loss": 1.5720093250274658, "step": 10162 }, { "epoch": 1.8500955674888506, "grad_norm": 22.125, "learning_rate": 4.58913268396006e-06, "loss": 1.6447392702102661, "step": 10164 }, { "epoch": 1.8504596341130428, "grad_norm": 14.5, "learning_rate": 4.587698970741613e-06, "loss": 1.420624852180481, "step": 10166 }, { "epoch": 1.850823700737235, "grad_norm": 11.125, "learning_rate": 4.586265464772617e-06, "loss": 1.4557628631591797, "step": 10168 }, { "epoch": 1.8511877673614272, "grad_norm": 21.625, "learning_rate": 4.584832166263437e-06, "loss": 1.7179882526397705, "step": 10170 }, { "epoch": 1.8515518339856194, "grad_norm": 34.0, "learning_rate": 4.583399075424399e-06, "loss": 1.2712702751159668, "step": 10172 }, { "epoch": 1.8519159006098116, "grad_norm": 13.1875, "learning_rate": 4.581966192465807e-06, "loss": 1.2924363613128662, "step": 10174 }, { "epoch": 1.8522799672340038, "grad_norm": 16.25, "learning_rate": 4.580533517597931e-06, "loss": 1.3899078369140625, "step": 10176 }, { "epoch": 1.852644033858196, "grad_norm": 9.75, "learning_rate": 4.579101051031005e-06, "loss": 1.3126336336135864, "step": 10178 }, { "epoch": 1.8530081004823882, "grad_norm": 12.625, "learning_rate": 4.577668792975245e-06, "loss": 1.108689546585083, "step": 10180 }, { "epoch": 1.8533721671065804, "grad_norm": 27.25, "learning_rate": 4.576236743640823e-06, "loss": 1.0283482074737549, "step": 10182 }, { "epoch": 1.8537362337307728, "grad_norm": 15.1875, "learning_rate": 4.5748049032378895e-06, "loss": 1.379903793334961, "step": 10184 }, { "epoch": 1.854100300354965, "grad_norm": 8.375, "learning_rate": 4.5733732719765615e-06, "loss": 1.6895334720611572, "step": 10186 }, { "epoch": 1.8544643669791572, "grad_norm": 19.875, "learning_rate": 4.5719418500669234e-06, "loss": 1.9689158201217651, "step": 10188 }, { "epoch": 1.8548284336033494, "grad_norm": 12.25, "learning_rate": 4.570510637719032e-06, "loss": 1.6806762218475342, "step": 10190 }, { "epoch": 1.8551925002275418, "grad_norm": 8.4375, "learning_rate": 4.5690796351429105e-06, "loss": 1.3381773233413696, "step": 10192 }, { "epoch": 1.855556566851734, "grad_norm": 12.875, "learning_rate": 4.567648842548553e-06, "loss": 1.3012750148773193, "step": 10194 }, { "epoch": 1.8559206334759262, "grad_norm": 11.5625, "learning_rate": 4.5662182601459245e-06, "loss": 1.4648109674453735, "step": 10196 }, { "epoch": 1.8562847001001184, "grad_norm": 13.3125, "learning_rate": 4.5647878881449545e-06, "loss": 1.315039038658142, "step": 10198 }, { "epoch": 1.8566487667243106, "grad_norm": 10.3125, "learning_rate": 4.563357726755547e-06, "loss": 1.0652984380722046, "step": 10200 }, { "epoch": 1.8570128333485028, "grad_norm": 8.0625, "learning_rate": 4.561927776187569e-06, "loss": 1.421647310256958, "step": 10202 }, { "epoch": 1.857376899972695, "grad_norm": 18.625, "learning_rate": 4.560498036650863e-06, "loss": 1.300749659538269, "step": 10204 }, { "epoch": 1.8577409665968871, "grad_norm": 50.25, "learning_rate": 4.559068508355237e-06, "loss": 0.8592878580093384, "step": 10206 }, { "epoch": 1.8581050332210793, "grad_norm": 6.53125, "learning_rate": 4.557639191510466e-06, "loss": 0.3649294674396515, "step": 10208 }, { "epoch": 1.8584690998452715, "grad_norm": 11.5625, "learning_rate": 4.5562100863263e-06, "loss": 1.651382327079773, "step": 10210 }, { "epoch": 1.858833166469464, "grad_norm": 6.6875, "learning_rate": 4.554781193012451e-06, "loss": 1.2910923957824707, "step": 10212 }, { "epoch": 1.8591972330936561, "grad_norm": 15.75, "learning_rate": 4.553352511778606e-06, "loss": 1.9338804483413696, "step": 10214 }, { "epoch": 1.8595612997178483, "grad_norm": 15.25, "learning_rate": 4.551924042834418e-06, "loss": 1.8648362159729004, "step": 10216 }, { "epoch": 1.8599253663420408, "grad_norm": 14.1875, "learning_rate": 4.550495786389507e-06, "loss": 1.2972800731658936, "step": 10218 }, { "epoch": 1.860289432966233, "grad_norm": 55.5, "learning_rate": 4.549067742653466e-06, "loss": 1.4796545505523682, "step": 10220 }, { "epoch": 1.8606534995904251, "grad_norm": 24.375, "learning_rate": 4.547639911835852e-06, "loss": 1.3768882751464844, "step": 10222 }, { "epoch": 1.8610175662146173, "grad_norm": 7.28125, "learning_rate": 4.546212294146196e-06, "loss": 1.211484670639038, "step": 10224 }, { "epoch": 1.8613816328388095, "grad_norm": 10.75, "learning_rate": 4.544784889793994e-06, "loss": 1.3258265256881714, "step": 10226 }, { "epoch": 1.8617456994630017, "grad_norm": 12.5, "learning_rate": 4.543357698988712e-06, "loss": 1.4483202695846558, "step": 10228 }, { "epoch": 1.862109766087194, "grad_norm": 15.6875, "learning_rate": 4.541930721939785e-06, "loss": 1.4156239032745361, "step": 10230 }, { "epoch": 1.8624738327113861, "grad_norm": 24.875, "learning_rate": 4.540503958856615e-06, "loss": 1.218030333518982, "step": 10232 }, { "epoch": 1.8628378993355783, "grad_norm": 10.8125, "learning_rate": 4.5390774099485735e-06, "loss": 1.024248480796814, "step": 10234 }, { "epoch": 1.8632019659597705, "grad_norm": 12.0625, "learning_rate": 4.537651075425003e-06, "loss": 0.5905171632766724, "step": 10236 }, { "epoch": 1.863566032583963, "grad_norm": 9.6875, "learning_rate": 4.536224955495209e-06, "loss": 1.526942253112793, "step": 10238 }, { "epoch": 1.8639300992081551, "grad_norm": 9.5625, "learning_rate": 4.534799050368473e-06, "loss": 1.4773681163787842, "step": 10240 }, { "epoch": 1.8642941658323473, "grad_norm": 12.1875, "learning_rate": 4.533373360254036e-06, "loss": 1.3674228191375732, "step": 10242 }, { "epoch": 1.8646582324565395, "grad_norm": 52.75, "learning_rate": 4.531947885361115e-06, "loss": 1.23373544216156, "step": 10244 }, { "epoch": 1.865022299080732, "grad_norm": 21.375, "learning_rate": 4.5305226258988945e-06, "loss": 1.6065036058425903, "step": 10246 }, { "epoch": 1.865386365704924, "grad_norm": 15.6875, "learning_rate": 4.529097582076521e-06, "loss": 1.4302940368652344, "step": 10248 }, { "epoch": 1.8657504323291163, "grad_norm": 9.125, "learning_rate": 4.527672754103118e-06, "loss": 1.3910472393035889, "step": 10250 }, { "epoch": 1.8661144989533085, "grad_norm": 12.5, "learning_rate": 4.52624814218777e-06, "loss": 1.1082059144973755, "step": 10252 }, { "epoch": 1.8664785655775007, "grad_norm": 5.34375, "learning_rate": 4.524823746539535e-06, "loss": 1.2091387510299683, "step": 10254 }, { "epoch": 1.8668426322016929, "grad_norm": 8.8125, "learning_rate": 4.523399567367437e-06, "loss": 1.4354215860366821, "step": 10256 }, { "epoch": 1.867206698825885, "grad_norm": 12.625, "learning_rate": 4.521975604880469e-06, "loss": 1.4698941707611084, "step": 10258 }, { "epoch": 1.8675707654500773, "grad_norm": 6.03125, "learning_rate": 4.520551859287591e-06, "loss": 1.2633163928985596, "step": 10260 }, { "epoch": 1.8679348320742695, "grad_norm": 11.9375, "learning_rate": 4.519128330797731e-06, "loss": 1.2887117862701416, "step": 10262 }, { "epoch": 1.8682988986984617, "grad_norm": 16.125, "learning_rate": 4.517705019619787e-06, "loss": 1.693401575088501, "step": 10264 }, { "epoch": 1.868662965322654, "grad_norm": 7.125, "learning_rate": 4.516281925962626e-06, "loss": 1.133830189704895, "step": 10266 }, { "epoch": 1.8690270319468463, "grad_norm": 9.875, "learning_rate": 4.5148590500350766e-06, "loss": 1.4101113080978394, "step": 10268 }, { "epoch": 1.8693910985710385, "grad_norm": 7.125, "learning_rate": 4.513436392045945e-06, "loss": 1.1699773073196411, "step": 10270 }, { "epoch": 1.8697551651952309, "grad_norm": 24.25, "learning_rate": 4.512013952203997e-06, "loss": 1.2959704399108887, "step": 10272 }, { "epoch": 1.870119231819423, "grad_norm": 9.875, "learning_rate": 4.510591730717972e-06, "loss": 1.1705671548843384, "step": 10274 }, { "epoch": 1.8704832984436153, "grad_norm": 13.375, "learning_rate": 4.509169727796574e-06, "loss": 1.3706467151641846, "step": 10276 }, { "epoch": 1.8708473650678075, "grad_norm": 18.875, "learning_rate": 4.507747943648477e-06, "loss": 1.4365633726119995, "step": 10278 }, { "epoch": 1.8712114316919997, "grad_norm": 23.625, "learning_rate": 4.506326378482322e-06, "loss": 1.3904542922973633, "step": 10280 }, { "epoch": 1.8715754983161919, "grad_norm": 9.875, "learning_rate": 4.504905032506717e-06, "loss": 1.3144638538360596, "step": 10282 }, { "epoch": 1.871939564940384, "grad_norm": 9.3125, "learning_rate": 4.503483905930239e-06, "loss": 1.3469189405441284, "step": 10284 }, { "epoch": 1.8723036315645762, "grad_norm": 19.125, "learning_rate": 4.502062998961434e-06, "loss": 1.4580113887786865, "step": 10286 }, { "epoch": 1.8726676981887684, "grad_norm": 16.0, "learning_rate": 4.5006423118088136e-06, "loss": 1.3660454750061035, "step": 10288 }, { "epoch": 1.8730317648129606, "grad_norm": 34.5, "learning_rate": 4.499221844680857e-06, "loss": 2.1086161136627197, "step": 10290 }, { "epoch": 1.873395831437153, "grad_norm": 18.75, "learning_rate": 4.497801597786011e-06, "loss": 1.1748324632644653, "step": 10292 }, { "epoch": 1.8737598980613452, "grad_norm": 19.0, "learning_rate": 4.496381571332695e-06, "loss": 1.843911051750183, "step": 10294 }, { "epoch": 1.8741239646855374, "grad_norm": 11.5625, "learning_rate": 4.494961765529289e-06, "loss": 1.594116449356079, "step": 10296 }, { "epoch": 1.8744880313097296, "grad_norm": 5.15625, "learning_rate": 4.493542180584145e-06, "loss": 1.3023351430892944, "step": 10298 }, { "epoch": 1.874852097933922, "grad_norm": 7.09375, "learning_rate": 4.4921228167055805e-06, "loss": 1.3033276796340942, "step": 10300 }, { "epoch": 1.8752161645581142, "grad_norm": 18.125, "learning_rate": 4.490703674101881e-06, "loss": 1.3037970066070557, "step": 10302 }, { "epoch": 1.8755802311823064, "grad_norm": 12.375, "learning_rate": 4.4892847529813005e-06, "loss": 0.8734592199325562, "step": 10304 }, { "epoch": 1.8759442978064986, "grad_norm": 8.125, "learning_rate": 4.487866053552062e-06, "loss": 1.3332806825637817, "step": 10306 }, { "epoch": 1.8763083644306908, "grad_norm": 35.5, "learning_rate": 4.4864475760223495e-06, "loss": 1.343780279159546, "step": 10308 }, { "epoch": 1.876672431054883, "grad_norm": 18.75, "learning_rate": 4.4850293206003235e-06, "loss": 1.638200044631958, "step": 10310 }, { "epoch": 1.8770364976790752, "grad_norm": 16.125, "learning_rate": 4.483611287494104e-06, "loss": 1.4613993167877197, "step": 10312 }, { "epoch": 1.8774005643032674, "grad_norm": 22.875, "learning_rate": 4.482193476911782e-06, "loss": 1.8559073209762573, "step": 10314 }, { "epoch": 1.8777646309274596, "grad_norm": 41.75, "learning_rate": 4.480775889061418e-06, "loss": 1.727916955947876, "step": 10316 }, { "epoch": 1.8781286975516518, "grad_norm": 10.0625, "learning_rate": 4.479358524151034e-06, "loss": 1.1891539096832275, "step": 10318 }, { "epoch": 1.8784927641758442, "grad_norm": 270.0, "learning_rate": 4.477941382388625e-06, "loss": 1.0184335708618164, "step": 10320 }, { "epoch": 1.8788568308000364, "grad_norm": 10.1875, "learning_rate": 4.476524463982149e-06, "loss": 1.256813645362854, "step": 10322 }, { "epoch": 1.8792208974242286, "grad_norm": 16.5, "learning_rate": 4.475107769139534e-06, "loss": 1.7416784763336182, "step": 10324 }, { "epoch": 1.879584964048421, "grad_norm": 9.875, "learning_rate": 4.4736912980686745e-06, "loss": 0.8409368991851807, "step": 10326 }, { "epoch": 1.8799490306726132, "grad_norm": 20.625, "learning_rate": 4.47227505097743e-06, "loss": 1.396700143814087, "step": 10328 }, { "epoch": 1.8803130972968054, "grad_norm": 4.84375, "learning_rate": 4.470859028073632e-06, "loss": 0.9189120531082153, "step": 10330 }, { "epoch": 1.8806771639209976, "grad_norm": 7.96875, "learning_rate": 4.469443229565073e-06, "loss": 1.0982768535614014, "step": 10332 }, { "epoch": 1.8810412305451898, "grad_norm": 6.53125, "learning_rate": 4.468027655659518e-06, "loss": 1.2586933374404907, "step": 10334 }, { "epoch": 1.881405297169382, "grad_norm": 20.25, "learning_rate": 4.4666123065646975e-06, "loss": 1.3678619861602783, "step": 10336 }, { "epoch": 1.8817693637935742, "grad_norm": 22.5, "learning_rate": 4.465197182488304e-06, "loss": 0.8838649988174438, "step": 10338 }, { "epoch": 1.8821334304177664, "grad_norm": 45.75, "learning_rate": 4.463782283638006e-06, "loss": 1.314103364944458, "step": 10340 }, { "epoch": 1.8824974970419586, "grad_norm": 11.25, "learning_rate": 4.462367610221431e-06, "loss": 0.8045669794082642, "step": 10342 }, { "epoch": 1.8828615636661508, "grad_norm": 15.6875, "learning_rate": 4.460953162446178e-06, "loss": 1.2336615324020386, "step": 10344 }, { "epoch": 1.8832256302903432, "grad_norm": 35.0, "learning_rate": 4.459538940519813e-06, "loss": 1.5921138525009155, "step": 10346 }, { "epoch": 1.8835896969145354, "grad_norm": 18.875, "learning_rate": 4.458124944649863e-06, "loss": 0.7174089550971985, "step": 10348 }, { "epoch": 1.8839537635387276, "grad_norm": 16.0, "learning_rate": 4.45671117504383e-06, "loss": 1.6809015274047852, "step": 10350 }, { "epoch": 1.8843178301629198, "grad_norm": 9.8125, "learning_rate": 4.455297631909177e-06, "loss": 1.4973671436309814, "step": 10352 }, { "epoch": 1.8846818967871122, "grad_norm": 24.875, "learning_rate": 4.453884315453336e-06, "loss": 1.8407665491104126, "step": 10354 }, { "epoch": 1.8850459634113044, "grad_norm": 10.125, "learning_rate": 4.452471225883708e-06, "loss": 1.7408779859542847, "step": 10356 }, { "epoch": 1.8854100300354966, "grad_norm": 5.1875, "learning_rate": 4.4510583634076535e-06, "loss": 1.2544187307357788, "step": 10358 }, { "epoch": 1.8857740966596888, "grad_norm": 8.75, "learning_rate": 4.4496457282325084e-06, "loss": 0.773290753364563, "step": 10360 }, { "epoch": 1.886138163283881, "grad_norm": 11.375, "learning_rate": 4.448233320565569e-06, "loss": 0.9481028914451599, "step": 10362 }, { "epoch": 1.8865022299080731, "grad_norm": 11.5625, "learning_rate": 4.4468211406141e-06, "loss": 0.9567828178405762, "step": 10364 }, { "epoch": 1.8868662965322653, "grad_norm": 10.1875, "learning_rate": 4.445409188585337e-06, "loss": 1.2877835035324097, "step": 10366 }, { "epoch": 1.8872303631564575, "grad_norm": 13.3125, "learning_rate": 4.443997464686472e-06, "loss": 1.4767482280731201, "step": 10368 }, { "epoch": 1.8875944297806497, "grad_norm": 20.625, "learning_rate": 4.442585969124676e-06, "loss": 1.4532097578048706, "step": 10370 }, { "epoch": 1.887958496404842, "grad_norm": 21.75, "learning_rate": 4.441174702107076e-06, "loss": 1.56870436668396, "step": 10372 }, { "epoch": 1.8883225630290343, "grad_norm": 4.5, "learning_rate": 4.439763663840771e-06, "loss": 1.191565990447998, "step": 10374 }, { "epoch": 1.8886866296532265, "grad_norm": 6.5625, "learning_rate": 4.438352854532826e-06, "loss": 1.1797826290130615, "step": 10376 }, { "epoch": 1.8890506962774187, "grad_norm": 14.1875, "learning_rate": 4.43694227439027e-06, "loss": 1.0686910152435303, "step": 10378 }, { "epoch": 1.8894147629016111, "grad_norm": 4.59375, "learning_rate": 4.435531923620102e-06, "loss": 1.491809368133545, "step": 10380 }, { "epoch": 1.8897788295258033, "grad_norm": 8.8125, "learning_rate": 4.434121802429282e-06, "loss": 1.065504789352417, "step": 10382 }, { "epoch": 1.8901428961499955, "grad_norm": 15.9375, "learning_rate": 4.432711911024743e-06, "loss": 1.457078456878662, "step": 10384 }, { "epoch": 1.8905069627741877, "grad_norm": 25.0, "learning_rate": 4.431302249613379e-06, "loss": 1.5185307264328003, "step": 10386 }, { "epoch": 1.89087102939838, "grad_norm": 6.8125, "learning_rate": 4.429892818402052e-06, "loss": 1.2498611211776733, "step": 10388 }, { "epoch": 1.8912350960225721, "grad_norm": 12.0625, "learning_rate": 4.428483617597592e-06, "loss": 0.7743430137634277, "step": 10390 }, { "epoch": 1.8915991626467643, "grad_norm": 24.375, "learning_rate": 4.427074647406791e-06, "loss": 0.6134580373764038, "step": 10392 }, { "epoch": 1.8919632292709565, "grad_norm": 3.625, "learning_rate": 4.425665908036412e-06, "loss": 0.9048395156860352, "step": 10394 }, { "epoch": 1.8923272958951487, "grad_norm": 8.25, "learning_rate": 4.424257399693181e-06, "loss": 1.2680261135101318, "step": 10396 }, { "epoch": 1.892691362519341, "grad_norm": 24.875, "learning_rate": 4.422849122583789e-06, "loss": 1.261030673980713, "step": 10398 }, { "epoch": 1.8930554291435333, "grad_norm": 21.375, "learning_rate": 4.421441076914898e-06, "loss": 1.0411328077316284, "step": 10400 }, { "epoch": 1.8934194957677255, "grad_norm": 20.25, "learning_rate": 4.420033262893131e-06, "loss": 1.1487151384353638, "step": 10402 }, { "epoch": 1.8937835623919177, "grad_norm": 22.125, "learning_rate": 4.41862568072508e-06, "loss": 1.7431784868240356, "step": 10404 }, { "epoch": 1.8941476290161101, "grad_norm": 25.875, "learning_rate": 4.417218330617302e-06, "loss": 1.1477004289627075, "step": 10406 }, { "epoch": 1.8945116956403023, "grad_norm": 8.75, "learning_rate": 4.415811212776319e-06, "loss": 1.4733740091323853, "step": 10408 }, { "epoch": 1.8948757622644945, "grad_norm": 5.9375, "learning_rate": 4.41440432740862e-06, "loss": 1.3657646179199219, "step": 10410 }, { "epoch": 1.8952398288886867, "grad_norm": 69.5, "learning_rate": 4.4129976747206605e-06, "loss": 1.1342673301696777, "step": 10412 }, { "epoch": 1.895603895512879, "grad_norm": 29.5, "learning_rate": 4.4115912549188606e-06, "loss": 1.9397865533828735, "step": 10414 }, { "epoch": 1.895967962137071, "grad_norm": 18.0, "learning_rate": 4.410185068209608e-06, "loss": 1.4048717021942139, "step": 10416 }, { "epoch": 1.8963320287612633, "grad_norm": 17.0, "learning_rate": 4.408779114799251e-06, "loss": 1.068558692932129, "step": 10418 }, { "epoch": 1.8966960953854555, "grad_norm": 10.0, "learning_rate": 4.407373394894113e-06, "loss": 1.4947537183761597, "step": 10420 }, { "epoch": 1.8970601620096477, "grad_norm": 7.28125, "learning_rate": 4.405967908700472e-06, "loss": 1.3134336471557617, "step": 10422 }, { "epoch": 1.8974242286338399, "grad_norm": 42.0, "learning_rate": 4.404562656424583e-06, "loss": 1.6720588207244873, "step": 10424 }, { "epoch": 1.897788295258032, "grad_norm": 11.5625, "learning_rate": 4.4031576382726585e-06, "loss": 1.2858198881149292, "step": 10426 }, { "epoch": 1.8981523618822245, "grad_norm": 19.5, "learning_rate": 4.401752854450877e-06, "loss": 1.5044560432434082, "step": 10428 }, { "epoch": 1.8985164285064167, "grad_norm": 6.09375, "learning_rate": 4.4003483051653885e-06, "loss": 1.1447221040725708, "step": 10430 }, { "epoch": 1.8988804951306089, "grad_norm": 6.53125, "learning_rate": 4.398943990622303e-06, "loss": 1.2118511199951172, "step": 10432 }, { "epoch": 1.8992445617548013, "grad_norm": 17.625, "learning_rate": 4.397539911027698e-06, "loss": 1.5545129776000977, "step": 10434 }, { "epoch": 1.8996086283789935, "grad_norm": 14.0, "learning_rate": 4.3961360665876176e-06, "loss": 1.7559641599655151, "step": 10436 }, { "epoch": 1.8999726950031857, "grad_norm": 5.5625, "learning_rate": 4.394732457508069e-06, "loss": 1.1165286302566528, "step": 10438 }, { "epoch": 1.9003367616273779, "grad_norm": 8.3125, "learning_rate": 4.393329083995028e-06, "loss": 1.2374608516693115, "step": 10440 }, { "epoch": 1.90070082825157, "grad_norm": 9.0, "learning_rate": 4.39192594625443e-06, "loss": 1.300781488418579, "step": 10442 }, { "epoch": 1.9010648948757622, "grad_norm": 8.8125, "learning_rate": 4.3905230444921845e-06, "loss": 1.2841315269470215, "step": 10444 }, { "epoch": 1.9014289614999544, "grad_norm": 30.5, "learning_rate": 4.38912037891416e-06, "loss": 1.8599058389663696, "step": 10446 }, { "epoch": 1.9017930281241466, "grad_norm": 10.0, "learning_rate": 4.3877179497261894e-06, "loss": 0.9666900634765625, "step": 10448 }, { "epoch": 1.9021570947483388, "grad_norm": 20.125, "learning_rate": 4.386315757134078e-06, "loss": 1.015418529510498, "step": 10450 }, { "epoch": 1.902521161372531, "grad_norm": 8.9375, "learning_rate": 4.384913801343588e-06, "loss": 1.3500049114227295, "step": 10452 }, { "epoch": 1.9028852279967234, "grad_norm": 33.0, "learning_rate": 4.383512082560452e-06, "loss": 1.4793367385864258, "step": 10454 }, { "epoch": 1.9032492946209156, "grad_norm": 5.9375, "learning_rate": 4.382110600990368e-06, "loss": 1.116987705230713, "step": 10456 }, { "epoch": 1.9036133612451078, "grad_norm": 20.875, "learning_rate": 4.380709356838996e-06, "loss": 1.3126630783081055, "step": 10458 }, { "epoch": 1.9039774278693002, "grad_norm": 15.0625, "learning_rate": 4.3793083503119645e-06, "loss": 1.2656311988830566, "step": 10460 }, { "epoch": 1.9043414944934924, "grad_norm": 15.4375, "learning_rate": 4.377907581614862e-06, "loss": 0.9602458477020264, "step": 10462 }, { "epoch": 1.9047055611176846, "grad_norm": 13.625, "learning_rate": 4.37650705095325e-06, "loss": 1.130002498626709, "step": 10464 }, { "epoch": 1.9050696277418768, "grad_norm": 16.75, "learning_rate": 4.37510675853265e-06, "loss": 1.4291210174560547, "step": 10466 }, { "epoch": 1.905433694366069, "grad_norm": 50.5, "learning_rate": 4.373706704558546e-06, "loss": 1.5221493244171143, "step": 10468 }, { "epoch": 1.9057977609902612, "grad_norm": 9.8125, "learning_rate": 4.372306889236394e-06, "loss": 1.388022541999817, "step": 10470 }, { "epoch": 1.9061618276144534, "grad_norm": 9.3125, "learning_rate": 4.370907312771607e-06, "loss": 1.3597133159637451, "step": 10472 }, { "epoch": 1.9065258942386456, "grad_norm": 13.0625, "learning_rate": 4.3695079753695716e-06, "loss": 1.316415548324585, "step": 10474 }, { "epoch": 1.9068899608628378, "grad_norm": 15.8125, "learning_rate": 4.368108877235633e-06, "loss": 1.2514973878860474, "step": 10476 }, { "epoch": 1.90725402748703, "grad_norm": 63.5, "learning_rate": 4.366710018575102e-06, "loss": 1.9783823490142822, "step": 10478 }, { "epoch": 1.9076180941112224, "grad_norm": 8.375, "learning_rate": 4.365311399593258e-06, "loss": 1.1456583738327026, "step": 10480 }, { "epoch": 1.9079821607354146, "grad_norm": 24.375, "learning_rate": 4.363913020495341e-06, "loss": 1.7451246976852417, "step": 10482 }, { "epoch": 1.9083462273596068, "grad_norm": 19.625, "learning_rate": 4.362514881486557e-06, "loss": 1.3901368379592896, "step": 10484 }, { "epoch": 1.908710293983799, "grad_norm": 59.25, "learning_rate": 4.3611169827720795e-06, "loss": 1.9135520458221436, "step": 10486 }, { "epoch": 1.9090743606079914, "grad_norm": 37.25, "learning_rate": 4.35971932455704e-06, "loss": 1.4027525186538696, "step": 10488 }, { "epoch": 1.9094384272321836, "grad_norm": 9.875, "learning_rate": 4.3583219070465455e-06, "loss": 1.3585864305496216, "step": 10490 }, { "epoch": 1.9098024938563758, "grad_norm": 14.6875, "learning_rate": 4.356924730445656e-06, "loss": 0.8971824645996094, "step": 10492 }, { "epoch": 1.910166560480568, "grad_norm": 8.5625, "learning_rate": 4.3555277949594044e-06, "loss": 0.9596958756446838, "step": 10494 }, { "epoch": 1.9105306271047602, "grad_norm": 12.6875, "learning_rate": 4.354131100792785e-06, "loss": 0.4878948926925659, "step": 10496 }, { "epoch": 1.9108946937289524, "grad_norm": 9.1875, "learning_rate": 4.352734648150754e-06, "loss": 1.3880257606506348, "step": 10498 }, { "epoch": 1.9112587603531446, "grad_norm": 34.25, "learning_rate": 4.35133843723824e-06, "loss": 1.8474534749984741, "step": 10500 }, { "epoch": 1.9116228269773368, "grad_norm": 9.0, "learning_rate": 4.349942468260128e-06, "loss": 1.1689791679382324, "step": 10502 }, { "epoch": 1.911986893601529, "grad_norm": 9.25, "learning_rate": 4.348546741421271e-06, "loss": 1.4555976390838623, "step": 10504 }, { "epoch": 1.9123509602257212, "grad_norm": 6.59375, "learning_rate": 4.347151256926489e-06, "loss": 1.0344799757003784, "step": 10506 }, { "epoch": 1.9127150268499136, "grad_norm": 9.5, "learning_rate": 4.345756014980559e-06, "loss": 1.4202744960784912, "step": 10508 }, { "epoch": 1.9130790934741058, "grad_norm": 10.6875, "learning_rate": 4.344361015788232e-06, "loss": 1.1562318801879883, "step": 10510 }, { "epoch": 1.913443160098298, "grad_norm": 6.6875, "learning_rate": 4.342966259554215e-06, "loss": 1.3088953495025635, "step": 10512 }, { "epoch": 1.9138072267224904, "grad_norm": 14.0625, "learning_rate": 4.341571746483184e-06, "loss": 1.3426487445831299, "step": 10514 }, { "epoch": 1.9141712933466826, "grad_norm": 10.8125, "learning_rate": 4.34017747677978e-06, "loss": 1.414621114730835, "step": 10516 }, { "epoch": 1.9145353599708748, "grad_norm": 14.1875, "learning_rate": 4.338783450648602e-06, "loss": 1.2499535083770752, "step": 10518 }, { "epoch": 1.914899426595067, "grad_norm": 5.8125, "learning_rate": 4.3373896682942215e-06, "loss": 1.1770846843719482, "step": 10520 }, { "epoch": 1.9152634932192591, "grad_norm": 24.25, "learning_rate": 4.335996129921168e-06, "loss": 1.4655331373214722, "step": 10522 }, { "epoch": 1.9156275598434513, "grad_norm": 25.125, "learning_rate": 4.33460283573394e-06, "loss": 1.5368444919586182, "step": 10524 }, { "epoch": 1.9159916264676435, "grad_norm": 24.25, "learning_rate": 4.3332097859369985e-06, "loss": 1.507416009902954, "step": 10526 }, { "epoch": 1.9163556930918357, "grad_norm": 16.125, "learning_rate": 4.331816980734762e-06, "loss": 1.5046542882919312, "step": 10528 }, { "epoch": 1.916719759716028, "grad_norm": 10.5625, "learning_rate": 4.330424420331626e-06, "loss": 1.3257375955581665, "step": 10530 }, { "epoch": 1.9170838263402201, "grad_norm": 15.75, "learning_rate": 4.32903210493194e-06, "loss": 1.4912198781967163, "step": 10532 }, { "epoch": 1.9174478929644125, "grad_norm": 10.75, "learning_rate": 4.327640034740018e-06, "loss": 1.3608543872833252, "step": 10534 }, { "epoch": 1.9178119595886047, "grad_norm": 7.46875, "learning_rate": 4.326248209960147e-06, "loss": 1.2356034517288208, "step": 10536 }, { "epoch": 1.918176026212797, "grad_norm": 6.25, "learning_rate": 4.3248566307965645e-06, "loss": 0.938212513923645, "step": 10538 }, { "epoch": 1.9185400928369891, "grad_norm": 14.875, "learning_rate": 4.323465297453486e-06, "loss": 1.8588355779647827, "step": 10540 }, { "epoch": 1.9189041594611815, "grad_norm": 15.0, "learning_rate": 4.32207421013508e-06, "loss": 1.1650190353393555, "step": 10542 }, { "epoch": 1.9192682260853737, "grad_norm": 3.25, "learning_rate": 4.320683369045481e-06, "loss": 0.8501220941543579, "step": 10544 }, { "epoch": 1.919632292709566, "grad_norm": 9.125, "learning_rate": 4.3192927743887955e-06, "loss": 1.5645121335983276, "step": 10546 }, { "epoch": 1.9199963593337581, "grad_norm": 7.8125, "learning_rate": 4.317902426369081e-06, "loss": 0.9423190355300903, "step": 10548 }, { "epoch": 1.9203604259579503, "grad_norm": 10.5625, "learning_rate": 4.316512325190371e-06, "loss": 1.2525100708007812, "step": 10550 }, { "epoch": 1.9207244925821425, "grad_norm": 8.5, "learning_rate": 4.315122471056653e-06, "loss": 1.223229169845581, "step": 10552 }, { "epoch": 1.9210885592063347, "grad_norm": 8.4375, "learning_rate": 4.313732864171884e-06, "loss": 1.190844178199768, "step": 10554 }, { "epoch": 1.921452625830527, "grad_norm": 4.1875, "learning_rate": 4.312343504739985e-06, "loss": 1.204777717590332, "step": 10556 }, { "epoch": 1.921816692454719, "grad_norm": 6.375, "learning_rate": 4.310954392964835e-06, "loss": 1.1608490943908691, "step": 10558 }, { "epoch": 1.9221807590789113, "grad_norm": 13.0625, "learning_rate": 4.309565529050286e-06, "loss": 1.3439178466796875, "step": 10560 }, { "epoch": 1.9225448257031037, "grad_norm": 22.75, "learning_rate": 4.308176913200142e-06, "loss": 1.4133667945861816, "step": 10562 }, { "epoch": 1.922908892327296, "grad_norm": 11.0625, "learning_rate": 4.306788545618179e-06, "loss": 1.536535620689392, "step": 10564 }, { "epoch": 1.923272958951488, "grad_norm": 10.0, "learning_rate": 4.305400426508138e-06, "loss": 0.4578596353530884, "step": 10566 }, { "epoch": 1.9236370255756805, "grad_norm": 16.25, "learning_rate": 4.304012556073714e-06, "loss": 1.3371623754501343, "step": 10568 }, { "epoch": 1.9240010921998727, "grad_norm": 14.3125, "learning_rate": 4.302624934518577e-06, "loss": 1.0038235187530518, "step": 10570 }, { "epoch": 1.924365158824065, "grad_norm": 4.9375, "learning_rate": 4.301237562046351e-06, "loss": 0.9368493556976318, "step": 10572 }, { "epoch": 1.924729225448257, "grad_norm": 11.3125, "learning_rate": 4.299850438860625e-06, "loss": 1.2630239725112915, "step": 10574 }, { "epoch": 1.9250932920724493, "grad_norm": 11.1875, "learning_rate": 4.298463565164962e-06, "loss": 1.2804149389266968, "step": 10576 }, { "epoch": 1.9254573586966415, "grad_norm": 19.875, "learning_rate": 4.297076941162871e-06, "loss": 1.0377178192138672, "step": 10578 }, { "epoch": 1.9258214253208337, "grad_norm": 14.5, "learning_rate": 4.295690567057841e-06, "loss": 1.1005542278289795, "step": 10580 }, { "epoch": 1.9261854919450259, "grad_norm": 9.125, "learning_rate": 4.294304443053311e-06, "loss": 1.353513240814209, "step": 10582 }, { "epoch": 1.926549558569218, "grad_norm": 13.375, "learning_rate": 4.292918569352691e-06, "loss": 1.2248767614364624, "step": 10584 }, { "epoch": 1.9269136251934103, "grad_norm": 11.6875, "learning_rate": 4.291532946159355e-06, "loss": 1.732736349105835, "step": 10586 }, { "epoch": 1.9272776918176027, "grad_norm": 14.3125, "learning_rate": 4.290147573676633e-06, "loss": 1.9308345317840576, "step": 10588 }, { "epoch": 1.9276417584417949, "grad_norm": 6.125, "learning_rate": 4.288762452107827e-06, "loss": 1.313896656036377, "step": 10590 }, { "epoch": 1.928005825065987, "grad_norm": 15.875, "learning_rate": 4.287377581656196e-06, "loss": 1.16761314868927, "step": 10592 }, { "epoch": 1.9283698916901792, "grad_norm": 4.375, "learning_rate": 4.285992962524962e-06, "loss": 1.0834652185440063, "step": 10594 }, { "epoch": 1.9287339583143717, "grad_norm": 14.4375, "learning_rate": 4.284608594917318e-06, "loss": 1.215872049331665, "step": 10596 }, { "epoch": 1.9290980249385639, "grad_norm": 10.375, "learning_rate": 4.2832244790364075e-06, "loss": 1.291481614112854, "step": 10598 }, { "epoch": 1.929462091562756, "grad_norm": 19.5, "learning_rate": 4.28184061508535e-06, "loss": 1.6211323738098145, "step": 10600 }, { "epoch": 1.9298261581869482, "grad_norm": 6.125, "learning_rate": 4.280457003267218e-06, "loss": 1.3089375495910645, "step": 10602 }, { "epoch": 1.9301902248111404, "grad_norm": 5.375, "learning_rate": 4.27907364378505e-06, "loss": 1.319156289100647, "step": 10604 }, { "epoch": 1.9305542914353326, "grad_norm": 22.25, "learning_rate": 4.277690536841854e-06, "loss": 1.1688460111618042, "step": 10606 }, { "epoch": 1.9309183580595248, "grad_norm": 12.3125, "learning_rate": 4.276307682640588e-06, "loss": 0.8768727779388428, "step": 10608 }, { "epoch": 1.931282424683717, "grad_norm": 25.75, "learning_rate": 4.274925081384189e-06, "loss": 1.332898497581482, "step": 10610 }, { "epoch": 1.9316464913079092, "grad_norm": 12.3125, "learning_rate": 4.2735427332755395e-06, "loss": 1.9641807079315186, "step": 10612 }, { "epoch": 1.9320105579321014, "grad_norm": 6.78125, "learning_rate": 4.2721606385174966e-06, "loss": 1.1488454341888428, "step": 10614 }, { "epoch": 1.9323746245562938, "grad_norm": 10.5, "learning_rate": 4.27077879731288e-06, "loss": 1.191027283668518, "step": 10616 }, { "epoch": 1.932738691180486, "grad_norm": 16.625, "learning_rate": 4.269397209864465e-06, "loss": 1.5805479288101196, "step": 10618 }, { "epoch": 1.9331027578046782, "grad_norm": 6.90625, "learning_rate": 4.268015876374999e-06, "loss": 1.1137583255767822, "step": 10620 }, { "epoch": 1.9334668244288706, "grad_norm": 3.078125, "learning_rate": 4.266634797047182e-06, "loss": 1.2779500484466553, "step": 10622 }, { "epoch": 1.9338308910530628, "grad_norm": 7.5625, "learning_rate": 4.2652539720836826e-06, "loss": 0.9298569560050964, "step": 10624 }, { "epoch": 1.934194957677255, "grad_norm": 11.0625, "learning_rate": 4.2638734016871355e-06, "loss": 1.3740111589431763, "step": 10626 }, { "epoch": 1.9345590243014472, "grad_norm": 11.1875, "learning_rate": 4.262493086060127e-06, "loss": 1.0150814056396484, "step": 10628 }, { "epoch": 1.9349230909256394, "grad_norm": 10.4375, "learning_rate": 4.2611130254052204e-06, "loss": 1.4758589267730713, "step": 10630 }, { "epoch": 1.9352871575498316, "grad_norm": 33.75, "learning_rate": 4.259733219924929e-06, "loss": 1.858879804611206, "step": 10632 }, { "epoch": 1.9356512241740238, "grad_norm": 13.8125, "learning_rate": 4.258353669821732e-06, "loss": 1.7634761333465576, "step": 10634 }, { "epoch": 1.936015290798216, "grad_norm": 8.4375, "learning_rate": 4.256974375298079e-06, "loss": 1.4033633470535278, "step": 10636 }, { "epoch": 1.9363793574224082, "grad_norm": 21.625, "learning_rate": 4.255595336556371e-06, "loss": 1.578845739364624, "step": 10638 }, { "epoch": 1.9367434240466004, "grad_norm": 7.21875, "learning_rate": 4.25421655379898e-06, "loss": 1.3658075332641602, "step": 10640 }, { "epoch": 1.9371074906707928, "grad_norm": 4.71875, "learning_rate": 4.252838027228233e-06, "loss": 0.9435325860977173, "step": 10642 }, { "epoch": 1.937471557294985, "grad_norm": 2.75, "learning_rate": 4.251459757046424e-06, "loss": 0.9834281206130981, "step": 10644 }, { "epoch": 1.9378356239191772, "grad_norm": 8.6875, "learning_rate": 4.250081743455812e-06, "loss": 1.2774618864059448, "step": 10646 }, { "epoch": 1.9381996905433696, "grad_norm": 8.6875, "learning_rate": 4.248703986658609e-06, "loss": 1.2626127004623413, "step": 10648 }, { "epoch": 1.9385637571675618, "grad_norm": 15.3125, "learning_rate": 4.247326486857002e-06, "loss": 1.7468152046203613, "step": 10650 }, { "epoch": 1.938927823791754, "grad_norm": 7.375, "learning_rate": 4.245949244253129e-06, "loss": 1.2844215631484985, "step": 10652 }, { "epoch": 1.9392918904159462, "grad_norm": 14.375, "learning_rate": 4.2445722590490934e-06, "loss": 1.3195265531539917, "step": 10654 }, { "epoch": 1.9396559570401384, "grad_norm": 28.875, "learning_rate": 4.2431955314469686e-06, "loss": 1.322016954421997, "step": 10656 }, { "epoch": 1.9400200236643306, "grad_norm": 9.25, "learning_rate": 4.241819061648777e-06, "loss": 1.160323977470398, "step": 10658 }, { "epoch": 1.9403840902885228, "grad_norm": 9.75, "learning_rate": 4.240442849856515e-06, "loss": 0.9990017414093018, "step": 10660 }, { "epoch": 1.940748156912715, "grad_norm": 13.625, "learning_rate": 4.239066896272133e-06, "loss": 1.4734833240509033, "step": 10662 }, { "epoch": 1.9411122235369072, "grad_norm": 30.125, "learning_rate": 4.237691201097545e-06, "loss": 1.6726047992706299, "step": 10664 }, { "epoch": 1.9414762901610993, "grad_norm": 26.25, "learning_rate": 4.236315764534635e-06, "loss": 1.4053781032562256, "step": 10666 }, { "epoch": 1.9418403567852915, "grad_norm": 8.9375, "learning_rate": 4.234940586785236e-06, "loss": 1.4766428470611572, "step": 10668 }, { "epoch": 1.942204423409484, "grad_norm": 12.5625, "learning_rate": 4.233565668051156e-06, "loss": 1.1533327102661133, "step": 10670 }, { "epoch": 1.9425684900336762, "grad_norm": 12.875, "learning_rate": 4.232191008534154e-06, "loss": 1.349991798400879, "step": 10672 }, { "epoch": 1.9429325566578683, "grad_norm": 21.0, "learning_rate": 4.230816608435955e-06, "loss": 1.355294108390808, "step": 10674 }, { "epoch": 1.9432966232820608, "grad_norm": 14.9375, "learning_rate": 4.2294424679582514e-06, "loss": 1.3827952146530151, "step": 10676 }, { "epoch": 1.943660689906253, "grad_norm": 11.25, "learning_rate": 4.228068587302688e-06, "loss": 1.0594600439071655, "step": 10678 }, { "epoch": 1.9440247565304452, "grad_norm": 23.75, "learning_rate": 4.226694966670882e-06, "loss": 0.8736613988876343, "step": 10680 }, { "epoch": 1.9443888231546373, "grad_norm": 11.8125, "learning_rate": 4.225321606264401e-06, "loss": 1.3247336149215698, "step": 10682 }, { "epoch": 1.9447528897788295, "grad_norm": 8.0, "learning_rate": 4.22394850628478e-06, "loss": 1.281076192855835, "step": 10684 }, { "epoch": 1.9451169564030217, "grad_norm": 6.21875, "learning_rate": 4.222575666933521e-06, "loss": 1.1978704929351807, "step": 10686 }, { "epoch": 1.945481023027214, "grad_norm": 18.375, "learning_rate": 4.221203088412078e-06, "loss": 1.4343655109405518, "step": 10688 }, { "epoch": 1.9458450896514061, "grad_norm": 10.625, "learning_rate": 4.219830770921875e-06, "loss": 1.3603440523147583, "step": 10690 }, { "epoch": 1.9462091562755983, "grad_norm": 10.9375, "learning_rate": 4.218458714664291e-06, "loss": 1.562471628189087, "step": 10692 }, { "epoch": 1.9465732228997905, "grad_norm": 10.6875, "learning_rate": 4.217086919840669e-06, "loss": 1.9436063766479492, "step": 10694 }, { "epoch": 1.946937289523983, "grad_norm": 12.5625, "learning_rate": 4.2157153866523185e-06, "loss": 1.183310866355896, "step": 10696 }, { "epoch": 1.9473013561481751, "grad_norm": 14.0625, "learning_rate": 4.214344115300501e-06, "loss": 1.3032796382904053, "step": 10698 }, { "epoch": 1.9476654227723673, "grad_norm": 15.1875, "learning_rate": 4.212973105986451e-06, "loss": 1.7914981842041016, "step": 10700 }, { "epoch": 1.9480294893965597, "grad_norm": 7.5, "learning_rate": 4.211602358911354e-06, "loss": 1.3102962970733643, "step": 10702 }, { "epoch": 1.948393556020752, "grad_norm": 9.625, "learning_rate": 4.210231874276362e-06, "loss": 0.9511297941207886, "step": 10704 }, { "epoch": 1.9487576226449441, "grad_norm": 11.8125, "learning_rate": 4.208861652282592e-06, "loss": 1.601237416267395, "step": 10706 }, { "epoch": 1.9491216892691363, "grad_norm": 13.4375, "learning_rate": 4.2074916931311124e-06, "loss": 1.498426079750061, "step": 10708 }, { "epoch": 1.9494857558933285, "grad_norm": 27.375, "learning_rate": 4.206121997022966e-06, "loss": 1.3698067665100098, "step": 10710 }, { "epoch": 1.9498498225175207, "grad_norm": 17.625, "learning_rate": 4.204752564159144e-06, "loss": 1.1138907670974731, "step": 10712 }, { "epoch": 1.950213889141713, "grad_norm": 41.5, "learning_rate": 4.203383394740607e-06, "loss": 0.8284420371055603, "step": 10714 }, { "epoch": 1.950577955765905, "grad_norm": 8.9375, "learning_rate": 4.202014488968279e-06, "loss": 1.0195693969726562, "step": 10716 }, { "epoch": 1.9509420223900973, "grad_norm": 5.3125, "learning_rate": 4.200645847043034e-06, "loss": 0.914522647857666, "step": 10718 }, { "epoch": 1.9513060890142895, "grad_norm": 7.84375, "learning_rate": 4.199277469165724e-06, "loss": 1.1466375589370728, "step": 10720 }, { "epoch": 1.951670155638482, "grad_norm": 7.59375, "learning_rate": 4.197909355537144e-06, "loss": 1.3353852033615112, "step": 10722 }, { "epoch": 1.952034222262674, "grad_norm": 10.8125, "learning_rate": 4.196541506358062e-06, "loss": 1.3710817098617554, "step": 10724 }, { "epoch": 1.9523982888868663, "grad_norm": 6.71875, "learning_rate": 4.195173921829208e-06, "loss": 1.2640271186828613, "step": 10726 }, { "epoch": 1.9527623555110585, "grad_norm": 19.625, "learning_rate": 4.193806602151264e-06, "loss": 1.4582655429840088, "step": 10728 }, { "epoch": 1.953126422135251, "grad_norm": 28.25, "learning_rate": 4.192439547524885e-06, "loss": 2.082249402999878, "step": 10730 }, { "epoch": 1.953490488759443, "grad_norm": 12.5, "learning_rate": 4.191072758150674e-06, "loss": 1.3775582313537598, "step": 10732 }, { "epoch": 1.9538545553836353, "grad_norm": 14.0, "learning_rate": 4.189706234229204e-06, "loss": 1.4993131160736084, "step": 10734 }, { "epoch": 1.9542186220078275, "grad_norm": 22.125, "learning_rate": 4.1883399759610114e-06, "loss": 1.3735322952270508, "step": 10736 }, { "epoch": 1.9545826886320197, "grad_norm": 13.9375, "learning_rate": 4.18697398354658e-06, "loss": 1.4608542919158936, "step": 10738 }, { "epoch": 1.9549467552562119, "grad_norm": 20.0, "learning_rate": 4.185608257186374e-06, "loss": 1.4929122924804688, "step": 10740 }, { "epoch": 1.955310821880404, "grad_norm": 10.125, "learning_rate": 4.184242797080802e-06, "loss": 1.2406938076019287, "step": 10742 }, { "epoch": 1.9556748885045963, "grad_norm": 9.5625, "learning_rate": 4.182877603430238e-06, "loss": 0.9509090185165405, "step": 10744 }, { "epoch": 1.9560389551287884, "grad_norm": 9.8125, "learning_rate": 4.1815126764350255e-06, "loss": 1.6715571880340576, "step": 10746 }, { "epoch": 1.9564030217529806, "grad_norm": 10.8125, "learning_rate": 4.180148016295454e-06, "loss": 1.6843599081039429, "step": 10748 }, { "epoch": 1.956767088377173, "grad_norm": 17.25, "learning_rate": 4.1787836232117905e-06, "loss": 1.501615285873413, "step": 10750 }, { "epoch": 1.9571311550013653, "grad_norm": 20.5, "learning_rate": 4.177419497384247e-06, "loss": 1.7046353816986084, "step": 10752 }, { "epoch": 1.9574952216255574, "grad_norm": 15.75, "learning_rate": 4.176055639013005e-06, "loss": 1.4324826002120972, "step": 10754 }, { "epoch": 1.9578592882497499, "grad_norm": 20.625, "learning_rate": 4.174692048298208e-06, "loss": 1.0059279203414917, "step": 10756 }, { "epoch": 1.958223354873942, "grad_norm": 8.75, "learning_rate": 4.173328725439953e-06, "loss": 1.5212697982788086, "step": 10758 }, { "epoch": 1.9585874214981343, "grad_norm": 14.1875, "learning_rate": 4.171965670638309e-06, "loss": 1.4247984886169434, "step": 10760 }, { "epoch": 1.9589514881223264, "grad_norm": 8.8125, "learning_rate": 4.1706028840932924e-06, "loss": 1.3881982564926147, "step": 10762 }, { "epoch": 1.9593155547465186, "grad_norm": 4.125, "learning_rate": 4.169240366004887e-06, "loss": 1.0480122566223145, "step": 10764 }, { "epoch": 1.9596796213707108, "grad_norm": 12.6875, "learning_rate": 4.167878116573041e-06, "loss": 1.0187500715255737, "step": 10766 }, { "epoch": 1.960043687994903, "grad_norm": 16.5, "learning_rate": 4.166516135997654e-06, "loss": 1.4734399318695068, "step": 10768 }, { "epoch": 1.9604077546190952, "grad_norm": 13.4375, "learning_rate": 4.165154424478597e-06, "loss": 1.3496389389038086, "step": 10770 }, { "epoch": 1.9607718212432874, "grad_norm": 7.6875, "learning_rate": 4.1637929822156885e-06, "loss": 1.3059468269348145, "step": 10772 }, { "epoch": 1.9611358878674796, "grad_norm": 8.25, "learning_rate": 4.162431809408719e-06, "loss": 1.4281243085861206, "step": 10774 }, { "epoch": 1.961499954491672, "grad_norm": 12.8125, "learning_rate": 4.161070906257437e-06, "loss": 1.4421210289001465, "step": 10776 }, { "epoch": 1.9618640211158642, "grad_norm": 7.6875, "learning_rate": 4.1597102729615435e-06, "loss": 1.0801186561584473, "step": 10778 }, { "epoch": 1.9622280877400564, "grad_norm": 13.375, "learning_rate": 4.158349909720713e-06, "loss": 1.1573163270950317, "step": 10780 }, { "epoch": 1.9625921543642486, "grad_norm": 5.46875, "learning_rate": 4.156989816734568e-06, "loss": 1.343355655670166, "step": 10782 }, { "epoch": 1.962956220988441, "grad_norm": 9.0, "learning_rate": 4.1556299942026965e-06, "loss": 0.8314065337181091, "step": 10784 }, { "epoch": 1.9633202876126332, "grad_norm": 9.0, "learning_rate": 4.154270442324653e-06, "loss": 0.9527825117111206, "step": 10786 }, { "epoch": 1.9636843542368254, "grad_norm": 22.0, "learning_rate": 4.1529111612999376e-06, "loss": 1.5368661880493164, "step": 10788 }, { "epoch": 1.9640484208610176, "grad_norm": 31.5, "learning_rate": 4.151552151328026e-06, "loss": 1.8109310865402222, "step": 10790 }, { "epoch": 1.9644124874852098, "grad_norm": 9.0, "learning_rate": 4.150193412608346e-06, "loss": 0.9711395502090454, "step": 10792 }, { "epoch": 1.964776554109402, "grad_norm": 23.125, "learning_rate": 4.148834945340283e-06, "loss": 0.9717168211936951, "step": 10794 }, { "epoch": 1.9651406207335942, "grad_norm": 32.0, "learning_rate": 4.1474767497231924e-06, "loss": 1.4291647672653198, "step": 10796 }, { "epoch": 1.9655046873577864, "grad_norm": 4.9375, "learning_rate": 4.146118825956379e-06, "loss": 1.1666853427886963, "step": 10798 }, { "epoch": 1.9658687539819786, "grad_norm": 22.125, "learning_rate": 4.144761174239118e-06, "loss": 1.2857627868652344, "step": 10800 }, { "epoch": 1.9662328206061708, "grad_norm": 55.25, "learning_rate": 4.143403794770633e-06, "loss": 1.5387756824493408, "step": 10802 }, { "epoch": 1.9665968872303632, "grad_norm": 10.125, "learning_rate": 4.142046687750117e-06, "loss": 1.6857328414916992, "step": 10804 }, { "epoch": 1.9669609538545554, "grad_norm": 8.5625, "learning_rate": 4.1406898533767225e-06, "loss": 1.089185357093811, "step": 10806 }, { "epoch": 1.9673250204787476, "grad_norm": 6.59375, "learning_rate": 4.139333291849555e-06, "loss": 1.3702129125595093, "step": 10808 }, { "epoch": 1.96768908710294, "grad_norm": 13.625, "learning_rate": 4.137977003367687e-06, "loss": 1.2946057319641113, "step": 10810 }, { "epoch": 1.9680531537271322, "grad_norm": 13.875, "learning_rate": 4.136620988130148e-06, "loss": 1.5037271976470947, "step": 10812 }, { "epoch": 1.9684172203513244, "grad_norm": 11.1875, "learning_rate": 4.135265246335924e-06, "loss": 1.3822699785232544, "step": 10814 }, { "epoch": 1.9687812869755166, "grad_norm": 62.0, "learning_rate": 4.133909778183973e-06, "loss": 1.1629102230072021, "step": 10816 }, { "epoch": 1.9691453535997088, "grad_norm": 11.8125, "learning_rate": 4.132554583873195e-06, "loss": 1.7002830505371094, "step": 10818 }, { "epoch": 1.969509420223901, "grad_norm": 12.3125, "learning_rate": 4.131199663602468e-06, "loss": 1.3595935106277466, "step": 10820 }, { "epoch": 1.9698734868480932, "grad_norm": 9.5625, "learning_rate": 4.129845017570615e-06, "loss": 1.2798871994018555, "step": 10822 }, { "epoch": 1.9702375534722854, "grad_norm": 15.25, "learning_rate": 4.128490645976424e-06, "loss": 1.2766427993774414, "step": 10824 }, { "epoch": 1.9706016200964775, "grad_norm": 21.875, "learning_rate": 4.12713654901865e-06, "loss": 1.4783878326416016, "step": 10826 }, { "epoch": 1.9709656867206697, "grad_norm": 18.875, "learning_rate": 4.125782726895995e-06, "loss": 1.8505593538284302, "step": 10828 }, { "epoch": 1.9713297533448622, "grad_norm": 17.0, "learning_rate": 4.124429179807131e-06, "loss": 1.2525584697723389, "step": 10830 }, { "epoch": 1.9716938199690544, "grad_norm": 16.375, "learning_rate": 4.123075907950683e-06, "loss": 1.4278440475463867, "step": 10832 }, { "epoch": 1.9720578865932465, "grad_norm": 17.5, "learning_rate": 4.121722911525237e-06, "loss": 1.7229260206222534, "step": 10834 }, { "epoch": 1.9724219532174387, "grad_norm": 20.625, "learning_rate": 4.120370190729345e-06, "loss": 1.595231294631958, "step": 10836 }, { "epoch": 1.9727860198416312, "grad_norm": 18.875, "learning_rate": 4.119017745761507e-06, "loss": 0.9405343532562256, "step": 10838 }, { "epoch": 1.9731500864658233, "grad_norm": 26.875, "learning_rate": 4.117665576820196e-06, "loss": 1.1189336776733398, "step": 10840 }, { "epoch": 1.9735141530900155, "grad_norm": 8.3125, "learning_rate": 4.11631368410383e-06, "loss": 1.4718209505081177, "step": 10842 }, { "epoch": 1.9738782197142077, "grad_norm": 5.3125, "learning_rate": 4.114962067810796e-06, "loss": 1.3799368143081665, "step": 10844 }, { "epoch": 1.9742422863384, "grad_norm": 7.34375, "learning_rate": 4.113610728139443e-06, "loss": 1.3353067636489868, "step": 10846 }, { "epoch": 1.9746063529625921, "grad_norm": 6.90625, "learning_rate": 4.112259665288067e-06, "loss": 1.2081918716430664, "step": 10848 }, { "epoch": 1.9749704195867843, "grad_norm": 5.15625, "learning_rate": 4.110908879454938e-06, "loss": 1.038845419883728, "step": 10850 }, { "epoch": 1.9753344862109765, "grad_norm": 6.0, "learning_rate": 4.109558370838273e-06, "loss": 1.1737656593322754, "step": 10852 }, { "epoch": 1.9756985528351687, "grad_norm": 14.8125, "learning_rate": 4.108208139636255e-06, "loss": 1.2012486457824707, "step": 10854 }, { "epoch": 1.976062619459361, "grad_norm": 12.125, "learning_rate": 4.106858186047028e-06, "loss": 1.0143401622772217, "step": 10856 }, { "epoch": 1.9764266860835533, "grad_norm": 19.25, "learning_rate": 4.105508510268688e-06, "loss": 1.3670973777770996, "step": 10858 }, { "epoch": 1.9767907527077455, "grad_norm": 12.5625, "learning_rate": 4.104159112499298e-06, "loss": 1.4914178848266602, "step": 10860 }, { "epoch": 1.9771548193319377, "grad_norm": 19.375, "learning_rate": 4.102809992936875e-06, "loss": 1.5278525352478027, "step": 10862 }, { "epoch": 1.9775188859561301, "grad_norm": 6.28125, "learning_rate": 4.101461151779395e-06, "loss": 1.3893541097640991, "step": 10864 }, { "epoch": 1.9778829525803223, "grad_norm": 20.375, "learning_rate": 4.1001125892248e-06, "loss": 1.114429235458374, "step": 10866 }, { "epoch": 1.9782470192045145, "grad_norm": 24.125, "learning_rate": 4.098764305470979e-06, "loss": 2.0433480739593506, "step": 10868 }, { "epoch": 1.9786110858287067, "grad_norm": 21.125, "learning_rate": 4.0974163007157956e-06, "loss": 1.8635485172271729, "step": 10870 }, { "epoch": 1.978975152452899, "grad_norm": 45.5, "learning_rate": 4.0960685751570565e-06, "loss": 1.1606404781341553, "step": 10872 }, { "epoch": 1.979339219077091, "grad_norm": 48.75, "learning_rate": 4.0947211289925375e-06, "loss": 0.9447245597839355, "step": 10874 }, { "epoch": 1.9797032857012833, "grad_norm": 18.375, "learning_rate": 4.093373962419974e-06, "loss": 1.3852027654647827, "step": 10876 }, { "epoch": 1.9800673523254755, "grad_norm": 16.625, "learning_rate": 4.092027075637053e-06, "loss": 1.3616926670074463, "step": 10878 }, { "epoch": 1.9804314189496677, "grad_norm": 11.25, "learning_rate": 4.090680468841428e-06, "loss": 1.2145402431488037, "step": 10880 }, { "epoch": 1.9807954855738599, "grad_norm": 30.0, "learning_rate": 4.089334142230704e-06, "loss": 0.5566257238388062, "step": 10882 }, { "epoch": 1.9811595521980523, "grad_norm": 8.75, "learning_rate": 4.087988096002451e-06, "loss": 0.8723641633987427, "step": 10884 }, { "epoch": 1.9815236188222445, "grad_norm": 8.0625, "learning_rate": 4.0866423303541995e-06, "loss": 1.459789514541626, "step": 10886 }, { "epoch": 1.9818876854464367, "grad_norm": 8.1875, "learning_rate": 4.085296845483429e-06, "loss": 0.9855976700782776, "step": 10888 }, { "epoch": 1.9822517520706289, "grad_norm": 7.375, "learning_rate": 4.083951641587589e-06, "loss": 1.1203888654708862, "step": 10890 }, { "epoch": 1.9826158186948213, "grad_norm": 8.625, "learning_rate": 4.082606718864079e-06, "loss": 1.3982939720153809, "step": 10892 }, { "epoch": 1.9829798853190135, "grad_norm": 9.375, "learning_rate": 4.081262077510262e-06, "loss": 1.5338878631591797, "step": 10894 }, { "epoch": 1.9833439519432057, "grad_norm": 6.375, "learning_rate": 4.079917717723461e-06, "loss": 1.0103753805160522, "step": 10896 }, { "epoch": 1.9837080185673979, "grad_norm": 7.0625, "learning_rate": 4.078573639700951e-06, "loss": 1.4745938777923584, "step": 10898 }, { "epoch": 1.98407208519159, "grad_norm": 12.125, "learning_rate": 4.077229843639976e-06, "loss": 1.472595453262329, "step": 10900 }, { "epoch": 1.9844361518157823, "grad_norm": 23.0, "learning_rate": 4.075886329737727e-06, "loss": 1.6663594245910645, "step": 10902 }, { "epoch": 1.9848002184399745, "grad_norm": 10.25, "learning_rate": 4.07454309819136e-06, "loss": 1.2754945755004883, "step": 10904 }, { "epoch": 1.9851642850641666, "grad_norm": 12.1875, "learning_rate": 4.073200149197993e-06, "loss": 1.1397364139556885, "step": 10906 }, { "epoch": 1.9855283516883588, "grad_norm": 10.125, "learning_rate": 4.071857482954692e-06, "loss": 1.3538984060287476, "step": 10908 }, { "epoch": 1.985892418312551, "grad_norm": 5.84375, "learning_rate": 4.070515099658496e-06, "loss": 1.2290761470794678, "step": 10910 }, { "epoch": 1.9862564849367434, "grad_norm": 7.28125, "learning_rate": 4.069172999506387e-06, "loss": 0.6596604585647583, "step": 10912 }, { "epoch": 1.9866205515609356, "grad_norm": 35.25, "learning_rate": 4.067831182695313e-06, "loss": 0.29634204506874084, "step": 10914 }, { "epoch": 1.9869846181851278, "grad_norm": 12.9375, "learning_rate": 4.0664896494221875e-06, "loss": 1.6917842626571655, "step": 10916 }, { "epoch": 1.9873486848093203, "grad_norm": 8.875, "learning_rate": 4.0651483998838655e-06, "loss": 1.1050643920898438, "step": 10918 }, { "epoch": 1.9877127514335124, "grad_norm": 8.375, "learning_rate": 4.0638074342771784e-06, "loss": 1.35196852684021, "step": 10920 }, { "epoch": 1.9880768180577046, "grad_norm": 7.0, "learning_rate": 4.062466752798901e-06, "loss": 1.3063468933105469, "step": 10922 }, { "epoch": 1.9884408846818968, "grad_norm": 16.625, "learning_rate": 4.061126355645775e-06, "loss": 1.1754240989685059, "step": 10924 }, { "epoch": 1.988804951306089, "grad_norm": 18.125, "learning_rate": 4.059786243014503e-06, "loss": 1.528351068496704, "step": 10926 }, { "epoch": 1.9891690179302812, "grad_norm": 9.9375, "learning_rate": 4.0584464151017315e-06, "loss": 1.0657646656036377, "step": 10928 }, { "epoch": 1.9895330845544734, "grad_norm": 10.0625, "learning_rate": 4.057106872104084e-06, "loss": 1.4041496515274048, "step": 10930 }, { "epoch": 1.9898971511786656, "grad_norm": 14.9375, "learning_rate": 4.055767614218128e-06, "loss": 1.2570862770080566, "step": 10932 }, { "epoch": 1.9902612178028578, "grad_norm": 16.25, "learning_rate": 4.054428641640393e-06, "loss": 1.221881628036499, "step": 10934 }, { "epoch": 1.99062528442705, "grad_norm": 28.625, "learning_rate": 4.053089954567374e-06, "loss": 0.8489706516265869, "step": 10936 }, { "epoch": 1.9909893510512424, "grad_norm": 11.3125, "learning_rate": 4.051751553195511e-06, "loss": 1.3841204643249512, "step": 10938 }, { "epoch": 1.9913534176754346, "grad_norm": 5.3125, "learning_rate": 4.050413437721214e-06, "loss": 1.3610183000564575, "step": 10940 }, { "epoch": 1.9917174842996268, "grad_norm": 207.0, "learning_rate": 4.049075608340845e-06, "loss": 1.3409671783447266, "step": 10942 }, { "epoch": 1.9920815509238192, "grad_norm": 8.8125, "learning_rate": 4.04773806525072e-06, "loss": 1.311365008354187, "step": 10944 }, { "epoch": 1.9924456175480114, "grad_norm": 9.6875, "learning_rate": 4.046400808647126e-06, "loss": 1.4495058059692383, "step": 10946 }, { "epoch": 1.9928096841722036, "grad_norm": 22.375, "learning_rate": 4.045063838726293e-06, "loss": 1.3023087978363037, "step": 10948 }, { "epoch": 1.9931737507963958, "grad_norm": 28.25, "learning_rate": 4.043727155684422e-06, "loss": 1.3837381601333618, "step": 10950 }, { "epoch": 1.993537817420588, "grad_norm": 20.875, "learning_rate": 4.042390759717661e-06, "loss": 1.3718791007995605, "step": 10952 }, { "epoch": 1.9939018840447802, "grad_norm": 22.125, "learning_rate": 4.041054651022121e-06, "loss": 1.923041582107544, "step": 10954 }, { "epoch": 1.9942659506689724, "grad_norm": 14.25, "learning_rate": 4.039718829793876e-06, "loss": 1.4297645092010498, "step": 10956 }, { "epoch": 1.9946300172931646, "grad_norm": 9.25, "learning_rate": 4.038383296228945e-06, "loss": 1.2713063955307007, "step": 10958 }, { "epoch": 1.9949940839173568, "grad_norm": 228.0, "learning_rate": 4.037048050523318e-06, "loss": 1.1857223510742188, "step": 10960 }, { "epoch": 1.995358150541549, "grad_norm": 4.5625, "learning_rate": 4.035713092872933e-06, "loss": 1.20783269405365, "step": 10962 }, { "epoch": 1.9957222171657414, "grad_norm": 12.875, "learning_rate": 4.0343784234736905e-06, "loss": 1.4405242204666138, "step": 10964 }, { "epoch": 1.9960862837899336, "grad_norm": 21.625, "learning_rate": 4.03304404252145e-06, "loss": 1.31243097782135, "step": 10966 }, { "epoch": 1.9964503504141258, "grad_norm": 96.0, "learning_rate": 4.031709950212023e-06, "loss": 0.7867608070373535, "step": 10968 }, { "epoch": 1.996814417038318, "grad_norm": 6.28125, "learning_rate": 4.030376146741187e-06, "loss": 1.3857314586639404, "step": 10970 }, { "epoch": 1.9971784836625104, "grad_norm": 6.09375, "learning_rate": 4.029042632304667e-06, "loss": 1.2450498342514038, "step": 10972 }, { "epoch": 1.9975425502867026, "grad_norm": 36.0, "learning_rate": 4.027709407098152e-06, "loss": 1.1632217168807983, "step": 10974 }, { "epoch": 1.9979066169108948, "grad_norm": 7.8125, "learning_rate": 4.026376471317292e-06, "loss": 1.3292152881622314, "step": 10976 }, { "epoch": 1.998270683535087, "grad_norm": 27.0, "learning_rate": 4.025043825157683e-06, "loss": 1.2493515014648438, "step": 10978 }, { "epoch": 1.9986347501592792, "grad_norm": 9.0, "learning_rate": 4.023711468814892e-06, "loss": 1.3530218601226807, "step": 10980 }, { "epoch": 1.9989988167834714, "grad_norm": 9.1875, "learning_rate": 4.022379402484432e-06, "loss": 1.179415225982666, "step": 10982 }, { "epoch": 1.9993628834076635, "grad_norm": 11.0625, "learning_rate": 4.021047626361778e-06, "loss": 1.8628883361816406, "step": 10984 }, { "epoch": 1.9997269500318557, "grad_norm": 20.625, "learning_rate": 4.019716140642369e-06, "loss": 1.5577448606491089, "step": 10986 }, { "epoch": 2.0, "grad_norm": 26.125, "learning_rate": 4.018384945521587e-06, "loss": 1.4067811965942383, "step": 10988 }, { "epoch": 2.000364066624192, "grad_norm": 4.5, "learning_rate": 4.017054041194788e-06, "loss": 1.392914891242981, "step": 10990 }, { "epoch": 2.0007281332483844, "grad_norm": 26.375, "learning_rate": 4.015723427857269e-06, "loss": 0.8794772624969482, "step": 10992 }, { "epoch": 2.0010921998725766, "grad_norm": 10.125, "learning_rate": 4.014393105704295e-06, "loss": 1.4271111488342285, "step": 10994 }, { "epoch": 2.0014562664967688, "grad_norm": 6.28125, "learning_rate": 4.0130630749310885e-06, "loss": 1.029604196548462, "step": 10996 }, { "epoch": 2.001820333120961, "grad_norm": 24.5, "learning_rate": 4.01173333573282e-06, "loss": 1.4767204523086548, "step": 10998 }, { "epoch": 2.002184399745153, "grad_norm": 30.5, "learning_rate": 4.010403888304631e-06, "loss": 0.2808459997177124, "step": 11000 }, { "epoch": 2.002548466369346, "grad_norm": 7.28125, "learning_rate": 4.009074732841605e-06, "loss": 1.2969317436218262, "step": 11002 }, { "epoch": 2.002912532993538, "grad_norm": 3.171875, "learning_rate": 4.007745869538793e-06, "loss": 1.2992312908172607, "step": 11004 }, { "epoch": 2.00327659961773, "grad_norm": 15.0, "learning_rate": 4.006417298591203e-06, "loss": 1.2189079523086548, "step": 11006 }, { "epoch": 2.0036406662419224, "grad_norm": 23.375, "learning_rate": 4.005089020193793e-06, "loss": 1.8249293565750122, "step": 11008 }, { "epoch": 2.0040047328661146, "grad_norm": 65.0, "learning_rate": 4.003761034541487e-06, "loss": 1.1246477365493774, "step": 11010 }, { "epoch": 2.0043687994903068, "grad_norm": 17.125, "learning_rate": 4.0024333418291586e-06, "loss": 1.397111177444458, "step": 11012 }, { "epoch": 2.004732866114499, "grad_norm": 6.90625, "learning_rate": 4.001105942251639e-06, "loss": 1.3780319690704346, "step": 11014 }, { "epoch": 2.005096932738691, "grad_norm": 19.625, "learning_rate": 3.999778836003725e-06, "loss": 1.4079086780548096, "step": 11016 }, { "epoch": 2.0054609993628834, "grad_norm": 9.4375, "learning_rate": 3.998452023280158e-06, "loss": 1.456685185432434, "step": 11018 }, { "epoch": 2.0058250659870756, "grad_norm": 13.5625, "learning_rate": 3.997125504275649e-06, "loss": 1.3323144912719727, "step": 11020 }, { "epoch": 2.0061891326112677, "grad_norm": 35.75, "learning_rate": 3.995799279184852e-06, "loss": 1.7964556217193604, "step": 11022 }, { "epoch": 2.00655319923546, "grad_norm": 10.875, "learning_rate": 3.9944733482023874e-06, "loss": 1.0823254585266113, "step": 11024 }, { "epoch": 2.006917265859652, "grad_norm": 15.625, "learning_rate": 3.993147711522835e-06, "loss": 1.273159384727478, "step": 11026 }, { "epoch": 2.0072813324838443, "grad_norm": 20.875, "learning_rate": 3.991822369340719e-06, "loss": 1.3612158298492432, "step": 11028 }, { "epoch": 2.007645399108037, "grad_norm": 11.0, "learning_rate": 3.9904973218505355e-06, "loss": 1.4647706747055054, "step": 11030 }, { "epoch": 2.008009465732229, "grad_norm": 9.625, "learning_rate": 3.989172569246724e-06, "loss": 1.4063844680786133, "step": 11032 }, { "epoch": 2.0083735323564214, "grad_norm": 18.0, "learning_rate": 3.987848111723689e-06, "loss": 1.2605879306793213, "step": 11034 }, { "epoch": 2.0087375989806135, "grad_norm": 8.625, "learning_rate": 3.9865239494757905e-06, "loss": 1.2857003211975098, "step": 11036 }, { "epoch": 2.0091016656048057, "grad_norm": 25.0, "learning_rate": 3.98520008269734e-06, "loss": 1.5507296323776245, "step": 11038 }, { "epoch": 2.009465732228998, "grad_norm": 13.3125, "learning_rate": 3.983876511582615e-06, "loss": 1.1744754314422607, "step": 11040 }, { "epoch": 2.00982979885319, "grad_norm": 1.796875, "learning_rate": 3.982553236325839e-06, "loss": 1.1620770692825317, "step": 11042 }, { "epoch": 2.0101938654773823, "grad_norm": 7.46875, "learning_rate": 3.981230257121199e-06, "loss": 0.9784259796142578, "step": 11044 }, { "epoch": 2.0105579321015745, "grad_norm": 26.0, "learning_rate": 3.97990757416284e-06, "loss": 1.6096962690353394, "step": 11046 }, { "epoch": 2.0109219987257667, "grad_norm": 4.59375, "learning_rate": 3.978585187644855e-06, "loss": 1.377596378326416, "step": 11048 }, { "epoch": 2.011286065349959, "grad_norm": 13.375, "learning_rate": 3.977263097761305e-06, "loss": 1.538359522819519, "step": 11050 }, { "epoch": 2.011650131974151, "grad_norm": 7.09375, "learning_rate": 3.975941304706195e-06, "loss": 1.344555139541626, "step": 11052 }, { "epoch": 2.0120141985983433, "grad_norm": 12.1875, "learning_rate": 3.974619808673496e-06, "loss": 1.3899462223052979, "step": 11054 }, { "epoch": 2.012378265222536, "grad_norm": 12.5625, "learning_rate": 3.973298609857135e-06, "loss": 1.104241967201233, "step": 11056 }, { "epoch": 2.012742331846728, "grad_norm": 5.65625, "learning_rate": 3.971977708450984e-06, "loss": 0.7514855861663818, "step": 11058 }, { "epoch": 2.0131063984709203, "grad_norm": 10.0625, "learning_rate": 3.970657104648892e-06, "loss": 1.4637733697891235, "step": 11060 }, { "epoch": 2.0134704650951125, "grad_norm": 13.625, "learning_rate": 3.969336798644642e-06, "loss": 1.3208070993423462, "step": 11062 }, { "epoch": 2.0138345317193047, "grad_norm": 9.4375, "learning_rate": 3.968016790631986e-06, "loss": 1.2899292707443237, "step": 11064 }, { "epoch": 2.014198598343497, "grad_norm": 17.125, "learning_rate": 3.966697080804634e-06, "loss": 1.3354780673980713, "step": 11066 }, { "epoch": 2.014562664967689, "grad_norm": 5.15625, "learning_rate": 3.965377669356242e-06, "loss": 1.1566604375839233, "step": 11068 }, { "epoch": 2.0149267315918813, "grad_norm": 15.375, "learning_rate": 3.964058556480435e-06, "loss": 1.470249891281128, "step": 11070 }, { "epoch": 2.0152907982160735, "grad_norm": 4.125, "learning_rate": 3.9627397423707825e-06, "loss": 1.1017526388168335, "step": 11072 }, { "epoch": 2.0156548648402657, "grad_norm": 6.59375, "learning_rate": 3.961421227220814e-06, "loss": 1.3219066858291626, "step": 11074 }, { "epoch": 2.016018931464458, "grad_norm": 13.9375, "learning_rate": 3.960103011224023e-06, "loss": 1.4901299476623535, "step": 11076 }, { "epoch": 2.01638299808865, "grad_norm": 5.4375, "learning_rate": 3.958785094573844e-06, "loss": 1.5103120803833008, "step": 11078 }, { "epoch": 2.0167470647128423, "grad_norm": 12.125, "learning_rate": 3.957467477463684e-06, "loss": 1.3990949392318726, "step": 11080 }, { "epoch": 2.0171111313370345, "grad_norm": 21.25, "learning_rate": 3.956150160086892e-06, "loss": 1.569573998451233, "step": 11082 }, { "epoch": 2.017475197961227, "grad_norm": 4.5, "learning_rate": 3.95483314263678e-06, "loss": 0.9206288456916809, "step": 11084 }, { "epoch": 2.0178392645854193, "grad_norm": 21.0, "learning_rate": 3.953516425306618e-06, "loss": 2.0021207332611084, "step": 11086 }, { "epoch": 2.0182033312096115, "grad_norm": 15.875, "learning_rate": 3.952200008289624e-06, "loss": 1.2481396198272705, "step": 11088 }, { "epoch": 2.0185673978338037, "grad_norm": 13.6875, "learning_rate": 3.950883891778985e-06, "loss": 1.3283307552337646, "step": 11090 }, { "epoch": 2.018931464457996, "grad_norm": 30.625, "learning_rate": 3.949568075967826e-06, "loss": 1.3686829805374146, "step": 11092 }, { "epoch": 2.019295531082188, "grad_norm": 30.875, "learning_rate": 3.948252561049242e-06, "loss": 1.3300321102142334, "step": 11094 }, { "epoch": 2.0196595977063803, "grad_norm": 51.5, "learning_rate": 3.946937347216283e-06, "loss": 1.2486698627471924, "step": 11096 }, { "epoch": 2.0200236643305725, "grad_norm": 17.0, "learning_rate": 3.945622434661944e-06, "loss": 1.2822213172912598, "step": 11098 }, { "epoch": 2.0203877309547646, "grad_norm": 8.25, "learning_rate": 3.944307823579192e-06, "loss": 1.1264550685882568, "step": 11100 }, { "epoch": 2.020751797578957, "grad_norm": 32.0, "learning_rate": 3.942993514160933e-06, "loss": 1.4714895486831665, "step": 11102 }, { "epoch": 2.021115864203149, "grad_norm": 31.5, "learning_rate": 3.941679506600037e-06, "loss": 1.3934190273284912, "step": 11104 }, { "epoch": 2.0214799308273412, "grad_norm": 17.875, "learning_rate": 3.940365801089336e-06, "loss": 1.7528350353240967, "step": 11106 }, { "epoch": 2.0218439974515334, "grad_norm": 12.0625, "learning_rate": 3.9390523978216034e-06, "loss": 1.0156023502349854, "step": 11108 }, { "epoch": 2.022208064075726, "grad_norm": 5.875, "learning_rate": 3.9377392969895825e-06, "loss": 1.0663440227508545, "step": 11110 }, { "epoch": 2.0225721306999183, "grad_norm": 19.125, "learning_rate": 3.9364264987859605e-06, "loss": 1.446930170059204, "step": 11112 }, { "epoch": 2.0229361973241105, "grad_norm": 7.34375, "learning_rate": 3.935114003403385e-06, "loss": 1.0982558727264404, "step": 11114 }, { "epoch": 2.0233002639483026, "grad_norm": 12.75, "learning_rate": 3.933801811034465e-06, "loss": 1.4989956617355347, "step": 11116 }, { "epoch": 2.023664330572495, "grad_norm": 15.4375, "learning_rate": 3.932489921871752e-06, "loss": 1.4031810760498047, "step": 11118 }, { "epoch": 2.024028397196687, "grad_norm": 18.625, "learning_rate": 3.931178336107768e-06, "loss": 1.4167940616607666, "step": 11120 }, { "epoch": 2.0243924638208792, "grad_norm": 17.875, "learning_rate": 3.929867053934976e-06, "loss": 1.3779305219650269, "step": 11122 }, { "epoch": 2.0247565304450714, "grad_norm": 15.9375, "learning_rate": 3.928556075545804e-06, "loss": 1.3135592937469482, "step": 11124 }, { "epoch": 2.0251205970692636, "grad_norm": 59.5, "learning_rate": 3.927245401132635e-06, "loss": 1.4298725128173828, "step": 11126 }, { "epoch": 2.025484663693456, "grad_norm": 91.5, "learning_rate": 3.925935030887802e-06, "loss": 1.334614634513855, "step": 11128 }, { "epoch": 2.025848730317648, "grad_norm": 3.265625, "learning_rate": 3.9246249650035985e-06, "loss": 0.9276155829429626, "step": 11130 }, { "epoch": 2.02621279694184, "grad_norm": 9.8125, "learning_rate": 3.923315203672271e-06, "loss": 1.5382261276245117, "step": 11132 }, { "epoch": 2.0265768635660324, "grad_norm": 18.25, "learning_rate": 3.922005747086019e-06, "loss": 0.7835226058959961, "step": 11134 }, { "epoch": 2.026940930190225, "grad_norm": 5.40625, "learning_rate": 3.9206965954370055e-06, "loss": 0.8796530365943909, "step": 11136 }, { "epoch": 2.0273049968144172, "grad_norm": 24.25, "learning_rate": 3.919387748917337e-06, "loss": 0.8988465666770935, "step": 11138 }, { "epoch": 2.0276690634386094, "grad_norm": 22.5, "learning_rate": 3.918079207719086e-06, "loss": 1.7242884635925293, "step": 11140 }, { "epoch": 2.0280331300628016, "grad_norm": 13.25, "learning_rate": 3.916770972034274e-06, "loss": 1.3035143613815308, "step": 11142 }, { "epoch": 2.028397196686994, "grad_norm": 7.375, "learning_rate": 3.915463042054878e-06, "loss": 1.4893219470977783, "step": 11144 }, { "epoch": 2.028761263311186, "grad_norm": 11.8125, "learning_rate": 3.914155417972834e-06, "loss": 1.3972722291946411, "step": 11146 }, { "epoch": 2.029125329935378, "grad_norm": 7.40625, "learning_rate": 3.912848099980028e-06, "loss": 1.5186147689819336, "step": 11148 }, { "epoch": 2.0294893965595704, "grad_norm": 12.25, "learning_rate": 3.911541088268307e-06, "loss": 1.3541737794876099, "step": 11150 }, { "epoch": 2.0298534631837626, "grad_norm": 4.6875, "learning_rate": 3.910234383029467e-06, "loss": 1.1067891120910645, "step": 11152 }, { "epoch": 2.030217529807955, "grad_norm": 47.75, "learning_rate": 3.908927984455259e-06, "loss": 1.448687195777893, "step": 11154 }, { "epoch": 2.030581596432147, "grad_norm": 8.75, "learning_rate": 3.907621892737399e-06, "loss": 0.577587902545929, "step": 11156 }, { "epoch": 2.030945663056339, "grad_norm": 7.21875, "learning_rate": 3.906316108067543e-06, "loss": 1.4380801916122437, "step": 11158 }, { "epoch": 2.0313097296805314, "grad_norm": 8.125, "learning_rate": 3.905010630637317e-06, "loss": 1.3181122541427612, "step": 11160 }, { "epoch": 2.0316737963047236, "grad_norm": 9.625, "learning_rate": 3.903705460638289e-06, "loss": 1.0836912393569946, "step": 11162 }, { "epoch": 2.032037862928916, "grad_norm": 17.125, "learning_rate": 3.902400598261986e-06, "loss": 1.470362901687622, "step": 11164 }, { "epoch": 2.0324019295531084, "grad_norm": 13.6875, "learning_rate": 3.9010960436999e-06, "loss": 1.3512824773788452, "step": 11166 }, { "epoch": 2.0327659961773006, "grad_norm": 10.1875, "learning_rate": 3.899791797143459e-06, "loss": 1.1623103618621826, "step": 11168 }, { "epoch": 2.0331300628014928, "grad_norm": 33.75, "learning_rate": 3.8984878587840635e-06, "loss": 1.8825910091400146, "step": 11170 }, { "epoch": 2.033494129425685, "grad_norm": 3.640625, "learning_rate": 3.8971842288130564e-06, "loss": 1.0461876392364502, "step": 11172 }, { "epoch": 2.033858196049877, "grad_norm": 9.3125, "learning_rate": 3.895880907421741e-06, "loss": 1.3835265636444092, "step": 11174 }, { "epoch": 2.0342222626740694, "grad_norm": 2.375, "learning_rate": 3.894577894801377e-06, "loss": 1.050366997718811, "step": 11176 }, { "epoch": 2.0345863292982616, "grad_norm": 10.125, "learning_rate": 3.893275191143172e-06, "loss": 1.6249148845672607, "step": 11178 }, { "epoch": 2.0349503959224537, "grad_norm": 7.84375, "learning_rate": 3.891972796638298e-06, "loss": 1.4690312147140503, "step": 11180 }, { "epoch": 2.035314462546646, "grad_norm": 15.0, "learning_rate": 3.8906707114778715e-06, "loss": 0.18220192193984985, "step": 11182 }, { "epoch": 2.035678529170838, "grad_norm": 10.375, "learning_rate": 3.889368935852968e-06, "loss": 1.3766498565673828, "step": 11184 }, { "epoch": 2.0360425957950303, "grad_norm": 5.34375, "learning_rate": 3.888067469954624e-06, "loss": 1.3434513807296753, "step": 11186 }, { "epoch": 2.0364066624192225, "grad_norm": 18.75, "learning_rate": 3.886766313973815e-06, "loss": 1.4428796768188477, "step": 11188 }, { "epoch": 2.036770729043415, "grad_norm": 3.359375, "learning_rate": 3.885465468101488e-06, "loss": 0.7841074466705322, "step": 11190 }, { "epoch": 2.0371347956676074, "grad_norm": 8.75, "learning_rate": 3.884164932528534e-06, "loss": 0.930863082408905, "step": 11192 }, { "epoch": 2.0374988622917996, "grad_norm": 9.25, "learning_rate": 3.882864707445799e-06, "loss": 1.3061288595199585, "step": 11194 }, { "epoch": 2.0378629289159917, "grad_norm": 9.0, "learning_rate": 3.881564793044092e-06, "loss": 1.2532784938812256, "step": 11196 }, { "epoch": 2.038226995540184, "grad_norm": 2.78125, "learning_rate": 3.880265189514163e-06, "loss": 1.035947561264038, "step": 11198 }, { "epoch": 2.038591062164376, "grad_norm": 13.0625, "learning_rate": 3.878965897046729e-06, "loss": 1.5455666780471802, "step": 11200 }, { "epoch": 2.0389551287885683, "grad_norm": 7.84375, "learning_rate": 3.8776669158324535e-06, "loss": 0.8870929479598999, "step": 11202 }, { "epoch": 2.0393191954127605, "grad_norm": 18.625, "learning_rate": 3.8763682460619554e-06, "loss": 1.224616527557373, "step": 11204 }, { "epoch": 2.0396832620369527, "grad_norm": 35.0, "learning_rate": 3.875069887925813e-06, "loss": 1.652757167816162, "step": 11206 }, { "epoch": 2.040047328661145, "grad_norm": 8.0625, "learning_rate": 3.873771841614551e-06, "loss": 1.435332179069519, "step": 11208 }, { "epoch": 2.040411395285337, "grad_norm": 3.359375, "learning_rate": 3.872474107318656e-06, "loss": 1.0747565031051636, "step": 11210 }, { "epoch": 2.0407754619095293, "grad_norm": 63.5, "learning_rate": 3.871176685228564e-06, "loss": 1.3681832551956177, "step": 11212 }, { "epoch": 2.0411395285337215, "grad_norm": 27.75, "learning_rate": 3.8698795755346626e-06, "loss": 1.3852930068969727, "step": 11214 }, { "epoch": 2.0415035951579137, "grad_norm": 8.8125, "learning_rate": 3.868582778427306e-06, "loss": 1.4589409828186035, "step": 11216 }, { "epoch": 2.0418676617821063, "grad_norm": 6.5, "learning_rate": 3.867286294096784e-06, "loss": 1.1996346712112427, "step": 11218 }, { "epoch": 2.0422317284062985, "grad_norm": 10.5625, "learning_rate": 3.865990122733359e-06, "loss": 1.3251181840896606, "step": 11220 }, { "epoch": 2.0425957950304907, "grad_norm": 9.8125, "learning_rate": 3.864694264527234e-06, "loss": 1.1638710498809814, "step": 11222 }, { "epoch": 2.042959861654683, "grad_norm": 31.625, "learning_rate": 3.863398719668569e-06, "loss": 1.9275388717651367, "step": 11224 }, { "epoch": 2.043323928278875, "grad_norm": 2.875, "learning_rate": 3.862103488347488e-06, "loss": 0.816726803779602, "step": 11226 }, { "epoch": 2.0436879949030673, "grad_norm": 10.8125, "learning_rate": 3.860808570754052e-06, "loss": 1.4633066654205322, "step": 11228 }, { "epoch": 2.0440520615272595, "grad_norm": 8.4375, "learning_rate": 3.8595139670782925e-06, "loss": 0.8681273460388184, "step": 11230 }, { "epoch": 2.0444161281514517, "grad_norm": 25.0, "learning_rate": 3.858219677510181e-06, "loss": 1.2102720737457275, "step": 11232 }, { "epoch": 2.044780194775644, "grad_norm": 13.1875, "learning_rate": 3.856925702239651e-06, "loss": 1.6891427040100098, "step": 11234 }, { "epoch": 2.045144261399836, "grad_norm": 13.625, "learning_rate": 3.855632041456592e-06, "loss": 1.4816524982452393, "step": 11236 }, { "epoch": 2.0455083280240283, "grad_norm": 4.625, "learning_rate": 3.854338695350838e-06, "loss": 1.0426243543624878, "step": 11238 }, { "epoch": 2.0458723946482205, "grad_norm": 3.84375, "learning_rate": 3.853045664112187e-06, "loss": 1.1335670948028564, "step": 11240 }, { "epoch": 2.0462364612724127, "grad_norm": 20.125, "learning_rate": 3.8517529479303825e-06, "loss": 2.009348154067993, "step": 11242 }, { "epoch": 2.0466005278966053, "grad_norm": 39.25, "learning_rate": 3.850460546995126e-06, "loss": 1.5380167961120605, "step": 11244 }, { "epoch": 2.0469645945207975, "grad_norm": 19.0, "learning_rate": 3.849168461496076e-06, "loss": 0.7257711291313171, "step": 11246 }, { "epoch": 2.0473286611449897, "grad_norm": 16.125, "learning_rate": 3.847876691622835e-06, "loss": 1.3549449443817139, "step": 11248 }, { "epoch": 2.047692727769182, "grad_norm": 10.4375, "learning_rate": 3.8465852375649696e-06, "loss": 2.0038070678710938, "step": 11250 }, { "epoch": 2.048056794393374, "grad_norm": 7.84375, "learning_rate": 3.845294099511993e-06, "loss": 1.1168270111083984, "step": 11252 }, { "epoch": 2.0484208610175663, "grad_norm": 14.75, "learning_rate": 3.844003277653372e-06, "loss": 0.11435945332050323, "step": 11254 }, { "epoch": 2.0487849276417585, "grad_norm": 13.125, "learning_rate": 3.842712772178537e-06, "loss": 1.4028353691101074, "step": 11256 }, { "epoch": 2.0491489942659507, "grad_norm": 15.5, "learning_rate": 3.8414225832768574e-06, "loss": 0.9009632468223572, "step": 11258 }, { "epoch": 2.049513060890143, "grad_norm": 9.3125, "learning_rate": 3.840132711137667e-06, "loss": 1.3315761089324951, "step": 11260 }, { "epoch": 2.049877127514335, "grad_norm": 5.1875, "learning_rate": 3.838843155950248e-06, "loss": 0.6985101103782654, "step": 11262 }, { "epoch": 2.0502411941385272, "grad_norm": 2.203125, "learning_rate": 3.837553917903835e-06, "loss": 0.7500526309013367, "step": 11264 }, { "epoch": 2.0506052607627194, "grad_norm": 24.625, "learning_rate": 3.8362649971876246e-06, "loss": 1.187584638595581, "step": 11266 }, { "epoch": 2.0509693273869116, "grad_norm": 17.375, "learning_rate": 3.834976393990753e-06, "loss": 0.5070589780807495, "step": 11268 }, { "epoch": 2.051333394011104, "grad_norm": 30.0, "learning_rate": 3.833688108502326e-06, "loss": 1.5097854137420654, "step": 11270 }, { "epoch": 2.0516974606352965, "grad_norm": 12.5625, "learning_rate": 3.832400140911388e-06, "loss": 1.521182894706726, "step": 11272 }, { "epoch": 2.0520615272594886, "grad_norm": 22.75, "learning_rate": 3.831112491406943e-06, "loss": 1.826069951057434, "step": 11274 }, { "epoch": 2.052425593883681, "grad_norm": 2.46875, "learning_rate": 3.829825160177952e-06, "loss": 1.0081204175949097, "step": 11276 }, { "epoch": 2.052789660507873, "grad_norm": 80.0, "learning_rate": 3.828538147413322e-06, "loss": 2.2084999084472656, "step": 11278 }, { "epoch": 2.0531537271320652, "grad_norm": 27.0, "learning_rate": 3.827251453301922e-06, "loss": 1.9426029920578003, "step": 11280 }, { "epoch": 2.0535177937562574, "grad_norm": 17.875, "learning_rate": 3.825965078032563e-06, "loss": 1.8204866647720337, "step": 11282 }, { "epoch": 2.0538818603804496, "grad_norm": 9.8125, "learning_rate": 3.824679021794018e-06, "loss": 1.1048431396484375, "step": 11284 }, { "epoch": 2.054245927004642, "grad_norm": 11.0, "learning_rate": 3.8233932847750134e-06, "loss": 1.450287103652954, "step": 11286 }, { "epoch": 2.054609993628834, "grad_norm": 15.6875, "learning_rate": 3.82210786716422e-06, "loss": 1.7937439680099487, "step": 11288 }, { "epoch": 2.054974060253026, "grad_norm": 3.515625, "learning_rate": 3.8208227691502745e-06, "loss": 1.0372117757797241, "step": 11290 }, { "epoch": 2.0553381268772184, "grad_norm": 6.09375, "learning_rate": 3.8195379909217535e-06, "loss": 1.0177583694458008, "step": 11292 }, { "epoch": 2.0557021935014106, "grad_norm": 11.8125, "learning_rate": 3.818253532667195e-06, "loss": 1.0975708961486816, "step": 11294 }, { "epoch": 2.056066260125603, "grad_norm": 14.9375, "learning_rate": 3.816969394575092e-06, "loss": 1.6444745063781738, "step": 11296 }, { "epoch": 2.0564303267497954, "grad_norm": 9.1875, "learning_rate": 3.81568557683388e-06, "loss": 1.3613479137420654, "step": 11298 }, { "epoch": 2.0567943933739876, "grad_norm": 34.0, "learning_rate": 3.81440207963196e-06, "loss": 0.6512670516967773, "step": 11300 }, { "epoch": 2.05715845999818, "grad_norm": 18.625, "learning_rate": 3.8131189031576765e-06, "loss": 1.3207588195800781, "step": 11302 }, { "epoch": 2.057522526622372, "grad_norm": 31.125, "learning_rate": 3.8118360475993293e-06, "loss": 1.1373512744903564, "step": 11304 }, { "epoch": 2.057886593246564, "grad_norm": 27.5, "learning_rate": 3.8105535131451774e-06, "loss": 2.149296283721924, "step": 11306 }, { "epoch": 2.0582506598707564, "grad_norm": 15.6875, "learning_rate": 3.809271299983421e-06, "loss": 1.4325075149536133, "step": 11308 }, { "epoch": 2.0586147264949486, "grad_norm": 9.9375, "learning_rate": 3.807989408302227e-06, "loss": 0.20745335519313812, "step": 11310 }, { "epoch": 2.058978793119141, "grad_norm": 16.875, "learning_rate": 3.806707838289702e-06, "loss": 1.7461762428283691, "step": 11312 }, { "epoch": 2.059342859743333, "grad_norm": 16.375, "learning_rate": 3.8054265901339114e-06, "loss": 1.3343355655670166, "step": 11314 }, { "epoch": 2.059706926367525, "grad_norm": 19.0, "learning_rate": 3.804145664022878e-06, "loss": 1.6899375915527344, "step": 11316 }, { "epoch": 2.0600709929917174, "grad_norm": 16.5, "learning_rate": 3.8028650601445664e-06, "loss": 1.9408360719680786, "step": 11318 }, { "epoch": 2.0604350596159096, "grad_norm": 14.1875, "learning_rate": 3.8015847786869067e-06, "loss": 1.6124509572982788, "step": 11320 }, { "epoch": 2.0607991262401018, "grad_norm": 9.9375, "learning_rate": 3.8003048198377686e-06, "loss": 1.2918286323547363, "step": 11322 }, { "epoch": 2.061163192864294, "grad_norm": 18.125, "learning_rate": 3.799025183784983e-06, "loss": 1.3394672870635986, "step": 11324 }, { "epoch": 2.0615272594884866, "grad_norm": 15.75, "learning_rate": 3.797745870716334e-06, "loss": 1.372384786605835, "step": 11326 }, { "epoch": 2.061891326112679, "grad_norm": 9.3125, "learning_rate": 3.7964668808195515e-06, "loss": 1.3626676797866821, "step": 11328 }, { "epoch": 2.062255392736871, "grad_norm": 7.125, "learning_rate": 3.795188214282326e-06, "loss": 1.2860833406448364, "step": 11330 }, { "epoch": 2.062619459361063, "grad_norm": 11.0625, "learning_rate": 3.793909871292294e-06, "loss": 1.3646643161773682, "step": 11332 }, { "epoch": 2.0629835259852554, "grad_norm": 16.5, "learning_rate": 3.7926318520370465e-06, "loss": 1.3925278186798096, "step": 11334 }, { "epoch": 2.0633475926094476, "grad_norm": 20.625, "learning_rate": 3.7913541567041317e-06, "loss": 1.871671438217163, "step": 11336 }, { "epoch": 2.0637116592336398, "grad_norm": 12.5625, "learning_rate": 3.7900767854810405e-06, "loss": 1.4626822471618652, "step": 11338 }, { "epoch": 2.064075725857832, "grad_norm": 9.1875, "learning_rate": 3.788799738555228e-06, "loss": 1.534839391708374, "step": 11340 }, { "epoch": 2.064439792482024, "grad_norm": 24.875, "learning_rate": 3.7875230161140896e-06, "loss": 1.0633103847503662, "step": 11342 }, { "epoch": 2.0648038591062163, "grad_norm": 7.5625, "learning_rate": 3.786246618344984e-06, "loss": 1.4124540090560913, "step": 11344 }, { "epoch": 2.0651679257304085, "grad_norm": 13.75, "learning_rate": 3.784970545435217e-06, "loss": 1.5882494449615479, "step": 11346 }, { "epoch": 2.0655319923546007, "grad_norm": 10.9375, "learning_rate": 3.783694797572043e-06, "loss": 1.3247013092041016, "step": 11348 }, { "epoch": 2.065896058978793, "grad_norm": 20.25, "learning_rate": 3.7824193749426784e-06, "loss": 1.5170966386795044, "step": 11350 }, { "epoch": 2.0662601256029856, "grad_norm": 10.1875, "learning_rate": 3.78114427773428e-06, "loss": 1.1787314414978027, "step": 11352 }, { "epoch": 2.0666241922271777, "grad_norm": 7.71875, "learning_rate": 3.7798695061339686e-06, "loss": 1.1644823551177979, "step": 11354 }, { "epoch": 2.06698825885137, "grad_norm": 3.453125, "learning_rate": 3.7785950603288114e-06, "loss": 1.359602689743042, "step": 11356 }, { "epoch": 2.067352325475562, "grad_norm": 4.96875, "learning_rate": 3.7773209405058226e-06, "loss": 0.8447359800338745, "step": 11358 }, { "epoch": 2.0677163920997543, "grad_norm": 15.4375, "learning_rate": 3.7760471468519822e-06, "loss": 1.4743925333023071, "step": 11360 }, { "epoch": 2.0680804587239465, "grad_norm": 26.375, "learning_rate": 3.7747736795542062e-06, "loss": 1.3202470541000366, "step": 11362 }, { "epoch": 2.0684445253481387, "grad_norm": 8.9375, "learning_rate": 3.7735005387993768e-06, "loss": 1.404314398765564, "step": 11364 }, { "epoch": 2.068808591972331, "grad_norm": 10.625, "learning_rate": 3.772227724774321e-06, "loss": 1.4834040403366089, "step": 11366 }, { "epoch": 2.069172658596523, "grad_norm": 15.125, "learning_rate": 3.770955237665816e-06, "loss": 1.7776987552642822, "step": 11368 }, { "epoch": 2.0695367252207153, "grad_norm": 11.25, "learning_rate": 3.7696830776605985e-06, "loss": 1.4459638595581055, "step": 11370 }, { "epoch": 2.0699007918449075, "grad_norm": 20.5, "learning_rate": 3.7684112449453484e-06, "loss": 1.6306496858596802, "step": 11372 }, { "epoch": 2.0702648584690997, "grad_norm": 5.34375, "learning_rate": 3.7671397397067056e-06, "loss": 1.1137031316757202, "step": 11374 }, { "epoch": 2.070628925093292, "grad_norm": 41.5, "learning_rate": 3.7658685621312587e-06, "loss": 0.5532140731811523, "step": 11376 }, { "epoch": 2.0709929917174845, "grad_norm": 9.4375, "learning_rate": 3.764597712405542e-06, "loss": 1.3796281814575195, "step": 11378 }, { "epoch": 2.0713570583416767, "grad_norm": 8.625, "learning_rate": 3.7633271907160556e-06, "loss": 1.3515645265579224, "step": 11380 }, { "epoch": 2.071721124965869, "grad_norm": 13.125, "learning_rate": 3.762056997249237e-06, "loss": 1.1346735954284668, "step": 11382 }, { "epoch": 2.072085191590061, "grad_norm": 12.5, "learning_rate": 3.7607871321914853e-06, "loss": 1.5071709156036377, "step": 11384 }, { "epoch": 2.0724492582142533, "grad_norm": 20.375, "learning_rate": 3.7595175957291486e-06, "loss": 1.382782220840454, "step": 11386 }, { "epoch": 2.0728133248384455, "grad_norm": 4.0625, "learning_rate": 3.758248388048522e-06, "loss": 0.8292750716209412, "step": 11388 }, { "epoch": 2.0731773914626377, "grad_norm": 29.875, "learning_rate": 3.756979509335863e-06, "loss": 1.8969202041625977, "step": 11390 }, { "epoch": 2.07354145808683, "grad_norm": 11.1875, "learning_rate": 3.7557109597773674e-06, "loss": 1.4979089498519897, "step": 11392 }, { "epoch": 2.073905524711022, "grad_norm": 19.375, "learning_rate": 3.7544427395591943e-06, "loss": 1.3307009935379028, "step": 11394 }, { "epoch": 2.0742695913352143, "grad_norm": 9.25, "learning_rate": 3.753174848867451e-06, "loss": 1.5596909523010254, "step": 11396 }, { "epoch": 2.0746336579594065, "grad_norm": 17.0, "learning_rate": 3.751907287888189e-06, "loss": 1.345757007598877, "step": 11398 }, { "epoch": 2.0749977245835987, "grad_norm": 13.5, "learning_rate": 3.750640056807426e-06, "loss": 1.5483698844909668, "step": 11400 }, { "epoch": 2.075361791207791, "grad_norm": 7.40625, "learning_rate": 3.7493731558111147e-06, "loss": 1.0907464027404785, "step": 11402 }, { "epoch": 2.075725857831983, "grad_norm": 33.25, "learning_rate": 3.748106585085174e-06, "loss": 1.285001277923584, "step": 11404 }, { "epoch": 2.0760899244561757, "grad_norm": 30.5, "learning_rate": 3.7468403448154667e-06, "loss": 2.0893640518188477, "step": 11406 }, { "epoch": 2.076453991080368, "grad_norm": 11.875, "learning_rate": 3.745574435187805e-06, "loss": 1.3165132999420166, "step": 11408 }, { "epoch": 2.07681805770456, "grad_norm": 7.28125, "learning_rate": 3.7443088563879617e-06, "loss": 1.2641518115997314, "step": 11410 }, { "epoch": 2.0771821243287523, "grad_norm": 17.75, "learning_rate": 3.7430436086016486e-06, "loss": 1.3984546661376953, "step": 11412 }, { "epoch": 2.0775461909529445, "grad_norm": 10.9375, "learning_rate": 3.7417786920145415e-06, "loss": 1.1035141944885254, "step": 11414 }, { "epoch": 2.0779102575771367, "grad_norm": 12.0, "learning_rate": 3.7405141068122607e-06, "loss": 1.3482505083084106, "step": 11416 }, { "epoch": 2.078274324201329, "grad_norm": 22.875, "learning_rate": 3.7392498531803744e-06, "loss": 1.8221603631973267, "step": 11418 }, { "epoch": 2.078638390825521, "grad_norm": 10.9375, "learning_rate": 3.737985931304414e-06, "loss": 1.325646162033081, "step": 11420 }, { "epoch": 2.0790024574497132, "grad_norm": 98.0, "learning_rate": 3.7367223413698474e-06, "loss": 1.4047844409942627, "step": 11422 }, { "epoch": 2.0793665240739054, "grad_norm": 80.0, "learning_rate": 3.7354590835621067e-06, "loss": 1.4732098579406738, "step": 11424 }, { "epoch": 2.0797305906980976, "grad_norm": 9.6875, "learning_rate": 3.7341961580665696e-06, "loss": 1.3628209829330444, "step": 11426 }, { "epoch": 2.08009465732229, "grad_norm": 11.5, "learning_rate": 3.732933565068561e-06, "loss": 1.5524787902832031, "step": 11428 }, { "epoch": 2.080458723946482, "grad_norm": 4.875, "learning_rate": 3.7316713047533666e-06, "loss": 1.2368056774139404, "step": 11430 }, { "epoch": 2.080822790570674, "grad_norm": 16.5, "learning_rate": 3.730409377306213e-06, "loss": 2.109367847442627, "step": 11432 }, { "epoch": 2.081186857194867, "grad_norm": 9.1875, "learning_rate": 3.729147782912287e-06, "loss": 1.4427129030227661, "step": 11434 }, { "epoch": 2.081550923819059, "grad_norm": 3.015625, "learning_rate": 3.727886521756722e-06, "loss": 0.949647843837738, "step": 11436 }, { "epoch": 2.0819149904432512, "grad_norm": 16.375, "learning_rate": 3.7266255940245986e-06, "loss": 0.9821884036064148, "step": 11438 }, { "epoch": 2.0822790570674434, "grad_norm": 9.4375, "learning_rate": 3.72536499990096e-06, "loss": 1.3183737993240356, "step": 11440 }, { "epoch": 2.0826431236916356, "grad_norm": 87.0, "learning_rate": 3.724104739570786e-06, "loss": 1.8024661540985107, "step": 11442 }, { "epoch": 2.083007190315828, "grad_norm": 21.125, "learning_rate": 3.7228448132190186e-06, "loss": 1.5356311798095703, "step": 11444 }, { "epoch": 2.08337125694002, "grad_norm": 10.25, "learning_rate": 3.721585221030549e-06, "loss": 0.6040663123130798, "step": 11446 }, { "epoch": 2.083735323564212, "grad_norm": 11.75, "learning_rate": 3.720325963190211e-06, "loss": 1.276921272277832, "step": 11448 }, { "epoch": 2.0840993901884044, "grad_norm": 22.5, "learning_rate": 3.719067039882803e-06, "loss": 2.054856777191162, "step": 11450 }, { "epoch": 2.0844634568125966, "grad_norm": 9.125, "learning_rate": 3.717808451293059e-06, "loss": 1.3334741592407227, "step": 11452 }, { "epoch": 2.084827523436789, "grad_norm": 20.0, "learning_rate": 3.7165501976056783e-06, "loss": 1.9373116493225098, "step": 11454 }, { "epoch": 2.085191590060981, "grad_norm": 10.25, "learning_rate": 3.715292279005303e-06, "loss": 1.428931713104248, "step": 11456 }, { "epoch": 2.085555656685173, "grad_norm": 13.375, "learning_rate": 3.714034695676524e-06, "loss": 1.243409276008606, "step": 11458 }, { "epoch": 2.085919723309366, "grad_norm": 14.75, "learning_rate": 3.7127774478038917e-06, "loss": 0.7346692681312561, "step": 11460 }, { "epoch": 2.086283789933558, "grad_norm": 10.375, "learning_rate": 3.7115205355718976e-06, "loss": 1.1530519723892212, "step": 11462 }, { "epoch": 2.08664785655775, "grad_norm": 10.4375, "learning_rate": 3.7102639591649907e-06, "loss": 0.7723678350448608, "step": 11464 }, { "epoch": 2.0870119231819424, "grad_norm": 9.5, "learning_rate": 3.709007718767571e-06, "loss": 1.0693626403808594, "step": 11466 }, { "epoch": 2.0873759898061346, "grad_norm": 16.375, "learning_rate": 3.70775181456398e-06, "loss": 1.3240866661071777, "step": 11468 }, { "epoch": 2.087740056430327, "grad_norm": 10.625, "learning_rate": 3.706496246738525e-06, "loss": 1.3641058206558228, "step": 11470 }, { "epoch": 2.088104123054519, "grad_norm": 110.5, "learning_rate": 3.7052410154754463e-06, "loss": 1.300267219543457, "step": 11472 }, { "epoch": 2.088468189678711, "grad_norm": 27.25, "learning_rate": 3.703986120958951e-06, "loss": 1.1412022113800049, "step": 11474 }, { "epoch": 2.0888322563029034, "grad_norm": 12.9375, "learning_rate": 3.702731563373189e-06, "loss": 1.4686508178710938, "step": 11476 }, { "epoch": 2.0891963229270956, "grad_norm": 8.75, "learning_rate": 3.7014773429022576e-06, "loss": 1.3583439588546753, "step": 11478 }, { "epoch": 2.0895603895512878, "grad_norm": 18.5, "learning_rate": 3.7002234597302134e-06, "loss": 2.0067975521087646, "step": 11480 }, { "epoch": 2.08992445617548, "grad_norm": 199.0, "learning_rate": 3.6989699140410527e-06, "loss": 1.7385756969451904, "step": 11482 }, { "epoch": 2.090288522799672, "grad_norm": 14.25, "learning_rate": 3.6977167060187347e-06, "loss": 1.9352350234985352, "step": 11484 }, { "epoch": 2.090652589423865, "grad_norm": 20.875, "learning_rate": 3.6964638358471603e-06, "loss": 0.7607420086860657, "step": 11486 }, { "epoch": 2.091016656048057, "grad_norm": 5.78125, "learning_rate": 3.69521130371018e-06, "loss": 1.0673882961273193, "step": 11488 }, { "epoch": 2.091380722672249, "grad_norm": 15.75, "learning_rate": 3.6939591097916035e-06, "loss": 1.2340679168701172, "step": 11490 }, { "epoch": 2.0917447892964414, "grad_norm": 3.9375, "learning_rate": 3.6927072542751786e-06, "loss": 0.9380825757980347, "step": 11492 }, { "epoch": 2.0921088559206336, "grad_norm": 15.875, "learning_rate": 3.6914557373446158e-06, "loss": 1.5175800323486328, "step": 11494 }, { "epoch": 2.0924729225448258, "grad_norm": 127.5, "learning_rate": 3.6902045591835685e-06, "loss": 0.7016688585281372, "step": 11496 }, { "epoch": 2.092836989169018, "grad_norm": 18.125, "learning_rate": 3.6889537199756385e-06, "loss": 1.7808164358139038, "step": 11498 }, { "epoch": 2.09320105579321, "grad_norm": 13.375, "learning_rate": 3.687703219904388e-06, "loss": 1.443461298942566, "step": 11500 }, { "epoch": 2.0935651224174023, "grad_norm": 23.0, "learning_rate": 3.686453059153315e-06, "loss": 1.4294456243515015, "step": 11502 }, { "epoch": 2.0939291890415945, "grad_norm": 15.8125, "learning_rate": 3.6852032379058815e-06, "loss": 0.9319020509719849, "step": 11504 }, { "epoch": 2.0942932556657867, "grad_norm": 53.0, "learning_rate": 3.683953756345493e-06, "loss": 1.5301628112792969, "step": 11506 }, { "epoch": 2.094657322289979, "grad_norm": 25.375, "learning_rate": 3.682704614655502e-06, "loss": 1.4465856552124023, "step": 11508 }, { "epoch": 2.095021388914171, "grad_norm": 212.0, "learning_rate": 3.6814558130192212e-06, "loss": 1.5864955186843872, "step": 11510 }, { "epoch": 2.0953854555383633, "grad_norm": 18.75, "learning_rate": 3.6802073516199e-06, "loss": 2.0906662940979004, "step": 11512 }, { "epoch": 2.095749522162556, "grad_norm": 25.375, "learning_rate": 3.67895923064075e-06, "loss": 1.0467870235443115, "step": 11514 }, { "epoch": 2.096113588786748, "grad_norm": 9.0, "learning_rate": 3.677711450264928e-06, "loss": 1.5268776416778564, "step": 11516 }, { "epoch": 2.0964776554109403, "grad_norm": 12.625, "learning_rate": 3.6764640106755363e-06, "loss": 1.3490040302276611, "step": 11518 }, { "epoch": 2.0968417220351325, "grad_norm": 6.3125, "learning_rate": 3.675216912055638e-06, "loss": 1.0428179502487183, "step": 11520 }, { "epoch": 2.0972057886593247, "grad_norm": 4.46875, "learning_rate": 3.6739701545882333e-06, "loss": 1.1493134498596191, "step": 11522 }, { "epoch": 2.097569855283517, "grad_norm": 29.25, "learning_rate": 3.672723738456283e-06, "loss": 1.2034779787063599, "step": 11524 }, { "epoch": 2.097933921907709, "grad_norm": 7.46875, "learning_rate": 3.6714776638426935e-06, "loss": 1.3758158683776855, "step": 11526 }, { "epoch": 2.0982979885319013, "grad_norm": 7.5, "learning_rate": 3.670231930930318e-06, "loss": 1.081048846244812, "step": 11528 }, { "epoch": 2.0986620551560935, "grad_norm": 3.390625, "learning_rate": 3.6689865399019676e-06, "loss": 1.2714335918426514, "step": 11530 }, { "epoch": 2.0990261217802857, "grad_norm": 11.0, "learning_rate": 3.667741490940393e-06, "loss": 1.5025876760482788, "step": 11532 }, { "epoch": 2.099390188404478, "grad_norm": 3.53125, "learning_rate": 3.666496784228304e-06, "loss": 1.409659504890442, "step": 11534 }, { "epoch": 2.09975425502867, "grad_norm": 2.8125, "learning_rate": 3.6652524199483565e-06, "loss": 1.1324552297592163, "step": 11536 }, { "epoch": 2.1001183216528623, "grad_norm": 19.875, "learning_rate": 3.6640083982831514e-06, "loss": 1.2281301021575928, "step": 11538 }, { "epoch": 2.100482388277055, "grad_norm": 63.25, "learning_rate": 3.6627647194152498e-06, "loss": 0.4560912847518921, "step": 11540 }, { "epoch": 2.100846454901247, "grad_norm": 16.875, "learning_rate": 3.661521383527151e-06, "loss": 1.259648323059082, "step": 11542 }, { "epoch": 2.1012105215254393, "grad_norm": 11.875, "learning_rate": 3.6602783908013136e-06, "loss": 1.769161581993103, "step": 11544 }, { "epoch": 2.1015745881496315, "grad_norm": 10.3125, "learning_rate": 3.6590357414201415e-06, "loss": 1.790071964263916, "step": 11546 }, { "epoch": 2.1019386547738237, "grad_norm": 12.75, "learning_rate": 3.6577934355659844e-06, "loss": 1.2681019306182861, "step": 11548 }, { "epoch": 2.102302721398016, "grad_norm": 25.5, "learning_rate": 3.6565514734211515e-06, "loss": 1.7064169645309448, "step": 11550 }, { "epoch": 2.102666788022208, "grad_norm": 28.5, "learning_rate": 3.6553098551678902e-06, "loss": 0.9402908682823181, "step": 11552 }, { "epoch": 2.1030308546464003, "grad_norm": 11.75, "learning_rate": 3.654068580988406e-06, "loss": 1.2075145244598389, "step": 11554 }, { "epoch": 2.1033949212705925, "grad_norm": 12.625, "learning_rate": 3.6528276510648527e-06, "loss": 1.2668508291244507, "step": 11556 }, { "epoch": 2.1037589878947847, "grad_norm": 14.6875, "learning_rate": 3.6515870655793255e-06, "loss": 0.8139492869377136, "step": 11558 }, { "epoch": 2.104123054518977, "grad_norm": 15.5, "learning_rate": 3.650346824713883e-06, "loss": 1.3122189044952393, "step": 11560 }, { "epoch": 2.104487121143169, "grad_norm": 10.3125, "learning_rate": 3.649106928650518e-06, "loss": 1.3857519626617432, "step": 11562 }, { "epoch": 2.1048511877673612, "grad_norm": 14.5625, "learning_rate": 3.647867377571186e-06, "loss": 1.3880122900009155, "step": 11564 }, { "epoch": 2.1052152543915534, "grad_norm": 11.0625, "learning_rate": 3.6466281716577844e-06, "loss": 1.6191080808639526, "step": 11566 }, { "epoch": 2.105579321015746, "grad_norm": 26.125, "learning_rate": 3.6453893110921595e-06, "loss": 2.107329845428467, "step": 11568 }, { "epoch": 2.1059433876399383, "grad_norm": 12.6875, "learning_rate": 3.6441507960561134e-06, "loss": 1.3498234748840332, "step": 11570 }, { "epoch": 2.1063074542641305, "grad_norm": 10.0625, "learning_rate": 3.6429126267313873e-06, "loss": 1.5198556184768677, "step": 11572 }, { "epoch": 2.1066715208883227, "grad_norm": 21.125, "learning_rate": 3.6416748032996824e-06, "loss": 1.3618147373199463, "step": 11574 }, { "epoch": 2.107035587512515, "grad_norm": 27.375, "learning_rate": 3.640437325942644e-06, "loss": 1.2114366292953491, "step": 11576 }, { "epoch": 2.107399654136707, "grad_norm": 38.75, "learning_rate": 3.639200194841863e-06, "loss": 1.0727523565292358, "step": 11578 }, { "epoch": 2.1077637207608992, "grad_norm": 9.3125, "learning_rate": 3.6379634101788885e-06, "loss": 1.446696162223816, "step": 11580 }, { "epoch": 2.1081277873850914, "grad_norm": 9.4375, "learning_rate": 3.6367269721352083e-06, "loss": 1.2902332544326782, "step": 11582 }, { "epoch": 2.1084918540092836, "grad_norm": 15.125, "learning_rate": 3.635490880892269e-06, "loss": 1.6324158906936646, "step": 11584 }, { "epoch": 2.108855920633476, "grad_norm": 24.75, "learning_rate": 3.6342551366314618e-06, "loss": 1.7397615909576416, "step": 11586 }, { "epoch": 2.109219987257668, "grad_norm": 14.4375, "learning_rate": 3.633019739534123e-06, "loss": 1.2016204595565796, "step": 11588 }, { "epoch": 2.10958405388186, "grad_norm": 7.15625, "learning_rate": 3.6317846897815467e-06, "loss": 1.2775236368179321, "step": 11590 }, { "epoch": 2.1099481205060524, "grad_norm": 9.9375, "learning_rate": 3.6305499875549675e-06, "loss": 1.326016902923584, "step": 11592 }, { "epoch": 2.110312187130245, "grad_norm": 9.4375, "learning_rate": 3.629315633035577e-06, "loss": 1.3310823440551758, "step": 11594 }, { "epoch": 2.1106762537544372, "grad_norm": 11.1875, "learning_rate": 3.628081626404511e-06, "loss": 1.4421488046646118, "step": 11596 }, { "epoch": 2.1110403203786294, "grad_norm": 8.625, "learning_rate": 3.6268479678428505e-06, "loss": 1.3825407028198242, "step": 11598 }, { "epoch": 2.1114043870028216, "grad_norm": 7.5625, "learning_rate": 3.6256146575316366e-06, "loss": 1.1559851169586182, "step": 11600 }, { "epoch": 2.111768453627014, "grad_norm": 7.3125, "learning_rate": 3.624381695651846e-06, "loss": 1.1735668182373047, "step": 11602 }, { "epoch": 2.112132520251206, "grad_norm": 11.375, "learning_rate": 3.623149082384415e-06, "loss": 1.358276605606079, "step": 11604 }, { "epoch": 2.112496586875398, "grad_norm": 6.34375, "learning_rate": 3.6219168179102253e-06, "loss": 1.1880605220794678, "step": 11606 }, { "epoch": 2.1128606534995904, "grad_norm": 14.625, "learning_rate": 3.6206849024101033e-06, "loss": 1.2851117849349976, "step": 11608 }, { "epoch": 2.1132247201237826, "grad_norm": 20.5, "learning_rate": 3.619453336064831e-06, "loss": 1.5145275592803955, "step": 11610 }, { "epoch": 2.113588786747975, "grad_norm": 19.75, "learning_rate": 3.6182221190551315e-06, "loss": 1.297667384147644, "step": 11612 }, { "epoch": 2.113952853372167, "grad_norm": 16.25, "learning_rate": 3.616991251561685e-06, "loss": 1.5837275981903076, "step": 11614 }, { "epoch": 2.114316919996359, "grad_norm": 30.875, "learning_rate": 3.6157607337651163e-06, "loss": 1.446892261505127, "step": 11616 }, { "epoch": 2.1146809866205514, "grad_norm": 7.84375, "learning_rate": 3.6145305658459946e-06, "loss": 1.5234527587890625, "step": 11618 }, { "epoch": 2.115045053244744, "grad_norm": 9.1875, "learning_rate": 3.6133007479848474e-06, "loss": 1.3599328994750977, "step": 11620 }, { "epoch": 2.115409119868936, "grad_norm": 52.25, "learning_rate": 3.612071280362141e-06, "loss": 1.804826259613037, "step": 11622 }, { "epoch": 2.1157731864931284, "grad_norm": 8.125, "learning_rate": 3.6108421631582973e-06, "loss": 1.3728814125061035, "step": 11624 }, { "epoch": 2.1161372531173206, "grad_norm": 16.125, "learning_rate": 3.609613396553686e-06, "loss": 1.414371132850647, "step": 11626 }, { "epoch": 2.116501319741513, "grad_norm": 12.1875, "learning_rate": 3.608384980728618e-06, "loss": 1.5621743202209473, "step": 11628 }, { "epoch": 2.116865386365705, "grad_norm": 12.6875, "learning_rate": 3.6071569158633646e-06, "loss": 1.4366450309753418, "step": 11630 }, { "epoch": 2.117229452989897, "grad_norm": 11.1875, "learning_rate": 3.6059292021381336e-06, "loss": 1.2083649635314941, "step": 11632 }, { "epoch": 2.1175935196140894, "grad_norm": 7.59375, "learning_rate": 3.6047018397330913e-06, "loss": 1.3041483163833618, "step": 11634 }, { "epoch": 2.1179575862382816, "grad_norm": 19.125, "learning_rate": 3.6034748288283483e-06, "loss": 1.3927923440933228, "step": 11636 }, { "epoch": 2.1183216528624738, "grad_norm": 5.71875, "learning_rate": 3.602248169603959e-06, "loss": 0.922240674495697, "step": 11638 }, { "epoch": 2.118685719486666, "grad_norm": 14.1875, "learning_rate": 3.6010218622399363e-06, "loss": 1.390246033668518, "step": 11640 }, { "epoch": 2.119049786110858, "grad_norm": 15.875, "learning_rate": 3.5997959069162305e-06, "loss": 2.0122480392456055, "step": 11642 }, { "epoch": 2.1194138527350503, "grad_norm": 22.875, "learning_rate": 3.5985703038127494e-06, "loss": 0.9682165384292603, "step": 11644 }, { "epoch": 2.1197779193592425, "grad_norm": 10.3125, "learning_rate": 3.5973450531093453e-06, "loss": 0.9166086912155151, "step": 11646 }, { "epoch": 2.120141985983435, "grad_norm": 14.125, "learning_rate": 3.596120154985815e-06, "loss": 0.8686960339546204, "step": 11648 }, { "epoch": 2.1205060526076274, "grad_norm": 6.46875, "learning_rate": 3.5948956096219125e-06, "loss": 0.9494041204452515, "step": 11650 }, { "epoch": 2.1208701192318196, "grad_norm": 11.625, "learning_rate": 3.59367141719733e-06, "loss": 1.3254351615905762, "step": 11652 }, { "epoch": 2.1212341858560118, "grad_norm": 13.3125, "learning_rate": 3.5924475778917154e-06, "loss": 1.3369855880737305, "step": 11654 }, { "epoch": 2.121598252480204, "grad_norm": 13.0625, "learning_rate": 3.5912240918846644e-06, "loss": 1.413733720779419, "step": 11656 }, { "epoch": 2.121962319104396, "grad_norm": 8.1875, "learning_rate": 3.590000959355712e-06, "loss": 1.4298150539398193, "step": 11658 }, { "epoch": 2.1223263857285883, "grad_norm": 30.125, "learning_rate": 3.5887781804843558e-06, "loss": 2.0864064693450928, "step": 11660 }, { "epoch": 2.1226904523527805, "grad_norm": 21.875, "learning_rate": 3.5875557554500264e-06, "loss": 1.1395196914672852, "step": 11662 }, { "epoch": 2.1230545189769727, "grad_norm": 22.5, "learning_rate": 3.5863336844321144e-06, "loss": 1.1446491479873657, "step": 11664 }, { "epoch": 2.123418585601165, "grad_norm": 24.75, "learning_rate": 3.585111967609954e-06, "loss": 1.2273386716842651, "step": 11666 }, { "epoch": 2.123782652225357, "grad_norm": 9.875, "learning_rate": 3.5838906051628233e-06, "loss": 1.3664453029632568, "step": 11668 }, { "epoch": 2.1241467188495493, "grad_norm": 14.3125, "learning_rate": 3.5826695972699568e-06, "loss": 1.2502048015594482, "step": 11670 }, { "epoch": 2.1245107854737415, "grad_norm": 2.921875, "learning_rate": 3.5814489441105283e-06, "loss": 0.9418635368347168, "step": 11672 }, { "epoch": 2.1248748520979337, "grad_norm": 14.1875, "learning_rate": 3.5802286458636666e-06, "loss": 1.3807302713394165, "step": 11674 }, { "epoch": 2.1252389187221263, "grad_norm": 12.625, "learning_rate": 3.5790087027084456e-06, "loss": 1.4288439750671387, "step": 11676 }, { "epoch": 2.1256029853463185, "grad_norm": 10.0, "learning_rate": 3.577789114823884e-06, "loss": 1.3997688293457031, "step": 11678 }, { "epoch": 2.1259670519705107, "grad_norm": 4.0, "learning_rate": 3.5765698823889563e-06, "loss": 1.1656535863876343, "step": 11680 }, { "epoch": 2.126331118594703, "grad_norm": 8.4375, "learning_rate": 3.5753510055825737e-06, "loss": 1.1855342388153076, "step": 11682 }, { "epoch": 2.126695185218895, "grad_norm": 15.8125, "learning_rate": 3.574132484583606e-06, "loss": 1.4016551971435547, "step": 11684 }, { "epoch": 2.1270592518430873, "grad_norm": 6.03125, "learning_rate": 3.5729143195708677e-06, "loss": 1.3941712379455566, "step": 11686 }, { "epoch": 2.1274233184672795, "grad_norm": 36.0, "learning_rate": 3.5716965107231127e-06, "loss": 1.9570362567901611, "step": 11688 }, { "epoch": 2.1277873850914717, "grad_norm": 16.5, "learning_rate": 3.5704790582190575e-06, "loss": 1.331141471862793, "step": 11690 }, { "epoch": 2.128151451715664, "grad_norm": 6.625, "learning_rate": 3.5692619622373515e-06, "loss": 1.0926845073699951, "step": 11692 }, { "epoch": 2.128515518339856, "grad_norm": 8.4375, "learning_rate": 3.568045222956602e-06, "loss": 1.224328875541687, "step": 11694 }, { "epoch": 2.1288795849640483, "grad_norm": 13.125, "learning_rate": 3.566828840555363e-06, "loss": 1.718050479888916, "step": 11696 }, { "epoch": 2.1292436515882405, "grad_norm": 2.46875, "learning_rate": 3.5656128152121273e-06, "loss": 0.9652382135391235, "step": 11698 }, { "epoch": 2.1296077182124327, "grad_norm": 8.125, "learning_rate": 3.564397147105348e-06, "loss": 1.2943415641784668, "step": 11700 }, { "epoch": 2.1299717848366253, "grad_norm": 13.9375, "learning_rate": 3.563181836413414e-06, "loss": 0.5209476351737976, "step": 11702 }, { "epoch": 2.1303358514608175, "grad_norm": 10.9375, "learning_rate": 3.5619668833146712e-06, "loss": 1.2705987691879272, "step": 11704 }, { "epoch": 2.1306999180850097, "grad_norm": 18.5, "learning_rate": 3.56075228798741e-06, "loss": 0.9336791634559631, "step": 11706 }, { "epoch": 2.131063984709202, "grad_norm": 12.5625, "learning_rate": 3.5595380506098613e-06, "loss": 1.8572324514389038, "step": 11708 }, { "epoch": 2.131428051333394, "grad_norm": 22.5, "learning_rate": 3.558324171360217e-06, "loss": 1.4580127000808716, "step": 11710 }, { "epoch": 2.1317921179575863, "grad_norm": 237.0, "learning_rate": 3.557110650416602e-06, "loss": 1.339434266090393, "step": 11712 }, { "epoch": 2.1321561845817785, "grad_norm": 12.75, "learning_rate": 3.5558974879571007e-06, "loss": 1.6232883930206299, "step": 11714 }, { "epoch": 2.1325202512059707, "grad_norm": 22.0, "learning_rate": 3.5546846841597394e-06, "loss": 1.7452282905578613, "step": 11716 }, { "epoch": 2.132884317830163, "grad_norm": 11.9375, "learning_rate": 3.5534722392024868e-06, "loss": 0.8611134886741638, "step": 11718 }, { "epoch": 2.133248384454355, "grad_norm": 6.84375, "learning_rate": 3.552260153263272e-06, "loss": 1.0348193645477295, "step": 11720 }, { "epoch": 2.1336124510785472, "grad_norm": 13.375, "learning_rate": 3.5510484265199568e-06, "loss": 1.2420247793197632, "step": 11722 }, { "epoch": 2.1339765177027394, "grad_norm": 14.25, "learning_rate": 3.5498370591503616e-06, "loss": 1.4249308109283447, "step": 11724 }, { "epoch": 2.1343405843269316, "grad_norm": 8.75, "learning_rate": 3.5486260513322503e-06, "loss": 1.3123869895935059, "step": 11726 }, { "epoch": 2.1347046509511243, "grad_norm": 16.25, "learning_rate": 3.547415403243328e-06, "loss": 1.9426798820495605, "step": 11728 }, { "epoch": 2.1350687175753165, "grad_norm": 11.1875, "learning_rate": 3.5462051150612587e-06, "loss": 1.3553457260131836, "step": 11730 }, { "epoch": 2.1354327841995087, "grad_norm": 7.125, "learning_rate": 3.544995186963642e-06, "loss": 1.3344693183898926, "step": 11732 }, { "epoch": 2.135796850823701, "grad_norm": 22.125, "learning_rate": 3.5437856191280327e-06, "loss": 0.5752114653587341, "step": 11734 }, { "epoch": 2.136160917447893, "grad_norm": 16.25, "learning_rate": 3.542576411731933e-06, "loss": 1.3905531167984009, "step": 11736 }, { "epoch": 2.1365249840720852, "grad_norm": 4.78125, "learning_rate": 3.5413675649527814e-06, "loss": 1.1650147438049316, "step": 11738 }, { "epoch": 2.1368890506962774, "grad_norm": 14.3125, "learning_rate": 3.5401590789679787e-06, "loss": 1.4482753276824951, "step": 11740 }, { "epoch": 2.1372531173204696, "grad_norm": 11.25, "learning_rate": 3.5389509539548596e-06, "loss": 1.3894133567810059, "step": 11742 }, { "epoch": 2.137617183944662, "grad_norm": 9.8125, "learning_rate": 3.5377431900907157e-06, "loss": 1.3424606323242188, "step": 11744 }, { "epoch": 2.137981250568854, "grad_norm": 6.25, "learning_rate": 3.5365357875527816e-06, "loss": 1.211737871170044, "step": 11746 }, { "epoch": 2.138345317193046, "grad_norm": 6.875, "learning_rate": 3.535328746518234e-06, "loss": 1.3907968997955322, "step": 11748 }, { "epoch": 2.1387093838172384, "grad_norm": 14.5, "learning_rate": 3.5341220671642074e-06, "loss": 1.0780372619628906, "step": 11750 }, { "epoch": 2.1390734504414306, "grad_norm": 6.90625, "learning_rate": 3.5329157496677715e-06, "loss": 1.3604546785354614, "step": 11752 }, { "epoch": 2.1394375170656232, "grad_norm": 11.0625, "learning_rate": 3.531709794205952e-06, "loss": 1.4022518396377563, "step": 11754 }, { "epoch": 2.1398015836898154, "grad_norm": 9.75, "learning_rate": 3.530504200955719e-06, "loss": 1.324486494064331, "step": 11756 }, { "epoch": 2.1401656503140076, "grad_norm": 13.8125, "learning_rate": 3.5292989700939835e-06, "loss": 1.3497986793518066, "step": 11758 }, { "epoch": 2.1405297169382, "grad_norm": 11.6875, "learning_rate": 3.528094101797614e-06, "loss": 1.386899471282959, "step": 11760 }, { "epoch": 2.140893783562392, "grad_norm": 11.0625, "learning_rate": 3.526889596243415e-06, "loss": 1.3024482727050781, "step": 11762 }, { "epoch": 2.141257850186584, "grad_norm": 18.125, "learning_rate": 3.525685453608145e-06, "loss": 1.3635355234146118, "step": 11764 }, { "epoch": 2.1416219168107764, "grad_norm": 59.25, "learning_rate": 3.5244816740685103e-06, "loss": 1.3361120223999023, "step": 11766 }, { "epoch": 2.1419859834349686, "grad_norm": 17.875, "learning_rate": 3.523278257801154e-06, "loss": 1.527967095375061, "step": 11768 }, { "epoch": 2.142350050059161, "grad_norm": 26.5, "learning_rate": 3.522075204982679e-06, "loss": 1.9064382314682007, "step": 11770 }, { "epoch": 2.142714116683353, "grad_norm": 24.125, "learning_rate": 3.5208725157896223e-06, "loss": 1.6158825159072876, "step": 11772 }, { "epoch": 2.143078183307545, "grad_norm": 7.1875, "learning_rate": 3.5196701903984786e-06, "loss": 1.1356252431869507, "step": 11774 }, { "epoch": 2.1434422499317374, "grad_norm": 10.75, "learning_rate": 3.518468228985684e-06, "loss": 1.0046056509017944, "step": 11776 }, { "epoch": 2.1438063165559296, "grad_norm": 10.1875, "learning_rate": 3.5172666317276178e-06, "loss": 1.4122179746627808, "step": 11778 }, { "epoch": 2.1441703831801218, "grad_norm": 18.625, "learning_rate": 3.516065398800614e-06, "loss": 1.3657464981079102, "step": 11780 }, { "epoch": 2.144534449804314, "grad_norm": 12.75, "learning_rate": 3.5148645303809436e-06, "loss": 1.243217945098877, "step": 11782 }, { "epoch": 2.1448985164285066, "grad_norm": 4.375, "learning_rate": 3.5136640266448336e-06, "loss": 1.1562645435333252, "step": 11784 }, { "epoch": 2.145262583052699, "grad_norm": 10.625, "learning_rate": 3.5124638877684527e-06, "loss": 2.1999475955963135, "step": 11786 }, { "epoch": 2.145626649676891, "grad_norm": 6.15625, "learning_rate": 3.5112641139279126e-06, "loss": 1.5270907878875732, "step": 11788 }, { "epoch": 2.145990716301083, "grad_norm": 2.90625, "learning_rate": 3.51006470529928e-06, "loss": 1.1781255006790161, "step": 11790 }, { "epoch": 2.1463547829252754, "grad_norm": 6.84375, "learning_rate": 3.508865662058558e-06, "loss": 1.3630359172821045, "step": 11792 }, { "epoch": 2.1467188495494676, "grad_norm": 72.5, "learning_rate": 3.507666984381707e-06, "loss": 0.9165368676185608, "step": 11794 }, { "epoch": 2.1470829161736598, "grad_norm": 33.5, "learning_rate": 3.5064686724446263e-06, "loss": 1.914183259010315, "step": 11796 }, { "epoch": 2.147446982797852, "grad_norm": 13.0625, "learning_rate": 3.50527072642316e-06, "loss": 1.9940767288208008, "step": 11798 }, { "epoch": 2.147811049422044, "grad_norm": 15.25, "learning_rate": 3.5040731464931064e-06, "loss": 0.7465848326683044, "step": 11800 }, { "epoch": 2.1481751160462363, "grad_norm": 25.25, "learning_rate": 3.502875932830201e-06, "loss": 1.5110869407653809, "step": 11802 }, { "epoch": 2.1485391826704285, "grad_norm": 13.4375, "learning_rate": 3.5016790856101345e-06, "loss": 1.272931456565857, "step": 11804 }, { "epoch": 2.1489032492946207, "grad_norm": 14.9375, "learning_rate": 3.500482605008538e-06, "loss": 1.331356406211853, "step": 11806 }, { "epoch": 2.149267315918813, "grad_norm": 18.5, "learning_rate": 3.4992864912009873e-06, "loss": 1.2928212881088257, "step": 11808 }, { "epoch": 2.1496313825430056, "grad_norm": 13.6875, "learning_rate": 3.498090744363012e-06, "loss": 1.4406646490097046, "step": 11810 }, { "epoch": 2.1499954491671978, "grad_norm": 11.4375, "learning_rate": 3.4968953646700777e-06, "loss": 1.5231435298919678, "step": 11812 }, { "epoch": 2.15035951579139, "grad_norm": 10.0625, "learning_rate": 3.495700352297606e-06, "loss": 1.1035821437835693, "step": 11814 }, { "epoch": 2.150723582415582, "grad_norm": 23.0, "learning_rate": 3.49450570742096e-06, "loss": 0.7823556661605835, "step": 11816 }, { "epoch": 2.1510876490397743, "grad_norm": 32.25, "learning_rate": 3.4933114302154448e-06, "loss": 1.5058984756469727, "step": 11818 }, { "epoch": 2.1514517156639665, "grad_norm": 3.890625, "learning_rate": 3.492117520856322e-06, "loss": 1.0921863317489624, "step": 11820 }, { "epoch": 2.1518157822881587, "grad_norm": 47.25, "learning_rate": 3.4909239795187854e-06, "loss": 1.3037811517715454, "step": 11822 }, { "epoch": 2.152179848912351, "grad_norm": 5.53125, "learning_rate": 3.489730806377988e-06, "loss": 0.9490963816642761, "step": 11824 }, { "epoch": 2.152543915536543, "grad_norm": 16.875, "learning_rate": 3.4885380016090237e-06, "loss": 1.9181374311447144, "step": 11826 }, { "epoch": 2.1529079821607353, "grad_norm": 72.0, "learning_rate": 3.487345565386926e-06, "loss": 1.4276347160339355, "step": 11828 }, { "epoch": 2.1532720487849275, "grad_norm": 11.4375, "learning_rate": 3.486153497886687e-06, "loss": 1.294939637184143, "step": 11830 }, { "epoch": 2.1536361154091197, "grad_norm": 8.625, "learning_rate": 3.484961799283232e-06, "loss": 1.2862040996551514, "step": 11832 }, { "epoch": 2.154000182033312, "grad_norm": 12.0, "learning_rate": 3.4837704697514405e-06, "loss": 1.4528831243515015, "step": 11834 }, { "epoch": 2.1543642486575045, "grad_norm": 17.5, "learning_rate": 3.4825795094661375e-06, "loss": 1.5572094917297363, "step": 11836 }, { "epoch": 2.1547283152816967, "grad_norm": 16.875, "learning_rate": 3.4813889186020868e-06, "loss": 2.0158350467681885, "step": 11838 }, { "epoch": 2.155092381905889, "grad_norm": 7.25, "learning_rate": 3.4801986973340075e-06, "loss": 1.38032066822052, "step": 11840 }, { "epoch": 2.155456448530081, "grad_norm": 16.0, "learning_rate": 3.4790088458365546e-06, "loss": 1.3108739852905273, "step": 11842 }, { "epoch": 2.1558205151542733, "grad_norm": 17.375, "learning_rate": 3.4778193642843383e-06, "loss": 1.3640022277832031, "step": 11844 }, { "epoch": 2.1561845817784655, "grad_norm": 14.375, "learning_rate": 3.47663025285191e-06, "loss": 1.3018871545791626, "step": 11846 }, { "epoch": 2.1565486484026577, "grad_norm": 13.125, "learning_rate": 3.4754415117137643e-06, "loss": 1.208632230758667, "step": 11848 }, { "epoch": 2.15691271502685, "grad_norm": 15.9375, "learning_rate": 3.474253141044347e-06, "loss": 1.2062726020812988, "step": 11850 }, { "epoch": 2.157276781651042, "grad_norm": 15.3125, "learning_rate": 3.4730651410180426e-06, "loss": 0.43019193410873413, "step": 11852 }, { "epoch": 2.1576408482752343, "grad_norm": 7.0, "learning_rate": 3.47187751180919e-06, "loss": 1.3824466466903687, "step": 11854 }, { "epoch": 2.1580049148994265, "grad_norm": 5.8125, "learning_rate": 3.470690253592068e-06, "loss": 1.3821743726730347, "step": 11856 }, { "epoch": 2.1583689815236187, "grad_norm": 7.15625, "learning_rate": 3.4695033665408984e-06, "loss": 1.4098341464996338, "step": 11858 }, { "epoch": 2.158733048147811, "grad_norm": 7.5625, "learning_rate": 3.468316850829857e-06, "loss": 1.1751147508621216, "step": 11860 }, { "epoch": 2.1590971147720035, "grad_norm": 11.5625, "learning_rate": 3.467130706633055e-06, "loss": 1.5015373229980469, "step": 11862 }, { "epoch": 2.1594611813961957, "grad_norm": 8.625, "learning_rate": 3.4659449341245586e-06, "loss": 1.2517118453979492, "step": 11864 }, { "epoch": 2.159825248020388, "grad_norm": 15.875, "learning_rate": 3.4647595334783753e-06, "loss": 1.3400590419769287, "step": 11866 }, { "epoch": 2.16018931464458, "grad_norm": 14.3125, "learning_rate": 3.4635745048684523e-06, "loss": 1.5122504234313965, "step": 11868 }, { "epoch": 2.1605533812687723, "grad_norm": 27.875, "learning_rate": 3.4623898484686948e-06, "loss": 1.7282097339630127, "step": 11870 }, { "epoch": 2.1609174478929645, "grad_norm": 16.375, "learning_rate": 3.4612055644529397e-06, "loss": 1.5225062370300293, "step": 11872 }, { "epoch": 2.1612815145171567, "grad_norm": 16.5, "learning_rate": 3.4600216529949813e-06, "loss": 1.9339747428894043, "step": 11874 }, { "epoch": 2.161645581141349, "grad_norm": 8.375, "learning_rate": 3.4588381142685524e-06, "loss": 1.2551170587539673, "step": 11876 }, { "epoch": 2.162009647765541, "grad_norm": 2.84375, "learning_rate": 3.45765494844733e-06, "loss": 1.0581399202346802, "step": 11878 }, { "epoch": 2.1623737143897332, "grad_norm": 5.21875, "learning_rate": 3.4564721557049425e-06, "loss": 1.1101672649383545, "step": 11880 }, { "epoch": 2.1627377810139254, "grad_norm": 5.8125, "learning_rate": 3.4552897362149556e-06, "loss": 0.9227743148803711, "step": 11882 }, { "epoch": 2.1631018476381176, "grad_norm": 8.9375, "learning_rate": 3.4541076901508886e-06, "loss": 1.0068249702453613, "step": 11884 }, { "epoch": 2.16346591426231, "grad_norm": 8.8125, "learning_rate": 3.452926017686201e-06, "loss": 1.3254737854003906, "step": 11886 }, { "epoch": 2.163829980886502, "grad_norm": 9.25, "learning_rate": 3.4517447189942965e-06, "loss": 1.3896158933639526, "step": 11888 }, { "epoch": 2.1641940475106947, "grad_norm": 14.0, "learning_rate": 3.45056379424853e-06, "loss": 1.4383244514465332, "step": 11890 }, { "epoch": 2.164558114134887, "grad_norm": 10.875, "learning_rate": 3.4493832436221913e-06, "loss": 1.3949906826019287, "step": 11892 }, { "epoch": 2.164922180759079, "grad_norm": 9.9375, "learning_rate": 3.4482030672885258e-06, "loss": 1.3575925827026367, "step": 11894 }, { "epoch": 2.1652862473832712, "grad_norm": 10.125, "learning_rate": 3.4470232654207215e-06, "loss": 1.101464867591858, "step": 11896 }, { "epoch": 2.1656503140074634, "grad_norm": 7.1875, "learning_rate": 3.445843838191903e-06, "loss": 1.0390342473983765, "step": 11898 }, { "epoch": 2.1660143806316556, "grad_norm": 14.375, "learning_rate": 3.4446647857751523e-06, "loss": 0.9165394306182861, "step": 11900 }, { "epoch": 2.166378447255848, "grad_norm": 7.96875, "learning_rate": 3.443486108343487e-06, "loss": 1.411670446395874, "step": 11902 }, { "epoch": 2.16674251388004, "grad_norm": 20.875, "learning_rate": 3.4423078060698758e-06, "loss": 1.4992218017578125, "step": 11904 }, { "epoch": 2.167106580504232, "grad_norm": 32.75, "learning_rate": 3.4411298791272295e-06, "loss": 1.2359474897384644, "step": 11906 }, { "epoch": 2.1674706471284244, "grad_norm": 7.3125, "learning_rate": 3.4399523276884007e-06, "loss": 1.5338691473007202, "step": 11908 }, { "epoch": 2.1678347137526166, "grad_norm": 15.0, "learning_rate": 3.438775151926195e-06, "loss": 1.4390363693237305, "step": 11910 }, { "epoch": 2.168198780376809, "grad_norm": 21.25, "learning_rate": 3.4375983520133538e-06, "loss": 1.083348035812378, "step": 11912 }, { "epoch": 2.168562847001001, "grad_norm": 9.0, "learning_rate": 3.4364219281225713e-06, "loss": 1.3756667375564575, "step": 11914 }, { "epoch": 2.168926913625193, "grad_norm": 11.625, "learning_rate": 3.4352458804264834e-06, "loss": 1.4592182636260986, "step": 11916 }, { "epoch": 2.169290980249386, "grad_norm": 23.5, "learning_rate": 3.434070209097665e-06, "loss": 1.615593433380127, "step": 11918 }, { "epoch": 2.169655046873578, "grad_norm": 32.0, "learning_rate": 3.4328949143086475e-06, "loss": 1.8358324766159058, "step": 11920 }, { "epoch": 2.17001911349777, "grad_norm": 13.5, "learning_rate": 3.4317199962318954e-06, "loss": 1.3876359462738037, "step": 11922 }, { "epoch": 2.1703831801219624, "grad_norm": 7.46875, "learning_rate": 3.4305454550398265e-06, "loss": 1.1813454627990723, "step": 11924 }, { "epoch": 2.1707472467461546, "grad_norm": 35.75, "learning_rate": 3.4293712909048006e-06, "loss": 1.3358076810836792, "step": 11926 }, { "epoch": 2.171111313370347, "grad_norm": 13.5625, "learning_rate": 3.428197503999117e-06, "loss": 1.2706727981567383, "step": 11928 }, { "epoch": 2.171475379994539, "grad_norm": 13.375, "learning_rate": 3.4270240944950297e-06, "loss": 1.5030291080474854, "step": 11930 }, { "epoch": 2.171839446618731, "grad_norm": 15.25, "learning_rate": 3.425851062564727e-06, "loss": 1.4681525230407715, "step": 11932 }, { "epoch": 2.1722035132429234, "grad_norm": 25.5, "learning_rate": 3.4246784083803496e-06, "loss": 1.3972972631454468, "step": 11934 }, { "epoch": 2.1725675798671156, "grad_norm": 5.125, "learning_rate": 3.4235061321139807e-06, "loss": 1.0363470315933228, "step": 11936 }, { "epoch": 2.1729316464913078, "grad_norm": 9.9375, "learning_rate": 3.422334233937642e-06, "loss": 1.3285410404205322, "step": 11938 }, { "epoch": 2.1732957131155, "grad_norm": 18.375, "learning_rate": 3.4211627140233116e-06, "loss": 1.4808104038238525, "step": 11940 }, { "epoch": 2.173659779739692, "grad_norm": 10.3125, "learning_rate": 3.4199915725428984e-06, "loss": 1.3016339540481567, "step": 11942 }, { "epoch": 2.174023846363885, "grad_norm": 22.625, "learning_rate": 3.4188208096682673e-06, "loss": 1.4737329483032227, "step": 11944 }, { "epoch": 2.174387912988077, "grad_norm": 13.4375, "learning_rate": 3.417650425571224e-06, "loss": 1.5741899013519287, "step": 11946 }, { "epoch": 2.174751979612269, "grad_norm": 12.9375, "learning_rate": 3.4164804204235118e-06, "loss": 1.8713114261627197, "step": 11948 }, { "epoch": 2.1751160462364614, "grad_norm": 8.25, "learning_rate": 3.4153107943968313e-06, "loss": 1.374143362045288, "step": 11950 }, { "epoch": 2.1754801128606536, "grad_norm": 14.4375, "learning_rate": 3.4141415476628135e-06, "loss": 1.372848629951477, "step": 11952 }, { "epoch": 2.1758441794848458, "grad_norm": 39.75, "learning_rate": 3.412972680393046e-06, "loss": 1.4966922998428345, "step": 11954 }, { "epoch": 2.176208246109038, "grad_norm": 31.25, "learning_rate": 3.411804192759054e-06, "loss": 2.1731247901916504, "step": 11956 }, { "epoch": 2.17657231273323, "grad_norm": 6.5, "learning_rate": 3.410636084932305e-06, "loss": 1.2354437112808228, "step": 11958 }, { "epoch": 2.1769363793574223, "grad_norm": 10.1875, "learning_rate": 3.40946835708422e-06, "loss": 1.1443841457366943, "step": 11960 }, { "epoch": 2.1773004459816145, "grad_norm": 9.625, "learning_rate": 3.4083010093861524e-06, "loss": 1.3149747848510742, "step": 11962 }, { "epoch": 2.1776645126058067, "grad_norm": 12.5, "learning_rate": 3.407134042009409e-06, "loss": 1.5538043975830078, "step": 11964 }, { "epoch": 2.178028579229999, "grad_norm": 31.125, "learning_rate": 3.4059674551252396e-06, "loss": 1.489409327507019, "step": 11966 }, { "epoch": 2.178392645854191, "grad_norm": 11.125, "learning_rate": 3.4048012489048297e-06, "loss": 1.757889986038208, "step": 11968 }, { "epoch": 2.1787567124783838, "grad_norm": 7.53125, "learning_rate": 3.4036354235193224e-06, "loss": 1.307370901107788, "step": 11970 }, { "epoch": 2.179120779102576, "grad_norm": 8.125, "learning_rate": 3.4024699791397914e-06, "loss": 1.2587099075317383, "step": 11972 }, { "epoch": 2.179484845726768, "grad_norm": 11.875, "learning_rate": 3.4013049159372658e-06, "loss": 1.131037712097168, "step": 11974 }, { "epoch": 2.1798489123509603, "grad_norm": 96.0, "learning_rate": 3.400140234082714e-06, "loss": 1.0939170122146606, "step": 11976 }, { "epoch": 2.1802129789751525, "grad_norm": 35.25, "learning_rate": 3.3989759337470422e-06, "loss": 1.1858646869659424, "step": 11978 }, { "epoch": 2.1805770455993447, "grad_norm": 10.625, "learning_rate": 3.397812015101115e-06, "loss": 1.44175386428833, "step": 11980 }, { "epoch": 2.180941112223537, "grad_norm": 39.75, "learning_rate": 3.396648478315726e-06, "loss": 1.3288320302963257, "step": 11982 }, { "epoch": 2.181305178847729, "grad_norm": 14.25, "learning_rate": 3.3954853235616237e-06, "loss": 1.5406124591827393, "step": 11984 }, { "epoch": 2.1816692454719213, "grad_norm": 15.625, "learning_rate": 3.394322551009497e-06, "loss": 2.010634183883667, "step": 11986 }, { "epoch": 2.1820333120961135, "grad_norm": 8.625, "learning_rate": 3.393160160829972e-06, "loss": 1.1759456396102905, "step": 11988 }, { "epoch": 2.1823973787203057, "grad_norm": 9.5, "learning_rate": 3.3919981531936324e-06, "loss": 1.1139094829559326, "step": 11990 }, { "epoch": 2.182761445344498, "grad_norm": 11.125, "learning_rate": 3.3908365282709914e-06, "loss": 0.8248730301856995, "step": 11992 }, { "epoch": 2.18312551196869, "grad_norm": 10.0, "learning_rate": 3.3896752862325176e-06, "loss": 0.9815011620521545, "step": 11994 }, { "epoch": 2.1834895785928827, "grad_norm": 5.46875, "learning_rate": 3.3885144272486175e-06, "loss": 0.06556104123592377, "step": 11996 }, { "epoch": 2.183853645217075, "grad_norm": 12.75, "learning_rate": 3.3873539514896404e-06, "loss": 0.4528769552707672, "step": 11998 }, { "epoch": 2.184217711841267, "grad_norm": 17.375, "learning_rate": 3.386193859125884e-06, "loss": 1.5093042850494385, "step": 12000 }, { "epoch": 2.1845817784654593, "grad_norm": 19.75, "learning_rate": 3.3850341503275843e-06, "loss": 1.704003930091858, "step": 12002 }, { "epoch": 2.1849458450896515, "grad_norm": 8.1875, "learning_rate": 3.383874825264926e-06, "loss": 1.5869312286376953, "step": 12004 }, { "epoch": 2.1853099117138437, "grad_norm": 22.125, "learning_rate": 3.3827158841080363e-06, "loss": 2.0539438724517822, "step": 12006 }, { "epoch": 2.185673978338036, "grad_norm": 9.75, "learning_rate": 3.381557327026982e-06, "loss": 1.730271577835083, "step": 12008 }, { "epoch": 2.186038044962228, "grad_norm": 15.8125, "learning_rate": 3.38039915419178e-06, "loss": 1.1404756307601929, "step": 12010 }, { "epoch": 2.1864021115864203, "grad_norm": 14.0625, "learning_rate": 3.3792413657723833e-06, "loss": 1.7714409828186035, "step": 12012 }, { "epoch": 2.1867661782106125, "grad_norm": 10.0625, "learning_rate": 3.3780839619386968e-06, "loss": 1.3931514024734497, "step": 12014 }, { "epoch": 2.1871302448348047, "grad_norm": 15.1875, "learning_rate": 3.3769269428605646e-06, "loss": 1.1737124919891357, "step": 12016 }, { "epoch": 2.187494311458997, "grad_norm": 9.6875, "learning_rate": 3.37577030870777e-06, "loss": 1.6138839721679688, "step": 12018 }, { "epoch": 2.187858378083189, "grad_norm": 32.5, "learning_rate": 3.374614059650051e-06, "loss": 1.4430218935012817, "step": 12020 }, { "epoch": 2.1882224447073813, "grad_norm": 9.0, "learning_rate": 3.3734581958570754e-06, "loss": 1.4185245037078857, "step": 12022 }, { "epoch": 2.1885865113315734, "grad_norm": 10.25, "learning_rate": 3.372302717498467e-06, "loss": 1.3078869581222534, "step": 12024 }, { "epoch": 2.188950577955766, "grad_norm": 14.9375, "learning_rate": 3.371147624743787e-06, "loss": 1.4749256372451782, "step": 12026 }, { "epoch": 2.1893146445799583, "grad_norm": 17.25, "learning_rate": 3.369992917762536e-06, "loss": 1.4176750183105469, "step": 12028 }, { "epoch": 2.1896787112041505, "grad_norm": 13.6875, "learning_rate": 3.368838596724169e-06, "loss": 1.318245530128479, "step": 12030 }, { "epoch": 2.1900427778283427, "grad_norm": 9.6875, "learning_rate": 3.3676846617980724e-06, "loss": 1.1626064777374268, "step": 12032 }, { "epoch": 2.190406844452535, "grad_norm": 10.0625, "learning_rate": 3.366531113153585e-06, "loss": 1.1749370098114014, "step": 12034 }, { "epoch": 2.190770911076727, "grad_norm": 20.25, "learning_rate": 3.365377950959985e-06, "loss": 1.3727855682373047, "step": 12036 }, { "epoch": 2.1911349777009193, "grad_norm": 10.0625, "learning_rate": 3.3642251753864913e-06, "loss": 1.436610460281372, "step": 12038 }, { "epoch": 2.1914990443251114, "grad_norm": 13.0625, "learning_rate": 3.3630727866022737e-06, "loss": 1.3140615224838257, "step": 12040 }, { "epoch": 2.1918631109493036, "grad_norm": 12.125, "learning_rate": 3.3619207847764357e-06, "loss": 1.3537803888320923, "step": 12042 }, { "epoch": 2.192227177573496, "grad_norm": 13.5, "learning_rate": 3.360769170078033e-06, "loss": 1.4226624965667725, "step": 12044 }, { "epoch": 2.192591244197688, "grad_norm": 27.5, "learning_rate": 3.3596179426760594e-06, "loss": 1.5557861328125, "step": 12046 }, { "epoch": 2.19295531082188, "grad_norm": 9.5, "learning_rate": 3.3584671027394496e-06, "loss": 1.7801411151885986, "step": 12048 }, { "epoch": 2.1933193774460724, "grad_norm": 12.3125, "learning_rate": 3.3573166504370902e-06, "loss": 1.2176892757415771, "step": 12050 }, { "epoch": 2.193683444070265, "grad_norm": 3.734375, "learning_rate": 3.3561665859377997e-06, "loss": 0.8376621007919312, "step": 12052 }, { "epoch": 2.1940475106944572, "grad_norm": 9.3125, "learning_rate": 3.3550169094103497e-06, "loss": 1.181688666343689, "step": 12054 }, { "epoch": 2.1944115773186494, "grad_norm": 17.375, "learning_rate": 3.35386762102345e-06, "loss": 1.4156429767608643, "step": 12056 }, { "epoch": 2.1947756439428416, "grad_norm": 23.5, "learning_rate": 3.352718720945751e-06, "loss": 1.3488410711288452, "step": 12058 }, { "epoch": 2.195139710567034, "grad_norm": 14.5625, "learning_rate": 3.3515702093458534e-06, "loss": 1.056420922279358, "step": 12060 }, { "epoch": 2.195503777191226, "grad_norm": 12.75, "learning_rate": 3.350422086392292e-06, "loss": 1.1180254220962524, "step": 12062 }, { "epoch": 2.195867843815418, "grad_norm": 10.6875, "learning_rate": 3.349274352253553e-06, "loss": 1.0712347030639648, "step": 12064 }, { "epoch": 2.1962319104396104, "grad_norm": 9.6875, "learning_rate": 3.348127007098061e-06, "loss": 1.569072961807251, "step": 12066 }, { "epoch": 2.1965959770638026, "grad_norm": 12.3125, "learning_rate": 3.346980051094182e-06, "loss": 1.359500765800476, "step": 12068 }, { "epoch": 2.196960043687995, "grad_norm": 9.6875, "learning_rate": 3.34583348441023e-06, "loss": 1.3142786026000977, "step": 12070 }, { "epoch": 2.197324110312187, "grad_norm": 11.875, "learning_rate": 3.3446873072144566e-06, "loss": 1.2809697389602661, "step": 12072 }, { "epoch": 2.197688176936379, "grad_norm": 9.125, "learning_rate": 3.3435415196750605e-06, "loss": 1.0553083419799805, "step": 12074 }, { "epoch": 2.1980522435605714, "grad_norm": 5.1875, "learning_rate": 3.342396121960182e-06, "loss": 1.3698594570159912, "step": 12076 }, { "epoch": 2.198416310184764, "grad_norm": 3.21875, "learning_rate": 3.3412511142379002e-06, "loss": 0.8591696619987488, "step": 12078 }, { "epoch": 2.198780376808956, "grad_norm": 10.5625, "learning_rate": 3.3401064966762443e-06, "loss": 1.2664319276809692, "step": 12080 }, { "epoch": 2.1991444434331484, "grad_norm": 15.6875, "learning_rate": 3.3389622694431778e-06, "loss": 1.5193262100219727, "step": 12082 }, { "epoch": 2.1995085100573406, "grad_norm": 12.25, "learning_rate": 3.3378184327066156e-06, "loss": 1.5679194927215576, "step": 12084 }, { "epoch": 2.199872576681533, "grad_norm": 10.0625, "learning_rate": 3.3366749866344106e-06, "loss": 0.9765119552612305, "step": 12086 }, { "epoch": 2.200236643305725, "grad_norm": 7.5625, "learning_rate": 3.3355319313943556e-06, "loss": 1.3023643493652344, "step": 12088 }, { "epoch": 2.200600709929917, "grad_norm": 14.5625, "learning_rate": 3.3343892671541942e-06, "loss": 1.3609817028045654, "step": 12090 }, { "epoch": 2.2009647765541094, "grad_norm": 10.3125, "learning_rate": 3.3332469940816024e-06, "loss": 1.764069676399231, "step": 12092 }, { "epoch": 2.2013288431783016, "grad_norm": 8.125, "learning_rate": 3.3321051123442072e-06, "loss": 1.5629851818084717, "step": 12094 }, { "epoch": 2.2016929098024938, "grad_norm": 9.1875, "learning_rate": 3.3309636221095776e-06, "loss": 1.4875407218933105, "step": 12096 }, { "epoch": 2.202056976426686, "grad_norm": 8.3125, "learning_rate": 3.3298225235452164e-06, "loss": 1.309502124786377, "step": 12098 }, { "epoch": 2.202421043050878, "grad_norm": 15.5, "learning_rate": 3.328681816818581e-06, "loss": 1.3239184617996216, "step": 12100 }, { "epoch": 2.2027851096750704, "grad_norm": 2.9375, "learning_rate": 3.32754150209706e-06, "loss": 1.223656415939331, "step": 12102 }, { "epoch": 2.203149176299263, "grad_norm": 13.1875, "learning_rate": 3.3264015795479955e-06, "loss": 1.2184572219848633, "step": 12104 }, { "epoch": 2.203513242923455, "grad_norm": 14.1875, "learning_rate": 3.3252620493386646e-06, "loss": 1.6346687078475952, "step": 12106 }, { "epoch": 2.2038773095476474, "grad_norm": 12.5625, "learning_rate": 3.3241229116362855e-06, "loss": 1.3852317333221436, "step": 12108 }, { "epoch": 2.2042413761718396, "grad_norm": 53.0, "learning_rate": 3.3229841666080275e-06, "loss": 1.761644721031189, "step": 12110 }, { "epoch": 2.2046054427960318, "grad_norm": 11.9375, "learning_rate": 3.321845814420992e-06, "loss": 1.3510289192199707, "step": 12112 }, { "epoch": 2.204969509420224, "grad_norm": 8.6875, "learning_rate": 3.32070785524223e-06, "loss": 1.3094415664672852, "step": 12114 }, { "epoch": 2.205333576044416, "grad_norm": 28.5, "learning_rate": 3.319570289238734e-06, "loss": 0.8440742492675781, "step": 12116 }, { "epoch": 2.2056976426686083, "grad_norm": 4.0, "learning_rate": 3.318433116577433e-06, "loss": 1.1359409093856812, "step": 12118 }, { "epoch": 2.2060617092928005, "grad_norm": 10.3125, "learning_rate": 3.3172963374252064e-06, "loss": 0.9074627161026001, "step": 12120 }, { "epoch": 2.2064257759169927, "grad_norm": 10.9375, "learning_rate": 3.316159951948868e-06, "loss": 1.136393666267395, "step": 12122 }, { "epoch": 2.206789842541185, "grad_norm": 17.75, "learning_rate": 3.315023960315181e-06, "loss": 1.3155993223190308, "step": 12124 }, { "epoch": 2.207153909165377, "grad_norm": 11.4375, "learning_rate": 3.313888362690848e-06, "loss": 1.5051074028015137, "step": 12126 }, { "epoch": 2.2075179757895693, "grad_norm": 67.0, "learning_rate": 3.31275315924251e-06, "loss": 1.341226577758789, "step": 12128 }, { "epoch": 2.2078820424137615, "grad_norm": 19.25, "learning_rate": 3.3116183501367573e-06, "loss": 1.3257554769515991, "step": 12130 }, { "epoch": 2.208246109037954, "grad_norm": 20.75, "learning_rate": 3.310483935540114e-06, "loss": 1.3258298635482788, "step": 12132 }, { "epoch": 2.2086101756621463, "grad_norm": 9.5, "learning_rate": 3.3093499156190554e-06, "loss": 1.315000295639038, "step": 12134 }, { "epoch": 2.2089742422863385, "grad_norm": 5.9375, "learning_rate": 3.3082162905399928e-06, "loss": 1.1560317277908325, "step": 12136 }, { "epoch": 2.2093383089105307, "grad_norm": 5.1875, "learning_rate": 3.3070830604692796e-06, "loss": 0.8162409067153931, "step": 12138 }, { "epoch": 2.209702375534723, "grad_norm": 6.25, "learning_rate": 3.3059502255732155e-06, "loss": 0.9415969252586365, "step": 12140 }, { "epoch": 2.210066442158915, "grad_norm": 5.25, "learning_rate": 3.304817786018035e-06, "loss": 0.37937045097351074, "step": 12142 }, { "epoch": 2.2104305087831073, "grad_norm": 6.46875, "learning_rate": 3.303685741969923e-06, "loss": 1.390806794166565, "step": 12144 }, { "epoch": 2.2107945754072995, "grad_norm": 16.125, "learning_rate": 3.3025540935950027e-06, "loss": 0.951095461845398, "step": 12146 }, { "epoch": 2.2111586420314917, "grad_norm": 8.0625, "learning_rate": 3.301422841059335e-06, "loss": 1.3348033428192139, "step": 12148 }, { "epoch": 2.211522708655684, "grad_norm": 12.5, "learning_rate": 3.3002919845289315e-06, "loss": 1.309812307357788, "step": 12150 }, { "epoch": 2.211886775279876, "grad_norm": 126.0, "learning_rate": 3.299161524169736e-06, "loss": 1.5212230682373047, "step": 12152 }, { "epoch": 2.2122508419040683, "grad_norm": 52.25, "learning_rate": 3.2980314601476423e-06, "loss": 0.5571275949478149, "step": 12154 }, { "epoch": 2.2126149085282605, "grad_norm": 5.78125, "learning_rate": 3.2969017926284822e-06, "loss": 1.0546280145645142, "step": 12156 }, { "epoch": 2.2129789751524527, "grad_norm": 18.875, "learning_rate": 3.2957725217780277e-06, "loss": 1.5623667240142822, "step": 12158 }, { "epoch": 2.2133430417766453, "grad_norm": 13.625, "learning_rate": 3.294643647761999e-06, "loss": 1.4025273323059082, "step": 12160 }, { "epoch": 2.2137071084008375, "grad_norm": 10.5625, "learning_rate": 3.2935151707460476e-06, "loss": 1.3843116760253906, "step": 12162 }, { "epoch": 2.2140711750250297, "grad_norm": 6.09375, "learning_rate": 3.292387090895779e-06, "loss": 1.3283593654632568, "step": 12164 }, { "epoch": 2.214435241649222, "grad_norm": 6.90625, "learning_rate": 3.2912594083767326e-06, "loss": 1.0081628561019897, "step": 12166 }, { "epoch": 2.214799308273414, "grad_norm": 12.6875, "learning_rate": 3.2901321233543882e-06, "loss": 1.3083016872406006, "step": 12168 }, { "epoch": 2.2151633748976063, "grad_norm": 51.25, "learning_rate": 3.2890052359941742e-06, "loss": 1.4367625713348389, "step": 12170 }, { "epoch": 2.2155274415217985, "grad_norm": 14.875, "learning_rate": 3.287878746461453e-06, "loss": 1.7600932121276855, "step": 12172 }, { "epoch": 2.2158915081459907, "grad_norm": 8.0625, "learning_rate": 3.2867526549215354e-06, "loss": 1.391483187675476, "step": 12174 }, { "epoch": 2.216255574770183, "grad_norm": 10.8125, "learning_rate": 3.285626961539672e-06, "loss": 1.356466293334961, "step": 12176 }, { "epoch": 2.216619641394375, "grad_norm": 9.8125, "learning_rate": 3.2845016664810486e-06, "loss": 1.3807181119918823, "step": 12178 }, { "epoch": 2.2169837080185673, "grad_norm": 11.25, "learning_rate": 3.283376769910803e-06, "loss": 1.5090417861938477, "step": 12180 }, { "epoch": 2.2173477746427595, "grad_norm": 16.25, "learning_rate": 3.282252271994005e-06, "loss": 1.2962932586669922, "step": 12182 }, { "epoch": 2.2177118412669516, "grad_norm": 8.3125, "learning_rate": 3.2811281728956733e-06, "loss": 1.3331317901611328, "step": 12184 }, { "epoch": 2.2180759078911443, "grad_norm": 156.0, "learning_rate": 3.2800044727807645e-06, "loss": 1.4396144151687622, "step": 12186 }, { "epoch": 2.2184399745153365, "grad_norm": 13.75, "learning_rate": 3.278881171814173e-06, "loss": 1.1396660804748535, "step": 12188 }, { "epoch": 2.2188040411395287, "grad_norm": 9.9375, "learning_rate": 3.277758270160745e-06, "loss": 0.6598251461982727, "step": 12190 }, { "epoch": 2.219168107763721, "grad_norm": 15.3125, "learning_rate": 3.276635767985257e-06, "loss": 0.5037153959274292, "step": 12192 }, { "epoch": 2.219532174387913, "grad_norm": 9.5625, "learning_rate": 3.2755136654524326e-06, "loss": 1.090395450592041, "step": 12194 }, { "epoch": 2.2198962410121053, "grad_norm": 14.625, "learning_rate": 3.2743919627269396e-06, "loss": 0.9242769479751587, "step": 12196 }, { "epoch": 2.2202603076362974, "grad_norm": 28.75, "learning_rate": 3.273270659973376e-06, "loss": 1.5078730583190918, "step": 12198 }, { "epoch": 2.2206243742604896, "grad_norm": 12.0625, "learning_rate": 3.2721497573562955e-06, "loss": 1.3433661460876465, "step": 12200 }, { "epoch": 2.220988440884682, "grad_norm": 14.125, "learning_rate": 3.271029255040181e-06, "loss": 1.2620413303375244, "step": 12202 }, { "epoch": 2.221352507508874, "grad_norm": 8.0625, "learning_rate": 3.2699091531894646e-06, "loss": 1.3057994842529297, "step": 12204 }, { "epoch": 2.2217165741330662, "grad_norm": 4.1875, "learning_rate": 3.268789451968517e-06, "loss": 1.1626518964767456, "step": 12206 }, { "epoch": 2.2220806407572584, "grad_norm": 6.09375, "learning_rate": 3.267670151541647e-06, "loss": 1.0821871757507324, "step": 12208 }, { "epoch": 2.2224447073814506, "grad_norm": 10.0, "learning_rate": 3.266551252073111e-06, "loss": 1.0745364427566528, "step": 12210 }, { "epoch": 2.2228087740056433, "grad_norm": 91.5, "learning_rate": 3.2654327537270984e-06, "loss": 0.7835872173309326, "step": 12212 }, { "epoch": 2.2231728406298354, "grad_norm": 9.3125, "learning_rate": 3.2643146566677487e-06, "loss": 1.2483313083648682, "step": 12214 }, { "epoch": 2.2235369072540276, "grad_norm": 9.75, "learning_rate": 3.2631969610591375e-06, "loss": 1.2677079439163208, "step": 12216 }, { "epoch": 2.22390097387822, "grad_norm": 7.40625, "learning_rate": 3.262079667065279e-06, "loss": 1.2716532945632935, "step": 12218 }, { "epoch": 2.224265040502412, "grad_norm": 28.375, "learning_rate": 3.260962774850135e-06, "loss": 1.4709292650222778, "step": 12220 }, { "epoch": 2.224629107126604, "grad_norm": 14.3125, "learning_rate": 3.2598462845776014e-06, "loss": 1.2574224472045898, "step": 12222 }, { "epoch": 2.2249931737507964, "grad_norm": 5.8125, "learning_rate": 3.2587301964115213e-06, "loss": 1.220444917678833, "step": 12224 }, { "epoch": 2.2253572403749886, "grad_norm": 18.875, "learning_rate": 3.2576145105156777e-06, "loss": 0.8707526922225952, "step": 12226 }, { "epoch": 2.225721306999181, "grad_norm": 199.0, "learning_rate": 3.2564992270537878e-06, "loss": 0.9179031848907471, "step": 12228 }, { "epoch": 2.226085373623373, "grad_norm": 82.0, "learning_rate": 3.2553843461895206e-06, "loss": 1.598044514656067, "step": 12230 }, { "epoch": 2.226449440247565, "grad_norm": 14.25, "learning_rate": 3.2542698680864747e-06, "loss": 1.1532329320907593, "step": 12232 }, { "epoch": 2.2268135068717574, "grad_norm": 11.4375, "learning_rate": 3.2531557929082003e-06, "loss": 1.0540663003921509, "step": 12234 }, { "epoch": 2.2271775734959496, "grad_norm": 10.25, "learning_rate": 3.252042120818182e-06, "loss": 1.313855767250061, "step": 12236 }, { "epoch": 2.227541640120142, "grad_norm": 14.375, "learning_rate": 3.2509288519798433e-06, "loss": 1.1382684707641602, "step": 12238 }, { "epoch": 2.2279057067443344, "grad_norm": 19.75, "learning_rate": 3.249815986556557e-06, "loss": 0.746185302734375, "step": 12240 }, { "epoch": 2.2282697733685266, "grad_norm": 11.0, "learning_rate": 3.248703524711627e-06, "loss": 0.8475677371025085, "step": 12242 }, { "epoch": 2.228633839992719, "grad_norm": 12.5, "learning_rate": 3.247591466608306e-06, "loss": 1.4046236276626587, "step": 12244 }, { "epoch": 2.228997906616911, "grad_norm": 11.375, "learning_rate": 3.246479812409784e-06, "loss": 1.6883646249771118, "step": 12246 }, { "epoch": 2.229361973241103, "grad_norm": 60.25, "learning_rate": 3.2453685622791875e-06, "loss": 1.3166425228118896, "step": 12248 }, { "epoch": 2.2297260398652954, "grad_norm": 8.25, "learning_rate": 3.2442577163795935e-06, "loss": 1.2409021854400635, "step": 12250 }, { "epoch": 2.2300901064894876, "grad_norm": 13.6875, "learning_rate": 3.24314727487401e-06, "loss": 1.3663290739059448, "step": 12252 }, { "epoch": 2.2304541731136798, "grad_norm": 19.125, "learning_rate": 3.242037237925392e-06, "loss": 1.4763394594192505, "step": 12254 }, { "epoch": 2.230818239737872, "grad_norm": 7.375, "learning_rate": 3.240927605696633e-06, "loss": 1.2091948986053467, "step": 12256 }, { "epoch": 2.231182306362064, "grad_norm": 14.5, "learning_rate": 3.2398183783505643e-06, "loss": 1.0823699235916138, "step": 12258 }, { "epoch": 2.2315463729862564, "grad_norm": 13.875, "learning_rate": 3.2387095560499656e-06, "loss": 1.3707095384597778, "step": 12260 }, { "epoch": 2.2319104396104485, "grad_norm": 7.59375, "learning_rate": 3.2376011389575456e-06, "loss": 0.7047957181930542, "step": 12262 }, { "epoch": 2.2322745062346407, "grad_norm": 14.1875, "learning_rate": 3.236493127235965e-06, "loss": 1.3123741149902344, "step": 12264 }, { "epoch": 2.232638572858833, "grad_norm": 16.375, "learning_rate": 3.2353855210478204e-06, "loss": 0.8632066249847412, "step": 12266 }, { "epoch": 2.2330026394830256, "grad_norm": 12.8125, "learning_rate": 3.234278320555643e-06, "loss": 1.5546512603759766, "step": 12268 }, { "epoch": 2.2333667061072178, "grad_norm": 10.1875, "learning_rate": 3.2331715259219163e-06, "loss": 1.3444409370422363, "step": 12270 }, { "epoch": 2.23373077273141, "grad_norm": 8.0, "learning_rate": 3.2320651373090527e-06, "loss": 1.3481435775756836, "step": 12272 }, { "epoch": 2.234094839355602, "grad_norm": 8.0625, "learning_rate": 3.2309591548794145e-06, "loss": 1.3942453861236572, "step": 12274 }, { "epoch": 2.2344589059797944, "grad_norm": 410.0, "learning_rate": 3.2298535787952993e-06, "loss": 0.8943444490432739, "step": 12276 }, { "epoch": 2.2348229726039865, "grad_norm": 13.8125, "learning_rate": 3.2287484092189426e-06, "loss": 0.44548726081848145, "step": 12278 }, { "epoch": 2.2351870392281787, "grad_norm": 6.625, "learning_rate": 3.2276436463125284e-06, "loss": 1.2603511810302734, "step": 12280 }, { "epoch": 2.235551105852371, "grad_norm": 6.21875, "learning_rate": 3.226539290238171e-06, "loss": 0.9752300977706909, "step": 12282 }, { "epoch": 2.235915172476563, "grad_norm": 22.875, "learning_rate": 3.2254353411579346e-06, "loss": 0.966333270072937, "step": 12284 }, { "epoch": 2.2362792391007553, "grad_norm": 23.125, "learning_rate": 3.2243317992338184e-06, "loss": 0.6371181607246399, "step": 12286 }, { "epoch": 2.2366433057249475, "grad_norm": 55.25, "learning_rate": 3.22322866462776e-06, "loss": 1.4175171852111816, "step": 12288 }, { "epoch": 2.2370073723491397, "grad_norm": 6.15625, "learning_rate": 3.2221259375016434e-06, "loss": 1.317901611328125, "step": 12290 }, { "epoch": 2.237371438973332, "grad_norm": 7.46875, "learning_rate": 3.221023618017286e-06, "loss": 1.4108023643493652, "step": 12292 }, { "epoch": 2.2377355055975245, "grad_norm": 13.4375, "learning_rate": 3.2199217063364513e-06, "loss": 1.5835630893707275, "step": 12294 }, { "epoch": 2.2380995722217167, "grad_norm": 9.5, "learning_rate": 3.21882020262084e-06, "loss": 1.737712025642395, "step": 12296 }, { "epoch": 2.238463638845909, "grad_norm": 4.125, "learning_rate": 3.2177191070320916e-06, "loss": 0.8339627981185913, "step": 12298 }, { "epoch": 2.238827705470101, "grad_norm": 9.875, "learning_rate": 3.2166184197317914e-06, "loss": 1.1847567558288574, "step": 12300 }, { "epoch": 2.2391917720942933, "grad_norm": 9.4375, "learning_rate": 3.2155181408814544e-06, "loss": 1.3000099658966064, "step": 12302 }, { "epoch": 2.2395558387184855, "grad_norm": 9.8125, "learning_rate": 3.2144182706425474e-06, "loss": 1.4241355657577515, "step": 12304 }, { "epoch": 2.2399199053426777, "grad_norm": 4.8125, "learning_rate": 3.2133188091764706e-06, "loss": 1.340789556503296, "step": 12306 }, { "epoch": 2.24028397196687, "grad_norm": 8.9375, "learning_rate": 3.2122197566445646e-06, "loss": 0.9283239841461182, "step": 12308 }, { "epoch": 2.240648038591062, "grad_norm": 13.5625, "learning_rate": 3.211121113208113e-06, "loss": 1.868080735206604, "step": 12310 }, { "epoch": 2.2410121052152543, "grad_norm": 32.5, "learning_rate": 3.2100228790283327e-06, "loss": 1.7148187160491943, "step": 12312 }, { "epoch": 2.2413761718394465, "grad_norm": 9.625, "learning_rate": 3.2089250542663897e-06, "loss": 1.4423878192901611, "step": 12314 }, { "epoch": 2.2417402384636387, "grad_norm": 9.6875, "learning_rate": 3.207827639083384e-06, "loss": 1.3118674755096436, "step": 12316 }, { "epoch": 2.242104305087831, "grad_norm": 27.75, "learning_rate": 3.206730633640356e-06, "loss": 1.5447744131088257, "step": 12318 }, { "epoch": 2.2424683717120235, "grad_norm": 10.875, "learning_rate": 3.205634038098289e-06, "loss": 1.6219093799591064, "step": 12320 }, { "epoch": 2.2428324383362157, "grad_norm": 13.3125, "learning_rate": 3.2045378526181e-06, "loss": 1.3868744373321533, "step": 12322 }, { "epoch": 2.243196504960408, "grad_norm": 11.375, "learning_rate": 3.203442077360653e-06, "loss": 1.3873951435089111, "step": 12324 }, { "epoch": 2.2435605715846, "grad_norm": 27.625, "learning_rate": 3.202346712486748e-06, "loss": 1.4910807609558105, "step": 12326 }, { "epoch": 2.2439246382087923, "grad_norm": 19.0, "learning_rate": 3.2012517581571245e-06, "loss": 1.3788236379623413, "step": 12328 }, { "epoch": 2.2442887048329845, "grad_norm": 24.0, "learning_rate": 3.200157214532465e-06, "loss": 1.4303592443466187, "step": 12330 }, { "epoch": 2.2446527714571767, "grad_norm": 34.75, "learning_rate": 3.199063081773385e-06, "loss": 1.3633836507797241, "step": 12332 }, { "epoch": 2.245016838081369, "grad_norm": 16.5, "learning_rate": 3.197969360040447e-06, "loss": 1.4099583625793457, "step": 12334 }, { "epoch": 2.245380904705561, "grad_norm": 14.5625, "learning_rate": 3.196876049494151e-06, "loss": 1.6726154088974, "step": 12336 }, { "epoch": 2.2457449713297533, "grad_norm": 16.25, "learning_rate": 3.195783150294934e-06, "loss": 1.4227968454360962, "step": 12338 }, { "epoch": 2.2461090379539455, "grad_norm": 15.625, "learning_rate": 3.1946906626031767e-06, "loss": 1.5397989749908447, "step": 12340 }, { "epoch": 2.2464731045781376, "grad_norm": 14.3125, "learning_rate": 3.1935985865791926e-06, "loss": 1.360124111175537, "step": 12342 }, { "epoch": 2.24683717120233, "grad_norm": 6.1875, "learning_rate": 3.1925069223832446e-06, "loss": 1.1941728591918945, "step": 12344 }, { "epoch": 2.2472012378265225, "grad_norm": 22.125, "learning_rate": 3.191415670175527e-06, "loss": 1.358879566192627, "step": 12346 }, { "epoch": 2.2475653044507147, "grad_norm": 5.78125, "learning_rate": 3.190324830116178e-06, "loss": 1.4363832473754883, "step": 12348 }, { "epoch": 2.247929371074907, "grad_norm": 11.9375, "learning_rate": 3.1892344023652753e-06, "loss": 0.8540009260177612, "step": 12350 }, { "epoch": 2.248293437699099, "grad_norm": 19.375, "learning_rate": 3.18814438708283e-06, "loss": 1.1413938999176025, "step": 12352 }, { "epoch": 2.2486575043232913, "grad_norm": 17.625, "learning_rate": 3.1870547844288026e-06, "loss": 1.535794734954834, "step": 12354 }, { "epoch": 2.2490215709474835, "grad_norm": 9.1875, "learning_rate": 3.1859655945630846e-06, "loss": 1.399773359298706, "step": 12356 }, { "epoch": 2.2493856375716756, "grad_norm": 8.9375, "learning_rate": 3.184876817645512e-06, "loss": 1.3213396072387695, "step": 12358 }, { "epoch": 2.249749704195868, "grad_norm": 16.625, "learning_rate": 3.1837884538358587e-06, "loss": 1.392475962638855, "step": 12360 }, { "epoch": 2.25011377082006, "grad_norm": 10.6875, "learning_rate": 3.182700503293834e-06, "loss": 1.2594785690307617, "step": 12362 }, { "epoch": 2.2504778374442522, "grad_norm": 13.0625, "learning_rate": 3.1816129661790945e-06, "loss": 1.3794629573822021, "step": 12364 }, { "epoch": 2.2508419040684444, "grad_norm": 33.0, "learning_rate": 3.1805258426512297e-06, "loss": 1.4756104946136475, "step": 12366 }, { "epoch": 2.2512059706926366, "grad_norm": 38.5, "learning_rate": 3.179439132869772e-06, "loss": 1.2784026861190796, "step": 12368 }, { "epoch": 2.251570037316829, "grad_norm": 7.34375, "learning_rate": 3.1783528369941917e-06, "loss": 1.567686915397644, "step": 12370 }, { "epoch": 2.2519341039410214, "grad_norm": 20.125, "learning_rate": 3.1772669551838952e-06, "loss": 1.3300023078918457, "step": 12372 }, { "epoch": 2.252298170565213, "grad_norm": 15.9375, "learning_rate": 3.1761814875982344e-06, "loss": 1.584209680557251, "step": 12374 }, { "epoch": 2.252662237189406, "grad_norm": 7.28125, "learning_rate": 3.1750964343964964e-06, "loss": 1.3606492280960083, "step": 12376 }, { "epoch": 2.253026303813598, "grad_norm": 10.25, "learning_rate": 3.174011795737908e-06, "loss": 1.4643335342407227, "step": 12378 }, { "epoch": 2.2533903704377902, "grad_norm": 16.5, "learning_rate": 3.1729275717816377e-06, "loss": 1.5258057117462158, "step": 12380 }, { "epoch": 2.2537544370619824, "grad_norm": 8.75, "learning_rate": 3.171843762686786e-06, "loss": 1.6648608446121216, "step": 12382 }, { "epoch": 2.2541185036861746, "grad_norm": 12.0625, "learning_rate": 3.1707603686124023e-06, "loss": 1.6886719465255737, "step": 12384 }, { "epoch": 2.254482570310367, "grad_norm": 13.9375, "learning_rate": 3.1696773897174686e-06, "loss": 1.4433162212371826, "step": 12386 }, { "epoch": 2.254846636934559, "grad_norm": 8.625, "learning_rate": 3.168594826160908e-06, "loss": 1.3029026985168457, "step": 12388 }, { "epoch": 2.255210703558751, "grad_norm": 26.125, "learning_rate": 3.1675126781015815e-06, "loss": 1.481706142425537, "step": 12390 }, { "epoch": 2.2555747701829434, "grad_norm": 16.25, "learning_rate": 3.1664309456982904e-06, "loss": 1.5893282890319824, "step": 12392 }, { "epoch": 2.2559388368071356, "grad_norm": 7.53125, "learning_rate": 3.1653496291097746e-06, "loss": 1.158312201499939, "step": 12394 }, { "epoch": 2.2563029034313278, "grad_norm": 10.1875, "learning_rate": 3.1642687284947125e-06, "loss": 1.3891575336456299, "step": 12396 }, { "epoch": 2.25666697005552, "grad_norm": 7.5625, "learning_rate": 3.1631882440117235e-06, "loss": 1.2034833431243896, "step": 12398 }, { "epoch": 2.257031036679712, "grad_norm": 9.3125, "learning_rate": 3.1621081758193624e-06, "loss": 1.373058795928955, "step": 12400 }, { "epoch": 2.257395103303905, "grad_norm": 12.5, "learning_rate": 3.161028524076125e-06, "loss": 1.2438926696777344, "step": 12402 }, { "epoch": 2.257759169928097, "grad_norm": 26.25, "learning_rate": 3.1599492889404472e-06, "loss": 1.2456939220428467, "step": 12404 }, { "epoch": 2.258123236552289, "grad_norm": 12.8125, "learning_rate": 3.158870470570701e-06, "loss": 1.1077613830566406, "step": 12406 }, { "epoch": 2.2584873031764814, "grad_norm": 7.96875, "learning_rate": 3.157792069125199e-06, "loss": 1.30093514919281, "step": 12408 }, { "epoch": 2.2588513698006736, "grad_norm": 9.5625, "learning_rate": 3.156714084762193e-06, "loss": 1.6580302715301514, "step": 12410 }, { "epoch": 2.2592154364248658, "grad_norm": 26.625, "learning_rate": 3.1556365176398717e-06, "loss": 1.93913996219635, "step": 12412 }, { "epoch": 2.259579503049058, "grad_norm": 14.75, "learning_rate": 3.1545593679163642e-06, "loss": 1.3676456212997437, "step": 12414 }, { "epoch": 2.25994356967325, "grad_norm": 10.4375, "learning_rate": 3.1534826357497383e-06, "loss": 1.4838814735412598, "step": 12416 }, { "epoch": 2.2603076362974424, "grad_norm": 7.40625, "learning_rate": 3.1524063212979998e-06, "loss": 1.357383131980896, "step": 12418 }, { "epoch": 2.2606717029216346, "grad_norm": 5.28125, "learning_rate": 3.151330424719093e-06, "loss": 0.8611851930618286, "step": 12420 }, { "epoch": 2.2610357695458267, "grad_norm": 25.5, "learning_rate": 3.150254946170902e-06, "loss": 0.5798748731613159, "step": 12422 }, { "epoch": 2.261399836170019, "grad_norm": 9.875, "learning_rate": 3.1491798858112484e-06, "loss": 1.6534793376922607, "step": 12424 }, { "epoch": 2.261763902794211, "grad_norm": 9.875, "learning_rate": 3.1481052437978932e-06, "loss": 1.1673952341079712, "step": 12426 }, { "epoch": 2.2621279694184038, "grad_norm": 12.6875, "learning_rate": 3.147031020288536e-06, "loss": 1.3034383058547974, "step": 12428 }, { "epoch": 2.262492036042596, "grad_norm": 9.125, "learning_rate": 3.145957215440814e-06, "loss": 1.521657943725586, "step": 12430 }, { "epoch": 2.262856102666788, "grad_norm": 8.625, "learning_rate": 3.1448838294123046e-06, "loss": 1.0947458744049072, "step": 12432 }, { "epoch": 2.2632201692909804, "grad_norm": 10.25, "learning_rate": 3.1438108623605223e-06, "loss": 1.2592262029647827, "step": 12434 }, { "epoch": 2.2635842359151725, "grad_norm": 8.6875, "learning_rate": 3.1427383144429214e-06, "loss": 1.372435212135315, "step": 12436 }, { "epoch": 2.2639483025393647, "grad_norm": 9.3125, "learning_rate": 3.1416661858168925e-06, "loss": 1.3548641204833984, "step": 12438 }, { "epoch": 2.264312369163557, "grad_norm": 10.8125, "learning_rate": 3.1405944766397673e-06, "loss": 1.4571447372436523, "step": 12440 }, { "epoch": 2.264676435787749, "grad_norm": 6.3125, "learning_rate": 3.1395231870688148e-06, "loss": 1.2718209028244019, "step": 12442 }, { "epoch": 2.2650405024119413, "grad_norm": 7.3125, "learning_rate": 3.1384523172612417e-06, "loss": 1.0654772520065308, "step": 12444 }, { "epoch": 2.2654045690361335, "grad_norm": 15.3125, "learning_rate": 3.137381867374195e-06, "loss": 1.7056589126586914, "step": 12446 }, { "epoch": 2.2657686356603257, "grad_norm": 36.75, "learning_rate": 3.136311837564757e-06, "loss": 1.3483936786651611, "step": 12448 }, { "epoch": 2.266132702284518, "grad_norm": 11.75, "learning_rate": 3.135242227989952e-06, "loss": 1.4078763723373413, "step": 12450 }, { "epoch": 2.26649676890871, "grad_norm": 5.84375, "learning_rate": 3.13417303880674e-06, "loss": 1.3722052574157715, "step": 12452 }, { "epoch": 2.2668608355329027, "grad_norm": 12.0625, "learning_rate": 3.1331042701720203e-06, "loss": 1.3576308488845825, "step": 12454 }, { "epoch": 2.267224902157095, "grad_norm": 32.25, "learning_rate": 3.1320359222426304e-06, "loss": 1.8384394645690918, "step": 12456 }, { "epoch": 2.267588968781287, "grad_norm": 18.0, "learning_rate": 3.1309679951753462e-06, "loss": 1.1462087631225586, "step": 12458 }, { "epoch": 2.2679530354054793, "grad_norm": 19.25, "learning_rate": 3.129900489126882e-06, "loss": 1.3432759046554565, "step": 12460 }, { "epoch": 2.2683171020296715, "grad_norm": 10.3125, "learning_rate": 3.128833404253889e-06, "loss": 1.364044189453125, "step": 12462 }, { "epoch": 2.2686811686538637, "grad_norm": 10.9375, "learning_rate": 3.127766740712958e-06, "loss": 1.4586470127105713, "step": 12464 }, { "epoch": 2.269045235278056, "grad_norm": 10.75, "learning_rate": 3.1267004986606175e-06, "loss": 1.2342406511306763, "step": 12466 }, { "epoch": 2.269409301902248, "grad_norm": 10.625, "learning_rate": 3.125634678253335e-06, "loss": 1.669901967048645, "step": 12468 }, { "epoch": 2.2697733685264403, "grad_norm": 15.125, "learning_rate": 3.124569279647514e-06, "loss": 1.1866904497146606, "step": 12470 }, { "epoch": 2.2701374351506325, "grad_norm": 13.5625, "learning_rate": 3.123504302999499e-06, "loss": 1.430922269821167, "step": 12472 }, { "epoch": 2.2705015017748247, "grad_norm": 12.4375, "learning_rate": 3.1224397484655693e-06, "loss": 1.2870221138000488, "step": 12474 }, { "epoch": 2.270865568399017, "grad_norm": 15.5, "learning_rate": 3.121375616201945e-06, "loss": 1.7282233238220215, "step": 12476 }, { "epoch": 2.271229635023209, "grad_norm": 14.125, "learning_rate": 3.1203119063647834e-06, "loss": 1.7285444736480713, "step": 12478 }, { "epoch": 2.2715937016474017, "grad_norm": 16.75, "learning_rate": 3.1192486191101782e-06, "loss": 1.7052456140518188, "step": 12480 }, { "epoch": 2.2719577682715935, "grad_norm": 13.625, "learning_rate": 3.1181857545941647e-06, "loss": 1.547616720199585, "step": 12482 }, { "epoch": 2.272321834895786, "grad_norm": 10.6875, "learning_rate": 3.117123312972712e-06, "loss": 0.990469753742218, "step": 12484 }, { "epoch": 2.2726859015199783, "grad_norm": 35.5, "learning_rate": 3.11606129440173e-06, "loss": 0.9018906354904175, "step": 12486 }, { "epoch": 2.2730499681441705, "grad_norm": 9.3125, "learning_rate": 3.114999699037065e-06, "loss": 1.4034992456436157, "step": 12488 }, { "epoch": 2.2734140347683627, "grad_norm": 7.65625, "learning_rate": 3.1139385270345035e-06, "loss": 0.928857684135437, "step": 12490 }, { "epoch": 2.273778101392555, "grad_norm": 11.8125, "learning_rate": 3.1128777785497654e-06, "loss": 1.351243257522583, "step": 12492 }, { "epoch": 2.274142168016747, "grad_norm": 12.375, "learning_rate": 3.111817453738514e-06, "loss": 1.8022680282592773, "step": 12494 }, { "epoch": 2.2745062346409393, "grad_norm": 13.0625, "learning_rate": 3.110757552756346e-06, "loss": 1.4590038061141968, "step": 12496 }, { "epoch": 2.2748703012651315, "grad_norm": 15.5625, "learning_rate": 3.109698075758798e-06, "loss": 1.4637423753738403, "step": 12498 }, { "epoch": 2.2752343678893237, "grad_norm": 30.75, "learning_rate": 3.1086390229013448e-06, "loss": 1.5078284740447998, "step": 12500 }, { "epoch": 2.275598434513516, "grad_norm": 9.9375, "learning_rate": 3.1075803943393967e-06, "loss": 1.9286373853683472, "step": 12502 }, { "epoch": 2.275962501137708, "grad_norm": 9.25, "learning_rate": 3.106522190228304e-06, "loss": 1.584265947341919, "step": 12504 }, { "epoch": 2.2763265677619007, "grad_norm": 9.6875, "learning_rate": 3.1054644107233535e-06, "loss": 1.2462928295135498, "step": 12506 }, { "epoch": 2.2766906343860924, "grad_norm": 5.40625, "learning_rate": 3.10440705597977e-06, "loss": 1.471488356590271, "step": 12508 }, { "epoch": 2.277054701010285, "grad_norm": 6.125, "learning_rate": 3.103350126152716e-06, "loss": 1.0790464878082275, "step": 12510 }, { "epoch": 2.2774187676344773, "grad_norm": 16.625, "learning_rate": 3.102293621397292e-06, "loss": 1.2261512279510498, "step": 12512 }, { "epoch": 2.2777828342586695, "grad_norm": 9.375, "learning_rate": 3.101237541868536e-06, "loss": 1.3318239450454712, "step": 12514 }, { "epoch": 2.2781469008828616, "grad_norm": 12.4375, "learning_rate": 3.100181887721423e-06, "loss": 1.4968301057815552, "step": 12516 }, { "epoch": 2.278510967507054, "grad_norm": 29.0, "learning_rate": 3.0991266591108664e-06, "loss": 1.558044195175171, "step": 12518 }, { "epoch": 2.278875034131246, "grad_norm": 19.375, "learning_rate": 3.0980718561917158e-06, "loss": 1.632189154624939, "step": 12520 }, { "epoch": 2.2792391007554382, "grad_norm": 6.9375, "learning_rate": 3.09701747911876e-06, "loss": 1.0974186658859253, "step": 12522 }, { "epoch": 2.2796031673796304, "grad_norm": 8.5, "learning_rate": 3.095963528046725e-06, "loss": 1.3167983293533325, "step": 12524 }, { "epoch": 2.2799672340038226, "grad_norm": 7.0, "learning_rate": 3.094910003130272e-06, "loss": 1.2831642627716064, "step": 12526 }, { "epoch": 2.280331300628015, "grad_norm": 8.1875, "learning_rate": 3.0938569045240043e-06, "loss": 0.8829589486122131, "step": 12528 }, { "epoch": 2.280695367252207, "grad_norm": 10.6875, "learning_rate": 3.092804232382457e-06, "loss": 1.383821964263916, "step": 12530 }, { "epoch": 2.281059433876399, "grad_norm": 9.375, "learning_rate": 3.091751986860107e-06, "loss": 1.297087550163269, "step": 12532 }, { "epoch": 2.2814235005005914, "grad_norm": 15.1875, "learning_rate": 3.0907001681113667e-06, "loss": 0.8560866713523865, "step": 12534 }, { "epoch": 2.281787567124784, "grad_norm": 9.5, "learning_rate": 3.089648776290587e-06, "loss": 1.5389878749847412, "step": 12536 }, { "epoch": 2.2821516337489762, "grad_norm": 12.5625, "learning_rate": 3.0885978115520543e-06, "loss": 1.2532103061676025, "step": 12538 }, { "epoch": 2.2825157003731684, "grad_norm": 9.625, "learning_rate": 3.087547274049994e-06, "loss": 1.2819963693618774, "step": 12540 }, { "epoch": 2.2828797669973606, "grad_norm": 11.5, "learning_rate": 3.0864971639385684e-06, "loss": 1.4069114923477173, "step": 12542 }, { "epoch": 2.283243833621553, "grad_norm": 9.8125, "learning_rate": 3.085447481371876e-06, "loss": 1.3645291328430176, "step": 12544 }, { "epoch": 2.283607900245745, "grad_norm": 10.0625, "learning_rate": 3.084398226503954e-06, "loss": 1.4025938510894775, "step": 12546 }, { "epoch": 2.283971966869937, "grad_norm": 15.75, "learning_rate": 3.083349399488777e-06, "loss": 1.1785614490509033, "step": 12548 }, { "epoch": 2.2843360334941294, "grad_norm": 23.25, "learning_rate": 3.082301000480255e-06, "loss": 1.0558698177337646, "step": 12550 }, { "epoch": 2.2847001001183216, "grad_norm": 5.375, "learning_rate": 3.0812530296322366e-06, "loss": 1.221854329109192, "step": 12552 }, { "epoch": 2.285064166742514, "grad_norm": 12.4375, "learning_rate": 3.080205487098508e-06, "loss": 1.270862102508545, "step": 12554 }, { "epoch": 2.285428233366706, "grad_norm": 7.8125, "learning_rate": 3.0791583730327914e-06, "loss": 1.4056766033172607, "step": 12556 }, { "epoch": 2.285792299990898, "grad_norm": 8.375, "learning_rate": 3.078111687588746e-06, "loss": 1.4692795276641846, "step": 12558 }, { "epoch": 2.2861563666150904, "grad_norm": 16.375, "learning_rate": 3.077065430919969e-06, "loss": 1.06184720993042, "step": 12560 }, { "epoch": 2.286520433239283, "grad_norm": 15.8125, "learning_rate": 3.0760196031799944e-06, "loss": 1.2931253910064697, "step": 12562 }, { "epoch": 2.286884499863475, "grad_norm": 9.75, "learning_rate": 3.0749742045222934e-06, "loss": 1.2624245882034302, "step": 12564 }, { "epoch": 2.2872485664876674, "grad_norm": 8.125, "learning_rate": 3.073929235100274e-06, "loss": 1.5429012775421143, "step": 12566 }, { "epoch": 2.2876126331118596, "grad_norm": 20.375, "learning_rate": 3.072884695067281e-06, "loss": 1.2527109384536743, "step": 12568 }, { "epoch": 2.2879766997360518, "grad_norm": 18.0, "learning_rate": 3.071840584576596e-06, "loss": 1.5365409851074219, "step": 12570 }, { "epoch": 2.288340766360244, "grad_norm": 7.6875, "learning_rate": 3.070796903781439e-06, "loss": 1.2739225625991821, "step": 12572 }, { "epoch": 2.288704832984436, "grad_norm": 18.875, "learning_rate": 3.0697536528349642e-06, "loss": 0.9951099157333374, "step": 12574 }, { "epoch": 2.2890688996086284, "grad_norm": 7.84375, "learning_rate": 3.068710831890267e-06, "loss": 1.1309419870376587, "step": 12576 }, { "epoch": 2.2894329662328206, "grad_norm": 4.28125, "learning_rate": 3.0676684411003747e-06, "loss": 1.3102229833602905, "step": 12578 }, { "epoch": 2.2897970328570127, "grad_norm": 10.1875, "learning_rate": 3.0666264806182556e-06, "loss": 1.0711464881896973, "step": 12580 }, { "epoch": 2.290161099481205, "grad_norm": 9.5, "learning_rate": 3.065584950596812e-06, "loss": 1.3585255146026611, "step": 12582 }, { "epoch": 2.290525166105397, "grad_norm": 17.75, "learning_rate": 3.0645438511888854e-06, "loss": 1.3575489521026611, "step": 12584 }, { "epoch": 2.2908892327295893, "grad_norm": 7.9375, "learning_rate": 3.0635031825472518e-06, "loss": 1.4524719715118408, "step": 12586 }, { "epoch": 2.291253299353782, "grad_norm": 12.0, "learning_rate": 3.0624629448246257e-06, "loss": 1.2567143440246582, "step": 12588 }, { "epoch": 2.291617365977974, "grad_norm": 13.8125, "learning_rate": 3.0614231381736577e-06, "loss": 1.0736030340194702, "step": 12590 }, { "epoch": 2.2919814326021664, "grad_norm": 17.0, "learning_rate": 3.0603837627469345e-06, "loss": 0.522739589214325, "step": 12592 }, { "epoch": 2.2923454992263586, "grad_norm": 7.6875, "learning_rate": 3.059344818696982e-06, "loss": 1.1999160051345825, "step": 12594 }, { "epoch": 2.2927095658505507, "grad_norm": 10.8125, "learning_rate": 3.0583063061762597e-06, "loss": 1.4812519550323486, "step": 12596 }, { "epoch": 2.293073632474743, "grad_norm": 13.3125, "learning_rate": 3.057268225337165e-06, "loss": 1.3010470867156982, "step": 12598 }, { "epoch": 2.293437699098935, "grad_norm": 7.4375, "learning_rate": 3.0562305763320327e-06, "loss": 1.275359869003296, "step": 12600 }, { "epoch": 2.2938017657231273, "grad_norm": 9.5625, "learning_rate": 3.055193359313133e-06, "loss": 1.4190359115600586, "step": 12602 }, { "epoch": 2.2941658323473195, "grad_norm": 25.125, "learning_rate": 3.0541565744326735e-06, "loss": 1.3066437244415283, "step": 12604 }, { "epoch": 2.2945298989715117, "grad_norm": 25.625, "learning_rate": 3.053120221842798e-06, "loss": 1.190384864807129, "step": 12606 }, { "epoch": 2.294893965595704, "grad_norm": 10.0, "learning_rate": 3.052084301695588e-06, "loss": 1.2210527658462524, "step": 12608 }, { "epoch": 2.295258032219896, "grad_norm": 7.21875, "learning_rate": 3.0510488141430596e-06, "loss": 1.2423650026321411, "step": 12610 }, { "epoch": 2.2956220988440883, "grad_norm": 18.625, "learning_rate": 3.0500137593371666e-06, "loss": 1.2772867679595947, "step": 12612 }, { "epoch": 2.295986165468281, "grad_norm": 7.96875, "learning_rate": 3.048979137429799e-06, "loss": 1.1831212043762207, "step": 12614 }, { "epoch": 2.2963502320924727, "grad_norm": 11.0, "learning_rate": 3.047944948572783e-06, "loss": 1.4668102264404297, "step": 12616 }, { "epoch": 2.2967142987166653, "grad_norm": 10.0625, "learning_rate": 3.046911192917883e-06, "loss": 1.7448139190673828, "step": 12618 }, { "epoch": 2.2970783653408575, "grad_norm": 8.375, "learning_rate": 3.045877870616798e-06, "loss": 1.0120562314987183, "step": 12620 }, { "epoch": 2.2974424319650497, "grad_norm": 15.4375, "learning_rate": 3.044844981821162e-06, "loss": 1.180618166923523, "step": 12622 }, { "epoch": 2.297806498589242, "grad_norm": 14.125, "learning_rate": 3.04381252668255e-06, "loss": 0.08765455335378647, "step": 12624 }, { "epoch": 2.298170565213434, "grad_norm": 34.25, "learning_rate": 3.0427805053524697e-06, "loss": 0.3928592801094055, "step": 12626 }, { "epoch": 2.2985346318376263, "grad_norm": 15.1875, "learning_rate": 3.0417489179823646e-06, "loss": 1.3172202110290527, "step": 12628 }, { "epoch": 2.2988986984618185, "grad_norm": 24.75, "learning_rate": 3.0407177647236173e-06, "loss": 0.9428372979164124, "step": 12630 }, { "epoch": 2.2992627650860107, "grad_norm": 8.3125, "learning_rate": 3.039687045727545e-06, "loss": 1.5005693435668945, "step": 12632 }, { "epoch": 2.299626831710203, "grad_norm": 6.875, "learning_rate": 3.038656761145402e-06, "loss": 1.377175211906433, "step": 12634 }, { "epoch": 2.299990898334395, "grad_norm": 9.875, "learning_rate": 3.037626911128378e-06, "loss": 0.8938100934028625, "step": 12636 }, { "epoch": 2.3003549649585873, "grad_norm": 9.9375, "learning_rate": 3.036597495827599e-06, "loss": 1.5012824535369873, "step": 12638 }, { "epoch": 2.3007190315827795, "grad_norm": 11.375, "learning_rate": 3.0355685153941283e-06, "loss": 1.2269644737243652, "step": 12640 }, { "epoch": 2.3010830982069717, "grad_norm": 13.9375, "learning_rate": 3.0345399699789642e-06, "loss": 0.9781918525695801, "step": 12642 }, { "epoch": 2.3014471648311643, "grad_norm": 14.0625, "learning_rate": 3.033511859733041e-06, "loss": 1.3739736080169678, "step": 12644 }, { "epoch": 2.3018112314553565, "grad_norm": 10.625, "learning_rate": 3.0324841848072304e-06, "loss": 1.2351527214050293, "step": 12646 }, { "epoch": 2.3021752980795487, "grad_norm": 54.0, "learning_rate": 3.03145694535234e-06, "loss": 0.5533649325370789, "step": 12648 }, { "epoch": 2.302539364703741, "grad_norm": 22.125, "learning_rate": 3.0304301415191115e-06, "loss": 1.7622896432876587, "step": 12650 }, { "epoch": 2.302903431327933, "grad_norm": 148.0, "learning_rate": 3.029403773458226e-06, "loss": 1.3319618701934814, "step": 12652 }, { "epoch": 2.3032674979521253, "grad_norm": 21.125, "learning_rate": 3.0283778413202975e-06, "loss": 1.4163382053375244, "step": 12654 }, { "epoch": 2.3036315645763175, "grad_norm": 12.625, "learning_rate": 3.027352345255878e-06, "loss": 1.3314707279205322, "step": 12656 }, { "epoch": 2.3039956312005097, "grad_norm": 15.875, "learning_rate": 3.0263272854154548e-06, "loss": 1.3248398303985596, "step": 12658 }, { "epoch": 2.304359697824702, "grad_norm": 9.5, "learning_rate": 3.025302661949451e-06, "loss": 0.838480532169342, "step": 12660 }, { "epoch": 2.304723764448894, "grad_norm": 18.375, "learning_rate": 3.024278475008226e-06, "loss": 1.0200480222702026, "step": 12662 }, { "epoch": 2.3050878310730862, "grad_norm": 5.75, "learning_rate": 3.023254724742075e-06, "loss": 0.44165003299713135, "step": 12664 }, { "epoch": 2.3054518976972784, "grad_norm": 7.1875, "learning_rate": 3.022231411301231e-06, "loss": 1.2833613157272339, "step": 12666 }, { "epoch": 2.3058159643214706, "grad_norm": 14.75, "learning_rate": 3.021208534835858e-06, "loss": 1.2190918922424316, "step": 12668 }, { "epoch": 2.3061800309456633, "grad_norm": 8.125, "learning_rate": 3.020186095496061e-06, "loss": 1.5317522287368774, "step": 12670 }, { "epoch": 2.3065440975698555, "grad_norm": 5.09375, "learning_rate": 3.0191640934318783e-06, "loss": 0.9419819116592407, "step": 12672 }, { "epoch": 2.3069081641940477, "grad_norm": 9.9375, "learning_rate": 3.0181425287932845e-06, "loss": 1.2479746341705322, "step": 12674 }, { "epoch": 2.30727223081824, "grad_norm": 9.375, "learning_rate": 3.017121401730191e-06, "loss": 1.3125934600830078, "step": 12676 }, { "epoch": 2.307636297442432, "grad_norm": 12.9375, "learning_rate": 3.0161007123924425e-06, "loss": 1.241598129272461, "step": 12678 }, { "epoch": 2.3080003640666242, "grad_norm": 12.0, "learning_rate": 3.015080460929822e-06, "loss": 1.119701862335205, "step": 12680 }, { "epoch": 2.3083644306908164, "grad_norm": 77.5, "learning_rate": 3.014060647492047e-06, "loss": 1.2777540683746338, "step": 12682 }, { "epoch": 2.3087284973150086, "grad_norm": 40.5, "learning_rate": 3.0130412722287705e-06, "loss": 1.179136872291565, "step": 12684 }, { "epoch": 2.309092563939201, "grad_norm": 22.875, "learning_rate": 3.0120223352895827e-06, "loss": 0.6506235003471375, "step": 12686 }, { "epoch": 2.309456630563393, "grad_norm": 8.375, "learning_rate": 3.011003836824008e-06, "loss": 1.0332425832748413, "step": 12688 }, { "epoch": 2.309820697187585, "grad_norm": 9.4375, "learning_rate": 3.009985776981507e-06, "loss": 1.3676140308380127, "step": 12690 }, { "epoch": 2.3101847638117774, "grad_norm": 18.125, "learning_rate": 3.0089681559114758e-06, "loss": 1.4909124374389648, "step": 12692 }, { "epoch": 2.3105488304359696, "grad_norm": 14.5625, "learning_rate": 3.0079509737632455e-06, "loss": 1.3306161165237427, "step": 12694 }, { "epoch": 2.3109128970601622, "grad_norm": 6.96875, "learning_rate": 3.006934230686085e-06, "loss": 1.2349447011947632, "step": 12696 }, { "epoch": 2.3112769636843544, "grad_norm": 23.875, "learning_rate": 3.005917926829196e-06, "loss": 1.1286929845809937, "step": 12698 }, { "epoch": 2.3116410303085466, "grad_norm": 15.5, "learning_rate": 3.0049020623417167e-06, "loss": 1.0020360946655273, "step": 12700 }, { "epoch": 2.312005096932739, "grad_norm": 6.78125, "learning_rate": 3.0038866373727223e-06, "loss": 1.217538595199585, "step": 12702 }, { "epoch": 2.312369163556931, "grad_norm": 8.75, "learning_rate": 3.0028716520712212e-06, "loss": 1.3113577365875244, "step": 12704 }, { "epoch": 2.312733230181123, "grad_norm": 9.6875, "learning_rate": 3.001857106586159e-06, "loss": 1.1445388793945312, "step": 12706 }, { "epoch": 2.3130972968053154, "grad_norm": 7.125, "learning_rate": 3.0008430010664164e-06, "loss": 1.1376641988754272, "step": 12708 }, { "epoch": 2.3134613634295076, "grad_norm": 14.0, "learning_rate": 2.999829335660809e-06, "loss": 1.436424970626831, "step": 12710 }, { "epoch": 2.3138254300537, "grad_norm": 19.375, "learning_rate": 2.9988161105180875e-06, "loss": 1.5562865734100342, "step": 12712 }, { "epoch": 2.314189496677892, "grad_norm": 19.75, "learning_rate": 2.997803325786939e-06, "loss": 1.3501567840576172, "step": 12714 }, { "epoch": 2.314553563302084, "grad_norm": 9.375, "learning_rate": 2.9967909816159857e-06, "loss": 1.3954524993896484, "step": 12716 }, { "epoch": 2.3149176299262764, "grad_norm": 6.4375, "learning_rate": 2.995779078153785e-06, "loss": 1.108249545097351, "step": 12718 }, { "epoch": 2.3152816965504686, "grad_norm": 13.25, "learning_rate": 2.99476761554883e-06, "loss": 1.3637020587921143, "step": 12720 }, { "epoch": 2.315645763174661, "grad_norm": 8.375, "learning_rate": 2.9937565939495472e-06, "loss": 1.3312394618988037, "step": 12722 }, { "epoch": 2.316009829798853, "grad_norm": 8.5, "learning_rate": 2.992746013504302e-06, "loss": 1.0356110334396362, "step": 12724 }, { "epoch": 2.3163738964230456, "grad_norm": 7.90625, "learning_rate": 2.9917358743613913e-06, "loss": 1.2071000337600708, "step": 12726 }, { "epoch": 2.316737963047238, "grad_norm": 10.5, "learning_rate": 2.99072617666905e-06, "loss": 1.4758578538894653, "step": 12728 }, { "epoch": 2.31710202967143, "grad_norm": 71.5, "learning_rate": 2.9897169205754466e-06, "loss": 1.6389625072479248, "step": 12730 }, { "epoch": 2.317466096295622, "grad_norm": 20.75, "learning_rate": 2.9887081062286856e-06, "loss": 1.6999410390853882, "step": 12732 }, { "epoch": 2.3178301629198144, "grad_norm": 53.5, "learning_rate": 2.987699733776806e-06, "loss": 1.8977272510528564, "step": 12734 }, { "epoch": 2.3181942295440066, "grad_norm": 5.25, "learning_rate": 2.9866918033677827e-06, "loss": 1.2279573678970337, "step": 12736 }, { "epoch": 2.3185582961681988, "grad_norm": 8.5, "learning_rate": 2.985684315149526e-06, "loss": 1.0158472061157227, "step": 12738 }, { "epoch": 2.318922362792391, "grad_norm": 9.1875, "learning_rate": 2.9846772692698795e-06, "loss": 1.4386669397354126, "step": 12740 }, { "epoch": 2.319286429416583, "grad_norm": 17.25, "learning_rate": 2.9836706658766233e-06, "loss": 1.2104482650756836, "step": 12742 }, { "epoch": 2.3196504960407753, "grad_norm": 10.375, "learning_rate": 2.982664505117474e-06, "loss": 1.4086564779281616, "step": 12744 }, { "epoch": 2.3200145626649675, "grad_norm": 15.8125, "learning_rate": 2.9816587871400796e-06, "loss": 1.1924282312393188, "step": 12746 }, { "epoch": 2.3203786292891597, "grad_norm": 11.375, "learning_rate": 2.9806535120920268e-06, "loss": 1.2032430171966553, "step": 12748 }, { "epoch": 2.320742695913352, "grad_norm": 51.5, "learning_rate": 2.9796486801208337e-06, "loss": 1.0400818586349487, "step": 12750 }, { "epoch": 2.3211067625375446, "grad_norm": 12.25, "learning_rate": 2.9786442913739566e-06, "loss": 1.5251060724258423, "step": 12752 }, { "epoch": 2.3214708291617367, "grad_norm": 9.0625, "learning_rate": 2.977640345998785e-06, "loss": 1.3160400390625, "step": 12754 }, { "epoch": 2.321834895785929, "grad_norm": 3.609375, "learning_rate": 2.976636844142645e-06, "loss": 1.206324577331543, "step": 12756 }, { "epoch": 2.322198962410121, "grad_norm": 17.375, "learning_rate": 2.9756337859527943e-06, "loss": 1.3376787900924683, "step": 12758 }, { "epoch": 2.3225630290343133, "grad_norm": 12.375, "learning_rate": 2.9746311715764296e-06, "loss": 1.088538408279419, "step": 12760 }, { "epoch": 2.3229270956585055, "grad_norm": 16.75, "learning_rate": 2.97362900116068e-06, "loss": 0.3987538814544678, "step": 12762 }, { "epoch": 2.3232911622826977, "grad_norm": 9.0625, "learning_rate": 2.9726272748526087e-06, "loss": 1.2419790029525757, "step": 12764 }, { "epoch": 2.32365522890689, "grad_norm": 9.0625, "learning_rate": 2.9716259927992166e-06, "loss": 1.496699571609497, "step": 12766 }, { "epoch": 2.324019295531082, "grad_norm": 27.125, "learning_rate": 2.9706251551474374e-06, "loss": 1.4162895679473877, "step": 12768 }, { "epoch": 2.3243833621552743, "grad_norm": 13.0625, "learning_rate": 2.9696247620441386e-06, "loss": 1.6553939580917358, "step": 12770 }, { "epoch": 2.3247474287794665, "grad_norm": 9.125, "learning_rate": 2.9686248136361264e-06, "loss": 1.430681586265564, "step": 12772 }, { "epoch": 2.3251114954036587, "grad_norm": 12.0, "learning_rate": 2.9676253100701367e-06, "loss": 1.151663064956665, "step": 12774 }, { "epoch": 2.325475562027851, "grad_norm": 16.125, "learning_rate": 2.966626251492844e-06, "loss": 1.6114152669906616, "step": 12776 }, { "epoch": 2.3258396286520435, "grad_norm": 20.625, "learning_rate": 2.965627638050855e-06, "loss": 1.8489385843276978, "step": 12778 }, { "epoch": 2.3262036952762357, "grad_norm": 14.8125, "learning_rate": 2.964629469890714e-06, "loss": 1.5465797185897827, "step": 12780 }, { "epoch": 2.326567761900428, "grad_norm": 12.5, "learning_rate": 2.9636317471588966e-06, "loss": 1.2140628099441528, "step": 12782 }, { "epoch": 2.32693182852462, "grad_norm": 17.125, "learning_rate": 2.962634470001815e-06, "loss": 0.848121166229248, "step": 12784 }, { "epoch": 2.3272958951488123, "grad_norm": 19.75, "learning_rate": 2.961637638565815e-06, "loss": 0.6174255609512329, "step": 12786 }, { "epoch": 2.3276599617730045, "grad_norm": 58.25, "learning_rate": 2.9606412529971782e-06, "loss": 1.7457401752471924, "step": 12788 }, { "epoch": 2.3280240283971967, "grad_norm": 10.75, "learning_rate": 2.959645313442121e-06, "loss": 1.3340728282928467, "step": 12790 }, { "epoch": 2.328388095021389, "grad_norm": 11.625, "learning_rate": 2.9586498200467925e-06, "loss": 1.3564493656158447, "step": 12792 }, { "epoch": 2.328752161645581, "grad_norm": 8.3125, "learning_rate": 2.9576547729572763e-06, "loss": 1.2549247741699219, "step": 12794 }, { "epoch": 2.3291162282697733, "grad_norm": 21.0, "learning_rate": 2.956660172319593e-06, "loss": 1.0571786165237427, "step": 12796 }, { "epoch": 2.3294802948939655, "grad_norm": 51.25, "learning_rate": 2.9556660182796963e-06, "loss": 0.8457317352294922, "step": 12798 }, { "epoch": 2.3298443615181577, "grad_norm": 27.875, "learning_rate": 2.9546723109834734e-06, "loss": 1.0769550800323486, "step": 12800 }, { "epoch": 2.33020842814235, "grad_norm": 26.375, "learning_rate": 2.9536790505767475e-06, "loss": 1.6301898956298828, "step": 12802 }, { "epoch": 2.3305724947665425, "grad_norm": 23.375, "learning_rate": 2.9526862372052755e-06, "loss": 1.5576831102371216, "step": 12804 }, { "epoch": 2.3309365613907347, "grad_norm": 14.1875, "learning_rate": 2.951693871014748e-06, "loss": 1.3612929582595825, "step": 12806 }, { "epoch": 2.331300628014927, "grad_norm": 16.375, "learning_rate": 2.950701952150791e-06, "loss": 1.789056658744812, "step": 12808 }, { "epoch": 2.331664694639119, "grad_norm": 9.8125, "learning_rate": 2.9497104807589655e-06, "loss": 1.3118585348129272, "step": 12810 }, { "epoch": 2.3320287612633113, "grad_norm": 19.25, "learning_rate": 2.948719456984765e-06, "loss": 1.1820694208145142, "step": 12812 }, { "epoch": 2.3323928278875035, "grad_norm": 126.0, "learning_rate": 2.947728880973618e-06, "loss": 0.7172834873199463, "step": 12814 }, { "epoch": 2.3327568945116957, "grad_norm": 2.4375, "learning_rate": 2.9467387528708884e-06, "loss": 0.8870652914047241, "step": 12816 }, { "epoch": 2.333120961135888, "grad_norm": 8.25, "learning_rate": 2.945749072821873e-06, "loss": 1.0431147813796997, "step": 12818 }, { "epoch": 2.33348502776008, "grad_norm": 8.375, "learning_rate": 2.944759840971803e-06, "loss": 1.4970178604125977, "step": 12820 }, { "epoch": 2.3338490943842722, "grad_norm": 20.25, "learning_rate": 2.9437710574658453e-06, "loss": 1.2170723676681519, "step": 12822 }, { "epoch": 2.3342131610084644, "grad_norm": 16.75, "learning_rate": 2.9427827224490984e-06, "loss": 1.045131802558899, "step": 12824 }, { "epoch": 2.3345772276326566, "grad_norm": 14.5, "learning_rate": 2.941794836066598e-06, "loss": 1.7131435871124268, "step": 12826 }, { "epoch": 2.334941294256849, "grad_norm": 3.328125, "learning_rate": 2.94080739846331e-06, "loss": 0.9052638411521912, "step": 12828 }, { "epoch": 2.3353053608810415, "grad_norm": 15.5, "learning_rate": 2.93982040978414e-06, "loss": 1.2864320278167725, "step": 12830 }, { "epoch": 2.335669427505233, "grad_norm": 8.8125, "learning_rate": 2.938833870173922e-06, "loss": 1.7033592462539673, "step": 12832 }, { "epoch": 2.336033494129426, "grad_norm": 8.5, "learning_rate": 2.9378477797774287e-06, "loss": 1.1344698667526245, "step": 12834 }, { "epoch": 2.336397560753618, "grad_norm": 12.875, "learning_rate": 2.936862138739363e-06, "loss": 1.5222827196121216, "step": 12836 }, { "epoch": 2.3367616273778102, "grad_norm": 15.1875, "learning_rate": 2.9358769472043654e-06, "loss": 1.5892690420150757, "step": 12838 }, { "epoch": 2.3371256940020024, "grad_norm": 12.9375, "learning_rate": 2.9348922053170076e-06, "loss": 1.3002058267593384, "step": 12840 }, { "epoch": 2.3374897606261946, "grad_norm": 40.5, "learning_rate": 2.933907913221796e-06, "loss": 1.4518928527832031, "step": 12842 }, { "epoch": 2.337853827250387, "grad_norm": 16.875, "learning_rate": 2.932924071063174e-06, "loss": 1.7592427730560303, "step": 12844 }, { "epoch": 2.338217893874579, "grad_norm": 15.0625, "learning_rate": 2.931940678985514e-06, "loss": 1.122901439666748, "step": 12846 }, { "epoch": 2.338581960498771, "grad_norm": 63.5, "learning_rate": 2.9309577371331255e-06, "loss": 0.7378765344619751, "step": 12848 }, { "epoch": 2.3389460271229634, "grad_norm": 18.75, "learning_rate": 2.9299752456502517e-06, "loss": 1.4323711395263672, "step": 12850 }, { "epoch": 2.3393100937471556, "grad_norm": 40.25, "learning_rate": 2.928993204681068e-06, "loss": 1.6694300174713135, "step": 12852 }, { "epoch": 2.339674160371348, "grad_norm": 6.75, "learning_rate": 2.928011614369687e-06, "loss": 1.1885361671447754, "step": 12854 }, { "epoch": 2.3400382269955404, "grad_norm": 16.875, "learning_rate": 2.927030474860151e-06, "loss": 1.1138637065887451, "step": 12856 }, { "epoch": 2.340402293619732, "grad_norm": 19.5, "learning_rate": 2.92604978629644e-06, "loss": 1.1558057069778442, "step": 12858 }, { "epoch": 2.340766360243925, "grad_norm": 10.75, "learning_rate": 2.9250695488224646e-06, "loss": 1.2405270338058472, "step": 12860 }, { "epoch": 2.341130426868117, "grad_norm": 21.625, "learning_rate": 2.9240897625820713e-06, "loss": 1.6242928504943848, "step": 12862 }, { "epoch": 2.341494493492309, "grad_norm": 21.625, "learning_rate": 2.92311042771904e-06, "loss": 1.9920815229415894, "step": 12864 }, { "epoch": 2.3418585601165014, "grad_norm": 7.75, "learning_rate": 2.922131544377084e-06, "loss": 1.305228352546692, "step": 12866 }, { "epoch": 2.3422226267406936, "grad_norm": 13.5, "learning_rate": 2.92115311269985e-06, "loss": 1.2778526544570923, "step": 12868 }, { "epoch": 2.342586693364886, "grad_norm": 22.5, "learning_rate": 2.92017513283092e-06, "loss": 2.2170212268829346, "step": 12870 }, { "epoch": 2.342950759989078, "grad_norm": 31.625, "learning_rate": 2.9191976049138064e-06, "loss": 1.1426204442977905, "step": 12872 }, { "epoch": 2.34331482661327, "grad_norm": 26.625, "learning_rate": 2.9182205290919595e-06, "loss": 0.8540529608726501, "step": 12874 }, { "epoch": 2.3436788932374624, "grad_norm": 15.75, "learning_rate": 2.9172439055087616e-06, "loss": 1.4425444602966309, "step": 12876 }, { "epoch": 2.3440429598616546, "grad_norm": 23.25, "learning_rate": 2.916267734307526e-06, "loss": 1.4816980361938477, "step": 12878 }, { "epoch": 2.3444070264858468, "grad_norm": 20.25, "learning_rate": 2.9152920156315035e-06, "loss": 1.3841744661331177, "step": 12880 }, { "epoch": 2.344771093110039, "grad_norm": 21.75, "learning_rate": 2.9143167496238765e-06, "loss": 1.5072520971298218, "step": 12882 }, { "epoch": 2.345135159734231, "grad_norm": 7.03125, "learning_rate": 2.9133419364277616e-06, "loss": 1.3918832540512085, "step": 12884 }, { "epoch": 2.345499226358424, "grad_norm": 10.875, "learning_rate": 2.912367576186208e-06, "loss": 0.9455580711364746, "step": 12886 }, { "epoch": 2.345863292982616, "grad_norm": 22.125, "learning_rate": 2.9113936690421996e-06, "loss": 0.9071639180183411, "step": 12888 }, { "epoch": 2.346227359606808, "grad_norm": 5.4375, "learning_rate": 2.9104202151386537e-06, "loss": 0.741948127746582, "step": 12890 }, { "epoch": 2.3465914262310004, "grad_norm": 12.75, "learning_rate": 2.909447214618419e-06, "loss": 1.376150131225586, "step": 12892 }, { "epoch": 2.3469554928551926, "grad_norm": 21.25, "learning_rate": 2.908474667624282e-06, "loss": 1.4099239110946655, "step": 12894 }, { "epoch": 2.3473195594793848, "grad_norm": 16.75, "learning_rate": 2.907502574298958e-06, "loss": 1.299912691116333, "step": 12896 }, { "epoch": 2.347683626103577, "grad_norm": 29.75, "learning_rate": 2.906530934785099e-06, "loss": 1.2498507499694824, "step": 12898 }, { "epoch": 2.348047692727769, "grad_norm": 7.96875, "learning_rate": 2.9055597492252885e-06, "loss": 1.166631817817688, "step": 12900 }, { "epoch": 2.3484117593519613, "grad_norm": 3.046875, "learning_rate": 2.9045890177620433e-06, "loss": 0.6954485177993774, "step": 12902 }, { "epoch": 2.3487758259761535, "grad_norm": 6.5625, "learning_rate": 2.9036187405378158e-06, "loss": 1.3160232305526733, "step": 12904 }, { "epoch": 2.3491398926003457, "grad_norm": 4.90625, "learning_rate": 2.9026489176949895e-06, "loss": 1.0704166889190674, "step": 12906 }, { "epoch": 2.349503959224538, "grad_norm": 23.5, "learning_rate": 2.9016795493758822e-06, "loss": 1.2405421733856201, "step": 12908 }, { "epoch": 2.34986802584873, "grad_norm": 7.625, "learning_rate": 2.900710635722744e-06, "loss": 1.1511577367782593, "step": 12910 }, { "epoch": 2.3502320924729228, "grad_norm": 10.3125, "learning_rate": 2.89974217687776e-06, "loss": 0.9296000003814697, "step": 12912 }, { "epoch": 2.350596159097115, "grad_norm": 19.0, "learning_rate": 2.898774172983048e-06, "loss": 1.4404160976409912, "step": 12914 }, { "epoch": 2.350960225721307, "grad_norm": 7.96875, "learning_rate": 2.897806624180657e-06, "loss": 1.2417014837265015, "step": 12916 }, { "epoch": 2.3513242923454993, "grad_norm": 9.25, "learning_rate": 2.8968395306125725e-06, "loss": 1.2106091976165771, "step": 12918 }, { "epoch": 2.3516883589696915, "grad_norm": 10.875, "learning_rate": 2.8958728924207103e-06, "loss": 1.2392441034317017, "step": 12920 }, { "epoch": 2.3520524255938837, "grad_norm": 14.125, "learning_rate": 2.8949067097469214e-06, "loss": 1.4720377922058105, "step": 12922 }, { "epoch": 2.352416492218076, "grad_norm": 18.5, "learning_rate": 2.8939409827329894e-06, "loss": 1.6523391008377075, "step": 12924 }, { "epoch": 2.352780558842268, "grad_norm": 20.875, "learning_rate": 2.89297571152063e-06, "loss": 1.635360598564148, "step": 12926 }, { "epoch": 2.3531446254664603, "grad_norm": 27.75, "learning_rate": 2.8920108962514935e-06, "loss": 0.9188194870948792, "step": 12928 }, { "epoch": 2.3535086920906525, "grad_norm": 39.25, "learning_rate": 2.891046537067162e-06, "loss": 1.56831955909729, "step": 12930 }, { "epoch": 2.3538727587148447, "grad_norm": 15.1875, "learning_rate": 2.890082634109152e-06, "loss": 1.983557105064392, "step": 12932 }, { "epoch": 2.354236825339037, "grad_norm": 17.75, "learning_rate": 2.8891191875189117e-06, "loss": 1.4402353763580322, "step": 12934 }, { "epoch": 2.354600891963229, "grad_norm": 9.1875, "learning_rate": 2.8881561974378237e-06, "loss": 1.432793140411377, "step": 12936 }, { "epoch": 2.3549649585874217, "grad_norm": 12.75, "learning_rate": 2.8871936640072027e-06, "loss": 1.5808932781219482, "step": 12938 }, { "epoch": 2.355329025211614, "grad_norm": 14.6875, "learning_rate": 2.886231587368296e-06, "loss": 1.6627857685089111, "step": 12940 }, { "epoch": 2.355693091835806, "grad_norm": 9.5625, "learning_rate": 2.8852699676622855e-06, "loss": 1.4841033220291138, "step": 12942 }, { "epoch": 2.3560571584599983, "grad_norm": 16.75, "learning_rate": 2.8843088050302837e-06, "loss": 1.154237151145935, "step": 12944 }, { "epoch": 2.3564212250841905, "grad_norm": 22.375, "learning_rate": 2.8833480996133383e-06, "loss": 1.4320063591003418, "step": 12946 }, { "epoch": 2.3567852917083827, "grad_norm": 19.0, "learning_rate": 2.8823878515524283e-06, "loss": 1.3051279783248901, "step": 12948 }, { "epoch": 2.357149358332575, "grad_norm": 15.0625, "learning_rate": 2.8814280609884665e-06, "loss": 1.4349632263183594, "step": 12950 }, { "epoch": 2.357513424956767, "grad_norm": 11.9375, "learning_rate": 2.8804687280622983e-06, "loss": 1.6198092699050903, "step": 12952 }, { "epoch": 2.3578774915809593, "grad_norm": 10.0, "learning_rate": 2.879509852914702e-06, "loss": 1.2842026948928833, "step": 12954 }, { "epoch": 2.3582415582051515, "grad_norm": 9.8125, "learning_rate": 2.8785514356863893e-06, "loss": 1.5202780961990356, "step": 12956 }, { "epoch": 2.3586056248293437, "grad_norm": 6.5625, "learning_rate": 2.877593476518002e-06, "loss": 1.0736358165740967, "step": 12958 }, { "epoch": 2.358969691453536, "grad_norm": 10.75, "learning_rate": 2.876635975550119e-06, "loss": 0.4618733525276184, "step": 12960 }, { "epoch": 2.359333758077728, "grad_norm": 12.125, "learning_rate": 2.875678932923248e-06, "loss": 1.1793478727340698, "step": 12962 }, { "epoch": 2.3596978247019207, "grad_norm": 19.0, "learning_rate": 2.874722348777832e-06, "loss": 1.57537841796875, "step": 12964 }, { "epoch": 2.3600618913261124, "grad_norm": 62.0, "learning_rate": 2.873766223254246e-06, "loss": 1.095525860786438, "step": 12966 }, { "epoch": 2.360425957950305, "grad_norm": 14.9375, "learning_rate": 2.872810556492797e-06, "loss": 1.3439770936965942, "step": 12968 }, { "epoch": 2.3607900245744973, "grad_norm": 16.25, "learning_rate": 2.8718553486337253e-06, "loss": 1.428471326828003, "step": 12970 }, { "epoch": 2.3611540911986895, "grad_norm": 10.25, "learning_rate": 2.870900599817204e-06, "loss": 1.6733887195587158, "step": 12972 }, { "epoch": 2.3615181578228817, "grad_norm": 19.125, "learning_rate": 2.8699463101833385e-06, "loss": 1.5539997816085815, "step": 12974 }, { "epoch": 2.361882224447074, "grad_norm": 11.8125, "learning_rate": 2.8689924798721673e-06, "loss": 0.9679824113845825, "step": 12976 }, { "epoch": 2.362246291071266, "grad_norm": 23.625, "learning_rate": 2.868039109023661e-06, "loss": 1.359969139099121, "step": 12978 }, { "epoch": 2.3626103576954582, "grad_norm": 9.375, "learning_rate": 2.867086197777722e-06, "loss": 0.5674327611923218, "step": 12980 }, { "epoch": 2.3629744243196504, "grad_norm": 12.375, "learning_rate": 2.8661337462741873e-06, "loss": 1.3166778087615967, "step": 12982 }, { "epoch": 2.3633384909438426, "grad_norm": 58.25, "learning_rate": 2.865181754652825e-06, "loss": 1.7650721073150635, "step": 12984 }, { "epoch": 2.363702557568035, "grad_norm": 11.6875, "learning_rate": 2.864230223053335e-06, "loss": 1.4578721523284912, "step": 12986 }, { "epoch": 2.364066624192227, "grad_norm": 19.875, "learning_rate": 2.863279151615353e-06, "loss": 1.0696241855621338, "step": 12988 }, { "epoch": 2.364430690816419, "grad_norm": 18.0, "learning_rate": 2.862328540478443e-06, "loss": 0.8878389596939087, "step": 12990 }, { "epoch": 2.3647947574406114, "grad_norm": 19.125, "learning_rate": 2.8613783897821033e-06, "loss": 1.2449302673339844, "step": 12992 }, { "epoch": 2.365158824064804, "grad_norm": 3.03125, "learning_rate": 2.8604286996657656e-06, "loss": 1.0127277374267578, "step": 12994 }, { "epoch": 2.3655228906889962, "grad_norm": 9.4375, "learning_rate": 2.859479470268793e-06, "loss": 1.1745678186416626, "step": 12996 }, { "epoch": 2.3658869573131884, "grad_norm": 9.0, "learning_rate": 2.8585307017304796e-06, "loss": 1.353560447692871, "step": 12998 }, { "epoch": 2.3662510239373806, "grad_norm": 13.9375, "learning_rate": 2.857582394190055e-06, "loss": 1.301735520362854, "step": 13000 }, { "epoch": 2.366615090561573, "grad_norm": 8.625, "learning_rate": 2.8566345477866793e-06, "loss": 1.3826541900634766, "step": 13002 }, { "epoch": 2.366979157185765, "grad_norm": 10.375, "learning_rate": 2.8556871626594446e-06, "loss": 1.1823984384536743, "step": 13004 }, { "epoch": 2.367343223809957, "grad_norm": 21.625, "learning_rate": 2.854740238947376e-06, "loss": 1.398445725440979, "step": 13006 }, { "epoch": 2.3677072904341494, "grad_norm": 16.875, "learning_rate": 2.853793776789431e-06, "loss": 1.334977388381958, "step": 13008 }, { "epoch": 2.3680713570583416, "grad_norm": 16.5, "learning_rate": 2.8528477763244984e-06, "loss": 0.957625150680542, "step": 13010 }, { "epoch": 2.368435423682534, "grad_norm": 8.0625, "learning_rate": 2.8519022376913997e-06, "loss": 1.3189506530761719, "step": 13012 }, { "epoch": 2.368799490306726, "grad_norm": 8.75, "learning_rate": 2.8509571610288904e-06, "loss": 1.247552514076233, "step": 13014 }, { "epoch": 2.369163556930918, "grad_norm": 9.4375, "learning_rate": 2.850012546475656e-06, "loss": 1.2783420085906982, "step": 13016 }, { "epoch": 2.3695276235551104, "grad_norm": 13.5, "learning_rate": 2.8490683941703136e-06, "loss": 1.3923428058624268, "step": 13018 }, { "epoch": 2.369891690179303, "grad_norm": 19.875, "learning_rate": 2.848124704251416e-06, "loss": 1.3748270273208618, "step": 13020 }, { "epoch": 2.370255756803495, "grad_norm": 10.5625, "learning_rate": 2.8471814768574436e-06, "loss": 1.2959493398666382, "step": 13022 }, { "epoch": 2.3706198234276874, "grad_norm": 7.6875, "learning_rate": 2.846238712126812e-06, "loss": 1.1435142755508423, "step": 13024 }, { "epoch": 2.3709838900518796, "grad_norm": 8.0625, "learning_rate": 2.845296410197869e-06, "loss": 1.2150239944458008, "step": 13026 }, { "epoch": 2.371347956676072, "grad_norm": 8.0625, "learning_rate": 2.8443545712088935e-06, "loss": 1.36673104763031, "step": 13028 }, { "epoch": 2.371712023300264, "grad_norm": 14.9375, "learning_rate": 2.843413195298095e-06, "loss": 1.423221230506897, "step": 13030 }, { "epoch": 2.372076089924456, "grad_norm": 18.875, "learning_rate": 2.8424722826036176e-06, "loss": 1.5557280778884888, "step": 13032 }, { "epoch": 2.3724401565486484, "grad_norm": 19.625, "learning_rate": 2.8415318332635365e-06, "loss": 1.251732587814331, "step": 13034 }, { "epoch": 2.3728042231728406, "grad_norm": 12.875, "learning_rate": 2.840591847415859e-06, "loss": 1.5633962154388428, "step": 13036 }, { "epoch": 2.3731682897970328, "grad_norm": 5.71875, "learning_rate": 2.8396523251985236e-06, "loss": 1.179494857788086, "step": 13038 }, { "epoch": 2.373532356421225, "grad_norm": 11.125, "learning_rate": 2.8387132667494023e-06, "loss": 0.9931835532188416, "step": 13040 }, { "epoch": 2.373896423045417, "grad_norm": 9.75, "learning_rate": 2.8377746722062963e-06, "loss": 1.1571617126464844, "step": 13042 }, { "epoch": 2.3742604896696093, "grad_norm": 9.375, "learning_rate": 2.8368365417069426e-06, "loss": 1.4481487274169922, "step": 13044 }, { "epoch": 2.374624556293802, "grad_norm": 13.5625, "learning_rate": 2.835898875389007e-06, "loss": 1.3988542556762695, "step": 13046 }, { "epoch": 2.374988622917994, "grad_norm": 18.0, "learning_rate": 2.8349616733900885e-06, "loss": 1.7332851886749268, "step": 13048 }, { "epoch": 2.3753526895421864, "grad_norm": 13.8125, "learning_rate": 2.8340249358477184e-06, "loss": 1.6849112510681152, "step": 13050 }, { "epoch": 2.3757167561663786, "grad_norm": 24.625, "learning_rate": 2.8330886628993578e-06, "loss": 1.6017203330993652, "step": 13052 }, { "epoch": 2.3760808227905708, "grad_norm": 12.1875, "learning_rate": 2.8321528546824015e-06, "loss": 1.2707186937332153, "step": 13054 }, { "epoch": 2.376444889414763, "grad_norm": 34.5, "learning_rate": 2.8312175113341754e-06, "loss": 1.5900399684906006, "step": 13056 }, { "epoch": 2.376808956038955, "grad_norm": 10.5625, "learning_rate": 2.830282632991938e-06, "loss": 1.698850154876709, "step": 13058 }, { "epoch": 2.3771730226631473, "grad_norm": 9.6875, "learning_rate": 2.8293482197928777e-06, "loss": 1.4708998203277588, "step": 13060 }, { "epoch": 2.3775370892873395, "grad_norm": 15.75, "learning_rate": 2.8284142718741173e-06, "loss": 1.6085891723632812, "step": 13062 }, { "epoch": 2.3779011559115317, "grad_norm": 5.28125, "learning_rate": 2.8274807893727094e-06, "loss": 0.8812276124954224, "step": 13064 }, { "epoch": 2.378265222535724, "grad_norm": 9.875, "learning_rate": 2.8265477724256383e-06, "loss": 0.5617726445198059, "step": 13066 }, { "epoch": 2.378629289159916, "grad_norm": 10.375, "learning_rate": 2.8256152211698205e-06, "loss": 1.0298148393630981, "step": 13068 }, { "epoch": 2.3789933557841083, "grad_norm": 16.0, "learning_rate": 2.8246831357421044e-06, "loss": 1.0253523588180542, "step": 13070 }, { "epoch": 2.379357422408301, "grad_norm": 25.75, "learning_rate": 2.82375151627927e-06, "loss": 1.3209866285324097, "step": 13072 }, { "epoch": 2.3797214890324927, "grad_norm": 19.75, "learning_rate": 2.8228203629180286e-06, "loss": 1.3811129331588745, "step": 13074 }, { "epoch": 2.3800855556566853, "grad_norm": 36.75, "learning_rate": 2.821889675795022e-06, "loss": 1.3401545286178589, "step": 13076 }, { "epoch": 2.3804496222808775, "grad_norm": 13.5, "learning_rate": 2.8209594550468263e-06, "loss": 0.8718788623809814, "step": 13078 }, { "epoch": 2.3808136889050697, "grad_norm": 10.5, "learning_rate": 2.820029700809947e-06, "loss": 1.2428971529006958, "step": 13080 }, { "epoch": 2.381177755529262, "grad_norm": 11.0625, "learning_rate": 2.8191004132208214e-06, "loss": 1.7124323844909668, "step": 13082 }, { "epoch": 2.381541822153454, "grad_norm": 11.75, "learning_rate": 2.8181715924158197e-06, "loss": 1.5032646656036377, "step": 13084 }, { "epoch": 2.3819058887776463, "grad_norm": 6.03125, "learning_rate": 2.817243238531242e-06, "loss": 1.5273163318634033, "step": 13086 }, { "epoch": 2.3822699554018385, "grad_norm": 11.3125, "learning_rate": 2.81631535170332e-06, "loss": 1.294839859008789, "step": 13088 }, { "epoch": 2.3826340220260307, "grad_norm": 11.4375, "learning_rate": 2.815387932068218e-06, "loss": 1.3597090244293213, "step": 13090 }, { "epoch": 2.382998088650223, "grad_norm": 7.5, "learning_rate": 2.814460979762031e-06, "loss": 1.2180516719818115, "step": 13092 }, { "epoch": 2.383362155274415, "grad_norm": 8.6875, "learning_rate": 2.8135344949207856e-06, "loss": 0.9353842735290527, "step": 13094 }, { "epoch": 2.3837262218986073, "grad_norm": 14.875, "learning_rate": 2.8126084776804386e-06, "loss": 1.3736299276351929, "step": 13096 }, { "epoch": 2.3840902885228, "grad_norm": 10.75, "learning_rate": 2.811682928176881e-06, "loss": 1.4455559253692627, "step": 13098 }, { "epoch": 2.3844543551469917, "grad_norm": 6.46875, "learning_rate": 2.8107578465459322e-06, "loss": 1.0201411247253418, "step": 13100 }, { "epoch": 2.3848184217711843, "grad_norm": 6.46875, "learning_rate": 2.8098332329233447e-06, "loss": 1.0659406185150146, "step": 13102 }, { "epoch": 2.3851824883953765, "grad_norm": 17.125, "learning_rate": 2.8089090874448015e-06, "loss": 1.624879240989685, "step": 13104 }, { "epoch": 2.3855465550195687, "grad_norm": 9.4375, "learning_rate": 2.807985410245917e-06, "loss": 1.5225954055786133, "step": 13106 }, { "epoch": 2.385910621643761, "grad_norm": 14.5625, "learning_rate": 2.8070622014622384e-06, "loss": 1.5918283462524414, "step": 13108 }, { "epoch": 2.386274688267953, "grad_norm": 23.375, "learning_rate": 2.806139461229241e-06, "loss": 1.7529596090316772, "step": 13110 }, { "epoch": 2.3866387548921453, "grad_norm": 10.6875, "learning_rate": 2.8052171896823344e-06, "loss": 1.4478387832641602, "step": 13112 }, { "epoch": 2.3870028215163375, "grad_norm": 11.5, "learning_rate": 2.804295386956858e-06, "loss": 1.3970688581466675, "step": 13114 }, { "epoch": 2.3873668881405297, "grad_norm": 13.0, "learning_rate": 2.803374053188082e-06, "loss": 1.3684873580932617, "step": 13116 }, { "epoch": 2.387730954764722, "grad_norm": 4.21875, "learning_rate": 2.8024531885112092e-06, "loss": 1.0634028911590576, "step": 13118 }, { "epoch": 2.388095021388914, "grad_norm": 17.125, "learning_rate": 2.8015327930613727e-06, "loss": 1.5346989631652832, "step": 13120 }, { "epoch": 2.3884590880131062, "grad_norm": 18.125, "learning_rate": 2.8006128669736366e-06, "loss": 1.447345495223999, "step": 13122 }, { "epoch": 2.3888231546372984, "grad_norm": 10.8125, "learning_rate": 2.7996934103829966e-06, "loss": 1.4378901720046997, "step": 13124 }, { "epoch": 2.3891872212614906, "grad_norm": 20.625, "learning_rate": 2.798774423424378e-06, "loss": 1.7141553163528442, "step": 13126 }, { "epoch": 2.3895512878856833, "grad_norm": 12.0625, "learning_rate": 2.79785590623264e-06, "loss": 1.0675203800201416, "step": 13128 }, { "epoch": 2.3899153545098755, "grad_norm": 14.0, "learning_rate": 2.79693785894257e-06, "loss": 1.1264009475708008, "step": 13130 }, { "epoch": 2.3902794211340677, "grad_norm": 12.0, "learning_rate": 2.796020281688889e-06, "loss": 1.332797884941101, "step": 13132 }, { "epoch": 2.39064348775826, "grad_norm": 15.625, "learning_rate": 2.795103174606246e-06, "loss": 1.8389105796813965, "step": 13134 }, { "epoch": 2.391007554382452, "grad_norm": 9.25, "learning_rate": 2.7941865378292254e-06, "loss": 1.405497431755066, "step": 13136 }, { "epoch": 2.3913716210066442, "grad_norm": 8.125, "learning_rate": 2.7932703714923377e-06, "loss": 1.0936295986175537, "step": 13138 }, { "epoch": 2.3917356876308364, "grad_norm": 10.5, "learning_rate": 2.792354675730027e-06, "loss": 1.4086532592773438, "step": 13140 }, { "epoch": 2.3920997542550286, "grad_norm": 8.25, "learning_rate": 2.7914394506766678e-06, "loss": 1.3387991189956665, "step": 13142 }, { "epoch": 2.392463820879221, "grad_norm": 13.375, "learning_rate": 2.7905246964665665e-06, "loss": 1.2444496154785156, "step": 13144 }, { "epoch": 2.392827887503413, "grad_norm": 12.25, "learning_rate": 2.789610413233959e-06, "loss": 1.138127088546753, "step": 13146 }, { "epoch": 2.393191954127605, "grad_norm": 38.25, "learning_rate": 2.7886966011130136e-06, "loss": 1.9173109531402588, "step": 13148 }, { "epoch": 2.3935560207517974, "grad_norm": 25.0, "learning_rate": 2.787783260237826e-06, "loss": 1.6392672061920166, "step": 13150 }, { "epoch": 2.3939200873759896, "grad_norm": 15.625, "learning_rate": 2.786870390742429e-06, "loss": 1.1793363094329834, "step": 13152 }, { "epoch": 2.3942841540001822, "grad_norm": 18.5, "learning_rate": 2.7859579927607793e-06, "loss": 1.8120503425598145, "step": 13154 }, { "epoch": 2.3946482206243744, "grad_norm": 5.875, "learning_rate": 2.7850460664267687e-06, "loss": 1.0406832695007324, "step": 13156 }, { "epoch": 2.3950122872485666, "grad_norm": 15.125, "learning_rate": 2.784134611874219e-06, "loss": 1.3404755592346191, "step": 13158 }, { "epoch": 2.395376353872759, "grad_norm": 14.4375, "learning_rate": 2.7832236292368824e-06, "loss": 1.4649168252944946, "step": 13160 }, { "epoch": 2.395740420496951, "grad_norm": 9.8125, "learning_rate": 2.782313118648442e-06, "loss": 0.7475913166999817, "step": 13162 }, { "epoch": 2.396104487121143, "grad_norm": 70.0, "learning_rate": 2.7814030802425105e-06, "loss": 0.7028264999389648, "step": 13164 }, { "epoch": 2.3964685537453354, "grad_norm": 9.0625, "learning_rate": 2.780493514152634e-06, "loss": 1.376814603805542, "step": 13166 }, { "epoch": 2.3968326203695276, "grad_norm": 15.875, "learning_rate": 2.7795844205122866e-06, "loss": 1.488100528717041, "step": 13168 }, { "epoch": 2.39719668699372, "grad_norm": 13.75, "learning_rate": 2.7786757994548742e-06, "loss": 1.8152649402618408, "step": 13170 }, { "epoch": 2.397560753617912, "grad_norm": 25.625, "learning_rate": 2.777767651113733e-06, "loss": 1.6000306606292725, "step": 13172 }, { "epoch": 2.397924820242104, "grad_norm": 17.25, "learning_rate": 2.7768599756221303e-06, "loss": 0.8882613778114319, "step": 13174 }, { "epoch": 2.3982888868662964, "grad_norm": 11.3125, "learning_rate": 2.7759527731132647e-06, "loss": 1.4401156902313232, "step": 13176 }, { "epoch": 2.3986529534904886, "grad_norm": 26.125, "learning_rate": 2.775046043720263e-06, "loss": 1.213514804840088, "step": 13178 }, { "epoch": 2.399017020114681, "grad_norm": 4.625, "learning_rate": 2.7741397875761855e-06, "loss": 1.1056969165802002, "step": 13180 }, { "epoch": 2.3993810867388734, "grad_norm": 40.75, "learning_rate": 2.7732340048140203e-06, "loss": 1.2660188674926758, "step": 13182 }, { "epoch": 2.3997451533630656, "grad_norm": 47.75, "learning_rate": 2.7723286955666885e-06, "loss": 1.5601726770401, "step": 13184 }, { "epoch": 2.400109219987258, "grad_norm": 10.125, "learning_rate": 2.7714238599670394e-06, "loss": 0.6036195158958435, "step": 13186 }, { "epoch": 2.40047328661145, "grad_norm": 12.125, "learning_rate": 2.7705194981478545e-06, "loss": 1.4248721599578857, "step": 13188 }, { "epoch": 2.400837353235642, "grad_norm": 16.25, "learning_rate": 2.769615610241846e-06, "loss": 1.3486409187316895, "step": 13190 }, { "epoch": 2.4012014198598344, "grad_norm": 15.375, "learning_rate": 2.768712196381655e-06, "loss": 1.1833536624908447, "step": 13192 }, { "epoch": 2.4015654864840266, "grad_norm": 3.453125, "learning_rate": 2.767809256699854e-06, "loss": 1.285017490386963, "step": 13194 }, { "epoch": 2.4019295531082188, "grad_norm": 9.125, "learning_rate": 2.7669067913289447e-06, "loss": 1.1078941822052002, "step": 13196 }, { "epoch": 2.402293619732411, "grad_norm": 10.0625, "learning_rate": 2.766004800401362e-06, "loss": 1.3018831014633179, "step": 13198 }, { "epoch": 2.402657686356603, "grad_norm": 11.625, "learning_rate": 2.7651032840494685e-06, "loss": 1.4734562635421753, "step": 13200 }, { "epoch": 2.4030217529807953, "grad_norm": 16.875, "learning_rate": 2.764202242405558e-06, "loss": 1.6767512559890747, "step": 13202 }, { "epoch": 2.4033858196049875, "grad_norm": 14.1875, "learning_rate": 2.7633016756018547e-06, "loss": 1.8229765892028809, "step": 13204 }, { "epoch": 2.40374988622918, "grad_norm": 22.625, "learning_rate": 2.7624015837705136e-06, "loss": 1.9370911121368408, "step": 13206 }, { "epoch": 2.404113952853372, "grad_norm": 14.8125, "learning_rate": 2.7615019670436194e-06, "loss": 1.4044655561447144, "step": 13208 }, { "epoch": 2.4044780194775646, "grad_norm": 14.0, "learning_rate": 2.760602825553187e-06, "loss": 1.4396326541900635, "step": 13210 }, { "epoch": 2.4048420861017568, "grad_norm": 6.46875, "learning_rate": 2.7597041594311618e-06, "loss": 1.2416374683380127, "step": 13212 }, { "epoch": 2.405206152725949, "grad_norm": 10.75, "learning_rate": 2.7588059688094194e-06, "loss": 1.2069329023361206, "step": 13214 }, { "epoch": 2.405570219350141, "grad_norm": 87.0, "learning_rate": 2.7579082538197653e-06, "loss": 1.2373838424682617, "step": 13216 }, { "epoch": 2.4059342859743333, "grad_norm": 4.9375, "learning_rate": 2.7570110145939365e-06, "loss": 1.113019347190857, "step": 13218 }, { "epoch": 2.4062983525985255, "grad_norm": 9.9375, "learning_rate": 2.756114251263598e-06, "loss": 1.4458307027816772, "step": 13220 }, { "epoch": 2.4066624192227177, "grad_norm": 24.875, "learning_rate": 2.7552179639603477e-06, "loss": 1.103837251663208, "step": 13222 }, { "epoch": 2.40702648584691, "grad_norm": 12.1875, "learning_rate": 2.7543221528157104e-06, "loss": 1.5726428031921387, "step": 13224 }, { "epoch": 2.407390552471102, "grad_norm": 8.4375, "learning_rate": 2.753426817961144e-06, "loss": 1.418402910232544, "step": 13226 }, { "epoch": 2.4077546190952943, "grad_norm": 11.75, "learning_rate": 2.7525319595280347e-06, "loss": 1.3836729526519775, "step": 13228 }, { "epoch": 2.4081186857194865, "grad_norm": 12.6875, "learning_rate": 2.7516375776476993e-06, "loss": 1.3468658924102783, "step": 13230 }, { "epoch": 2.4084827523436787, "grad_norm": 10.375, "learning_rate": 2.7507436724513853e-06, "loss": 1.330183982849121, "step": 13232 }, { "epoch": 2.408846818967871, "grad_norm": 16.375, "learning_rate": 2.749850244070269e-06, "loss": 1.5821893215179443, "step": 13234 }, { "epoch": 2.4092108855920635, "grad_norm": 64.5, "learning_rate": 2.748957292635458e-06, "loss": 1.578415870666504, "step": 13236 }, { "epoch": 2.4095749522162557, "grad_norm": 17.75, "learning_rate": 2.748064818277989e-06, "loss": 1.363551139831543, "step": 13238 }, { "epoch": 2.409939018840448, "grad_norm": 10.875, "learning_rate": 2.7471728211288283e-06, "loss": 1.07505464553833, "step": 13240 }, { "epoch": 2.41030308546464, "grad_norm": 9.625, "learning_rate": 2.7462813013188746e-06, "loss": 1.5999397039413452, "step": 13242 }, { "epoch": 2.4106671520888323, "grad_norm": 20.25, "learning_rate": 2.745390258978953e-06, "loss": 1.5222870111465454, "step": 13244 }, { "epoch": 2.4110312187130245, "grad_norm": 14.25, "learning_rate": 2.744499694239821e-06, "loss": 1.3627160787582397, "step": 13246 }, { "epoch": 2.4113952853372167, "grad_norm": 19.75, "learning_rate": 2.743609607232166e-06, "loss": 1.7409682273864746, "step": 13248 }, { "epoch": 2.411759351961409, "grad_norm": 14.3125, "learning_rate": 2.7427199980866035e-06, "loss": 1.3861503601074219, "step": 13250 }, { "epoch": 2.412123418585601, "grad_norm": 10.5, "learning_rate": 2.741830866933681e-06, "loss": 1.1757136583328247, "step": 13252 }, { "epoch": 2.4124874852097933, "grad_norm": 13.0625, "learning_rate": 2.740942213903875e-06, "loss": 1.7118579149246216, "step": 13254 }, { "epoch": 2.4128515518339855, "grad_norm": 6.6875, "learning_rate": 2.7400540391275908e-06, "loss": 1.3312727212905884, "step": 13256 }, { "epoch": 2.4132156184581777, "grad_norm": 30.375, "learning_rate": 2.7391663427351658e-06, "loss": 1.226712703704834, "step": 13258 }, { "epoch": 2.41357968508237, "grad_norm": 13.1875, "learning_rate": 2.7382791248568642e-06, "loss": 1.4244717359542847, "step": 13260 }, { "epoch": 2.4139437517065625, "grad_norm": 12.4375, "learning_rate": 2.7373923856228822e-06, "loss": 1.6039049625396729, "step": 13262 }, { "epoch": 2.4143078183307547, "grad_norm": 14.625, "learning_rate": 2.7365061251633457e-06, "loss": 1.6134241819381714, "step": 13264 }, { "epoch": 2.414671884954947, "grad_norm": 9.3125, "learning_rate": 2.7356203436083093e-06, "loss": 1.1808816194534302, "step": 13266 }, { "epoch": 2.415035951579139, "grad_norm": 42.25, "learning_rate": 2.734735041087759e-06, "loss": 1.2498751878738403, "step": 13268 }, { "epoch": 2.4154000182033313, "grad_norm": 11.3125, "learning_rate": 2.7338502177316077e-06, "loss": 1.4044361114501953, "step": 13270 }, { "epoch": 2.4157640848275235, "grad_norm": 30.75, "learning_rate": 2.7329658736697008e-06, "loss": 1.59952712059021, "step": 13272 }, { "epoch": 2.4161281514517157, "grad_norm": 21.875, "learning_rate": 2.732082009031812e-06, "loss": 1.2319375276565552, "step": 13274 }, { "epoch": 2.416492218075908, "grad_norm": 14.3125, "learning_rate": 2.731198623947644e-06, "loss": 1.3763971328735352, "step": 13276 }, { "epoch": 2.4168562847001, "grad_norm": 5.21875, "learning_rate": 2.730315718546831e-06, "loss": 1.3468014001846313, "step": 13278 }, { "epoch": 2.4172203513242922, "grad_norm": 7.28125, "learning_rate": 2.729433292958935e-06, "loss": 1.0505539178848267, "step": 13280 }, { "epoch": 2.4175844179484844, "grad_norm": 9.1875, "learning_rate": 2.7285513473134494e-06, "loss": 1.338356375694275, "step": 13282 }, { "epoch": 2.4179484845726766, "grad_norm": 6.15625, "learning_rate": 2.7276698817397953e-06, "loss": 1.280744194984436, "step": 13284 }, { "epoch": 2.418312551196869, "grad_norm": 3.8125, "learning_rate": 2.7267888963673246e-06, "loss": 1.2489186525344849, "step": 13286 }, { "epoch": 2.4186766178210615, "grad_norm": 12.0625, "learning_rate": 2.7259083913253183e-06, "loss": 1.3006317615509033, "step": 13288 }, { "epoch": 2.4190406844452537, "grad_norm": 16.0, "learning_rate": 2.725028366742986e-06, "loss": 1.4647231101989746, "step": 13290 }, { "epoch": 2.419404751069446, "grad_norm": 23.25, "learning_rate": 2.7241488227494693e-06, "loss": 1.6237925291061401, "step": 13292 }, { "epoch": 2.419768817693638, "grad_norm": 12.9375, "learning_rate": 2.7232697594738365e-06, "loss": 1.3615989685058594, "step": 13294 }, { "epoch": 2.4201328843178302, "grad_norm": 15.625, "learning_rate": 2.7223911770450876e-06, "loss": 1.3674650192260742, "step": 13296 }, { "epoch": 2.4204969509420224, "grad_norm": 6.96875, "learning_rate": 2.7215130755921504e-06, "loss": 1.2055695056915283, "step": 13298 }, { "epoch": 2.4208610175662146, "grad_norm": 11.8125, "learning_rate": 2.720635455243883e-06, "loss": 0.9048939347267151, "step": 13300 }, { "epoch": 2.421225084190407, "grad_norm": 9.4375, "learning_rate": 2.719758316129072e-06, "loss": 1.0693939924240112, "step": 13302 }, { "epoch": 2.421589150814599, "grad_norm": 35.75, "learning_rate": 2.718881658376435e-06, "loss": 1.3081045150756836, "step": 13304 }, { "epoch": 2.421953217438791, "grad_norm": 25.625, "learning_rate": 2.718005482114617e-06, "loss": 0.558391273021698, "step": 13306 }, { "epoch": 2.4223172840629834, "grad_norm": 17.25, "learning_rate": 2.7171297874721937e-06, "loss": 1.3600088357925415, "step": 13308 }, { "epoch": 2.4226813506871756, "grad_norm": 6.53125, "learning_rate": 2.7162545745776696e-06, "loss": 1.2515337467193604, "step": 13310 }, { "epoch": 2.423045417311368, "grad_norm": 22.25, "learning_rate": 2.715379843559479e-06, "loss": 0.9471852779388428, "step": 13312 }, { "epoch": 2.4234094839355604, "grad_norm": 118.0, "learning_rate": 2.7145055945459853e-06, "loss": 1.4826947450637817, "step": 13314 }, { "epoch": 2.423773550559752, "grad_norm": 11.5, "learning_rate": 2.7136318276654804e-06, "loss": 1.686980128288269, "step": 13316 }, { "epoch": 2.424137617183945, "grad_norm": 13.375, "learning_rate": 2.7127585430461863e-06, "loss": 1.0486658811569214, "step": 13318 }, { "epoch": 2.424501683808137, "grad_norm": 7.90625, "learning_rate": 2.711885740816254e-06, "loss": 1.0649824142456055, "step": 13320 }, { "epoch": 2.424865750432329, "grad_norm": 14.9375, "learning_rate": 2.7110134211037635e-06, "loss": 1.705924391746521, "step": 13322 }, { "epoch": 2.4252298170565214, "grad_norm": 13.5625, "learning_rate": 2.710141584036725e-06, "loss": 1.4834460020065308, "step": 13324 }, { "epoch": 2.4255938836807136, "grad_norm": 19.25, "learning_rate": 2.7092702297430757e-06, "loss": 1.5113946199417114, "step": 13326 }, { "epoch": 2.425957950304906, "grad_norm": 24.0, "learning_rate": 2.708399358350684e-06, "loss": 0.6128663420677185, "step": 13328 }, { "epoch": 2.426322016929098, "grad_norm": 33.25, "learning_rate": 2.7075289699873476e-06, "loss": 0.42841148376464844, "step": 13330 }, { "epoch": 2.42668608355329, "grad_norm": 36.0, "learning_rate": 2.7066590647807907e-06, "loss": 0.8327840566635132, "step": 13332 }, { "epoch": 2.4270501501774824, "grad_norm": 10.625, "learning_rate": 2.7057896428586694e-06, "loss": 1.3326199054718018, "step": 13334 }, { "epoch": 2.4274142168016746, "grad_norm": 10.5625, "learning_rate": 2.7049207043485683e-06, "loss": 1.5825438499450684, "step": 13336 }, { "epoch": 2.4277782834258668, "grad_norm": 9.125, "learning_rate": 2.704052249378e-06, "loss": 1.827541470527649, "step": 13338 }, { "epoch": 2.4281423500500594, "grad_norm": 14.0, "learning_rate": 2.7031842780744065e-06, "loss": 1.390425682067871, "step": 13340 }, { "epoch": 2.428506416674251, "grad_norm": 19.625, "learning_rate": 2.702316790565159e-06, "loss": 1.4069594144821167, "step": 13342 }, { "epoch": 2.428870483298444, "grad_norm": 11.3125, "learning_rate": 2.701449786977559e-06, "loss": 1.091294765472412, "step": 13344 }, { "epoch": 2.429234549922636, "grad_norm": 5.59375, "learning_rate": 2.7005832674388342e-06, "loss": 0.9225186109542847, "step": 13346 }, { "epoch": 2.429598616546828, "grad_norm": 8.125, "learning_rate": 2.6997172320761445e-06, "loss": 1.338148593902588, "step": 13348 }, { "epoch": 2.4299626831710204, "grad_norm": 8.0625, "learning_rate": 2.698851681016575e-06, "loss": 1.3794116973876953, "step": 13350 }, { "epoch": 2.4303267497952126, "grad_norm": 6.59375, "learning_rate": 2.697986614387143e-06, "loss": 1.2935271263122559, "step": 13352 }, { "epoch": 2.4306908164194048, "grad_norm": 7.09375, "learning_rate": 2.697122032314794e-06, "loss": 1.3203860521316528, "step": 13354 }, { "epoch": 2.431054883043597, "grad_norm": 8.875, "learning_rate": 2.696257934926401e-06, "loss": 1.3398065567016602, "step": 13356 }, { "epoch": 2.431418949667789, "grad_norm": 6.15625, "learning_rate": 2.695394322348768e-06, "loss": 1.2970964908599854, "step": 13358 }, { "epoch": 2.4317830162919813, "grad_norm": 8.5, "learning_rate": 2.694531194708625e-06, "loss": 1.4082047939300537, "step": 13360 }, { "epoch": 2.4321470829161735, "grad_norm": 8.5625, "learning_rate": 2.6936685521326335e-06, "loss": 1.0450191497802734, "step": 13362 }, { "epoch": 2.4325111495403657, "grad_norm": 8.875, "learning_rate": 2.6928063947473825e-06, "loss": 0.38913848996162415, "step": 13364 }, { "epoch": 2.432875216164558, "grad_norm": 24.125, "learning_rate": 2.6919447226793897e-06, "loss": 1.2759171724319458, "step": 13366 }, { "epoch": 2.43323928278875, "grad_norm": 4.40625, "learning_rate": 2.6910835360551037e-06, "loss": 1.3626065254211426, "step": 13368 }, { "epoch": 2.4336033494129428, "grad_norm": 13.8125, "learning_rate": 2.6902228350008975e-06, "loss": 1.3550106287002563, "step": 13370 }, { "epoch": 2.433967416037135, "grad_norm": 11.25, "learning_rate": 2.6893626196430776e-06, "loss": 1.5025393962860107, "step": 13372 }, { "epoch": 2.434331482661327, "grad_norm": 7.375, "learning_rate": 2.6885028901078763e-06, "loss": 1.3633432388305664, "step": 13374 }, { "epoch": 2.4346955492855193, "grad_norm": 14.625, "learning_rate": 2.687643646521456e-06, "loss": 1.3471213579177856, "step": 13376 }, { "epoch": 2.4350596159097115, "grad_norm": 26.25, "learning_rate": 2.6867848890099064e-06, "loss": 1.6405763626098633, "step": 13378 }, { "epoch": 2.4354236825339037, "grad_norm": 10.5, "learning_rate": 2.685926617699247e-06, "loss": 1.96724271774292, "step": 13380 }, { "epoch": 2.435787749158096, "grad_norm": 21.0, "learning_rate": 2.6850688327154256e-06, "loss": 1.5573025941848755, "step": 13382 }, { "epoch": 2.436151815782288, "grad_norm": 32.25, "learning_rate": 2.684211534184319e-06, "loss": 1.3523973226547241, "step": 13384 }, { "epoch": 2.4365158824064803, "grad_norm": 22.875, "learning_rate": 2.683354722231732e-06, "loss": 1.4019713401794434, "step": 13386 }, { "epoch": 2.4368799490306725, "grad_norm": 25.5, "learning_rate": 2.6824983969833983e-06, "loss": 1.538862943649292, "step": 13388 }, { "epoch": 2.4372440156548647, "grad_norm": 19.5, "learning_rate": 2.68164255856498e-06, "loss": 1.3116371631622314, "step": 13390 }, { "epoch": 2.437608082279057, "grad_norm": 20.25, "learning_rate": 2.6807872071020684e-06, "loss": 1.4484238624572754, "step": 13392 }, { "epoch": 2.437972148903249, "grad_norm": 24.875, "learning_rate": 2.6799323427201823e-06, "loss": 1.6456549167633057, "step": 13394 }, { "epoch": 2.4383362155274417, "grad_norm": 3.5625, "learning_rate": 2.67907796554477e-06, "loss": 1.0868059396743774, "step": 13396 }, { "epoch": 2.438700282151634, "grad_norm": 14.5625, "learning_rate": 2.678224075701208e-06, "loss": 1.248133897781372, "step": 13398 }, { "epoch": 2.439064348775826, "grad_norm": 16.0, "learning_rate": 2.677370673314801e-06, "loss": 1.6528799533843994, "step": 13400 }, { "epoch": 2.4394284154000183, "grad_norm": 12.75, "learning_rate": 2.6765177585107816e-06, "loss": 1.24089777469635, "step": 13402 }, { "epoch": 2.4397924820242105, "grad_norm": 18.0, "learning_rate": 2.6756653314143124e-06, "loss": 1.7661129236221313, "step": 13404 }, { "epoch": 2.4401565486484027, "grad_norm": 19.625, "learning_rate": 2.674813392150484e-06, "loss": 1.2290040254592896, "step": 13406 }, { "epoch": 2.440520615272595, "grad_norm": 4.8125, "learning_rate": 2.673961940844314e-06, "loss": 0.9780008792877197, "step": 13408 }, { "epoch": 2.440884681896787, "grad_norm": 17.75, "learning_rate": 2.67311097762075e-06, "loss": 2.018935203552246, "step": 13410 }, { "epoch": 2.4412487485209793, "grad_norm": 11.6875, "learning_rate": 2.672260502604667e-06, "loss": 1.2240355014801025, "step": 13412 }, { "epoch": 2.4416128151451715, "grad_norm": 7.9375, "learning_rate": 2.6714105159208693e-06, "loss": 1.2782444953918457, "step": 13414 }, { "epoch": 2.4419768817693637, "grad_norm": 8.875, "learning_rate": 2.6705610176940887e-06, "loss": 1.0177124738693237, "step": 13416 }, { "epoch": 2.442340948393556, "grad_norm": 10.8125, "learning_rate": 2.6697120080489864e-06, "loss": 1.7586997747421265, "step": 13418 }, { "epoch": 2.442705015017748, "grad_norm": 10.5625, "learning_rate": 2.66886348711015e-06, "loss": 1.1449360847473145, "step": 13420 }, { "epoch": 2.4430690816419407, "grad_norm": 6.90625, "learning_rate": 2.6680154550020972e-06, "loss": 1.1197320222854614, "step": 13422 }, { "epoch": 2.443433148266133, "grad_norm": 14.5, "learning_rate": 2.6671679118492727e-06, "loss": 1.4658730030059814, "step": 13424 }, { "epoch": 2.443797214890325, "grad_norm": 73.0, "learning_rate": 2.6663208577760503e-06, "loss": 1.6675937175750732, "step": 13426 }, { "epoch": 2.4441612815145173, "grad_norm": 18.5, "learning_rate": 2.665474292906732e-06, "loss": 1.6271227598190308, "step": 13428 }, { "epoch": 2.4445253481387095, "grad_norm": 10.625, "learning_rate": 2.6646282173655473e-06, "loss": 1.274809718132019, "step": 13430 }, { "epoch": 2.4448894147629017, "grad_norm": 65.0, "learning_rate": 2.6637826312766545e-06, "loss": 1.2878271341323853, "step": 13432 }, { "epoch": 2.445253481387094, "grad_norm": 6.40625, "learning_rate": 2.6629375347641406e-06, "loss": 1.2144771814346313, "step": 13434 }, { "epoch": 2.445617548011286, "grad_norm": 5.71875, "learning_rate": 2.66209292795202e-06, "loss": 1.2489968538284302, "step": 13436 }, { "epoch": 2.4459816146354783, "grad_norm": 13.375, "learning_rate": 2.6612488109642338e-06, "loss": 1.2636427879333496, "step": 13438 }, { "epoch": 2.4463456812596704, "grad_norm": 21.875, "learning_rate": 2.660405183924654e-06, "loss": 2.0725436210632324, "step": 13440 }, { "epoch": 2.4467097478838626, "grad_norm": 22.75, "learning_rate": 2.65956204695708e-06, "loss": 1.2559139728546143, "step": 13442 }, { "epoch": 2.447073814508055, "grad_norm": 8.4375, "learning_rate": 2.658719400185237e-06, "loss": 1.4041410684585571, "step": 13444 }, { "epoch": 2.447437881132247, "grad_norm": 63.75, "learning_rate": 2.6578772437327815e-06, "loss": 1.3121445178985596, "step": 13446 }, { "epoch": 2.4478019477564397, "grad_norm": 11.4375, "learning_rate": 2.6570355777232966e-06, "loss": 1.407128095626831, "step": 13448 }, { "epoch": 2.4481660143806314, "grad_norm": 12.25, "learning_rate": 2.656194402280292e-06, "loss": 1.1246237754821777, "step": 13450 }, { "epoch": 2.448530081004824, "grad_norm": 29.0, "learning_rate": 2.6553537175272074e-06, "loss": 1.1758387088775635, "step": 13452 }, { "epoch": 2.4488941476290162, "grad_norm": 12.8125, "learning_rate": 2.6545135235874108e-06, "loss": 1.7648210525512695, "step": 13454 }, { "epoch": 2.4492582142532084, "grad_norm": 5.34375, "learning_rate": 2.653673820584196e-06, "loss": 1.1780683994293213, "step": 13456 }, { "epoch": 2.4496222808774006, "grad_norm": 7.1875, "learning_rate": 2.6528346086407868e-06, "loss": 1.2844581604003906, "step": 13458 }, { "epoch": 2.449986347501593, "grad_norm": 14.75, "learning_rate": 2.6519958878803342e-06, "loss": 1.333465576171875, "step": 13460 }, { "epoch": 2.450350414125785, "grad_norm": 26.75, "learning_rate": 2.651157658425916e-06, "loss": 1.3125548362731934, "step": 13462 }, { "epoch": 2.450714480749977, "grad_norm": 30.75, "learning_rate": 2.650319920400541e-06, "loss": 1.4113140106201172, "step": 13464 }, { "epoch": 2.4510785473741694, "grad_norm": 15.125, "learning_rate": 2.649482673927142e-06, "loss": 1.3063167333602905, "step": 13466 }, { "epoch": 2.4514426139983616, "grad_norm": 10.3125, "learning_rate": 2.6486459191285815e-06, "loss": 1.2190064191818237, "step": 13468 }, { "epoch": 2.451806680622554, "grad_norm": 17.0, "learning_rate": 2.647809656127651e-06, "loss": 1.4076989889144897, "step": 13470 }, { "epoch": 2.452170747246746, "grad_norm": 12.625, "learning_rate": 2.646973885047068e-06, "loss": 1.3949708938598633, "step": 13472 }, { "epoch": 2.452534813870938, "grad_norm": 9.25, "learning_rate": 2.6461386060094796e-06, "loss": 1.10963773727417, "step": 13474 }, { "epoch": 2.4528988804951304, "grad_norm": 5.875, "learning_rate": 2.645303819137458e-06, "loss": 1.3966336250305176, "step": 13476 }, { "epoch": 2.453262947119323, "grad_norm": 24.0, "learning_rate": 2.6444695245535058e-06, "loss": 1.0470508337020874, "step": 13478 }, { "epoch": 2.453627013743515, "grad_norm": 10.0625, "learning_rate": 2.643635722380052e-06, "loss": 1.3997278213500977, "step": 13480 }, { "epoch": 2.4539910803677074, "grad_norm": 12.4375, "learning_rate": 2.6428024127394536e-06, "loss": 0.9530977010726929, "step": 13482 }, { "epoch": 2.4543551469918996, "grad_norm": 28.5, "learning_rate": 2.641969595753996e-06, "loss": 0.47040119767189026, "step": 13484 }, { "epoch": 2.454719213616092, "grad_norm": 12.6875, "learning_rate": 2.6411372715458905e-06, "loss": 1.276247501373291, "step": 13486 }, { "epoch": 2.455083280240284, "grad_norm": 19.5, "learning_rate": 2.640305440237279e-06, "loss": 1.4224942922592163, "step": 13488 }, { "epoch": 2.455447346864476, "grad_norm": 9.125, "learning_rate": 2.6394741019502285e-06, "loss": 1.6374298334121704, "step": 13490 }, { "epoch": 2.4558114134886684, "grad_norm": 5.4375, "learning_rate": 2.6386432568067343e-06, "loss": 1.1236530542373657, "step": 13492 }, { "epoch": 2.4561754801128606, "grad_norm": 18.0, "learning_rate": 2.6378129049287193e-06, "loss": 1.5757429599761963, "step": 13494 }, { "epoch": 2.4565395467370528, "grad_norm": 10.1875, "learning_rate": 2.636983046438035e-06, "loss": 1.9966856241226196, "step": 13496 }, { "epoch": 2.456903613361245, "grad_norm": 47.25, "learning_rate": 2.63615368145646e-06, "loss": 1.3129832744598389, "step": 13498 }, { "epoch": 2.457267679985437, "grad_norm": 33.25, "learning_rate": 2.6353248101056995e-06, "loss": 1.5786428451538086, "step": 13500 }, { "epoch": 2.4576317466096294, "grad_norm": 21.375, "learning_rate": 2.6344964325073873e-06, "loss": 1.2999707460403442, "step": 13502 }, { "epoch": 2.457995813233822, "grad_norm": 6.625, "learning_rate": 2.633668548783084e-06, "loss": 1.1972272396087646, "step": 13504 }, { "epoch": 2.458359879858014, "grad_norm": 5.3125, "learning_rate": 2.6328411590542795e-06, "loss": 1.1587053537368774, "step": 13506 }, { "epoch": 2.4587239464822064, "grad_norm": 7.46875, "learning_rate": 2.6320142634423885e-06, "loss": 1.5507616996765137, "step": 13508 }, { "epoch": 2.4590880131063986, "grad_norm": 11.4375, "learning_rate": 2.6311878620687546e-06, "loss": 1.3037623167037964, "step": 13510 }, { "epoch": 2.4594520797305908, "grad_norm": 5.84375, "learning_rate": 2.63036195505465e-06, "loss": 1.182337760925293, "step": 13512 }, { "epoch": 2.459816146354783, "grad_norm": 17.625, "learning_rate": 2.6295365425212727e-06, "loss": 1.3224929571151733, "step": 13514 }, { "epoch": 2.460180212978975, "grad_norm": 13.1875, "learning_rate": 2.628711624589748e-06, "loss": 1.0804643630981445, "step": 13516 }, { "epoch": 2.4605442796031674, "grad_norm": 13.5625, "learning_rate": 2.6278872013811296e-06, "loss": 1.0033141374588013, "step": 13518 }, { "epoch": 2.4609083462273595, "grad_norm": 15.625, "learning_rate": 2.6270632730163993e-06, "loss": 1.5021710395812988, "step": 13520 }, { "epoch": 2.4612724128515517, "grad_norm": 12.3125, "learning_rate": 2.6262398396164635e-06, "loss": 1.5142216682434082, "step": 13522 }, { "epoch": 2.461636479475744, "grad_norm": 9.875, "learning_rate": 2.6254169013021584e-06, "loss": 1.4393287897109985, "step": 13524 }, { "epoch": 2.462000546099936, "grad_norm": 5.59375, "learning_rate": 2.6245944581942478e-06, "loss": 1.422282099723816, "step": 13526 }, { "epoch": 2.4623646127241283, "grad_norm": 6.15625, "learning_rate": 2.623772510413421e-06, "loss": 1.2538256645202637, "step": 13528 }, { "epoch": 2.462728679348321, "grad_norm": 17.125, "learning_rate": 2.622951058080296e-06, "loss": 1.4100571870803833, "step": 13530 }, { "epoch": 2.463092745972513, "grad_norm": 10.125, "learning_rate": 2.6221301013154165e-06, "loss": 1.3595296144485474, "step": 13532 }, { "epoch": 2.4634568125967053, "grad_norm": 12.9375, "learning_rate": 2.621309640239256e-06, "loss": 1.3561580181121826, "step": 13534 }, { "epoch": 2.4638208792208975, "grad_norm": 12.0, "learning_rate": 2.620489674972212e-06, "loss": 1.3407347202301025, "step": 13536 }, { "epoch": 2.4641849458450897, "grad_norm": 19.875, "learning_rate": 2.619670205634613e-06, "loss": 1.353811264038086, "step": 13538 }, { "epoch": 2.464549012469282, "grad_norm": 5.25, "learning_rate": 2.618851232346712e-06, "loss": 1.2983731031417847, "step": 13540 }, { "epoch": 2.464913079093474, "grad_norm": 5.78125, "learning_rate": 2.61803275522869e-06, "loss": 1.2819554805755615, "step": 13542 }, { "epoch": 2.4652771457176663, "grad_norm": 18.125, "learning_rate": 2.617214774400656e-06, "loss": 1.2022639513015747, "step": 13544 }, { "epoch": 2.4656412123418585, "grad_norm": 18.125, "learning_rate": 2.6163972899826436e-06, "loss": 2.0331835746765137, "step": 13546 }, { "epoch": 2.4660052789660507, "grad_norm": 8.9375, "learning_rate": 2.6155803020946164e-06, "loss": 1.044741153717041, "step": 13548 }, { "epoch": 2.466369345590243, "grad_norm": 10.4375, "learning_rate": 2.6147638108564644e-06, "loss": 1.3638688325881958, "step": 13550 }, { "epoch": 2.466733412214435, "grad_norm": 5.34375, "learning_rate": 2.613947816388004e-06, "loss": 1.2540897130966187, "step": 13552 }, { "epoch": 2.4670974788386273, "grad_norm": 8.0625, "learning_rate": 2.6131323188089793e-06, "loss": 1.0862540006637573, "step": 13554 }, { "epoch": 2.46746154546282, "grad_norm": 17.0, "learning_rate": 2.6123173182390605e-06, "loss": 1.3320529460906982, "step": 13556 }, { "epoch": 2.4678256120870117, "grad_norm": 6.28125, "learning_rate": 2.611502814797846e-06, "loss": 1.2859331369400024, "step": 13558 }, { "epoch": 2.4681896787112043, "grad_norm": 12.125, "learning_rate": 2.610688808604862e-06, "loss": 0.9958328008651733, "step": 13560 }, { "epoch": 2.4685537453353965, "grad_norm": 14.875, "learning_rate": 2.609875299779559e-06, "loss": 1.829673171043396, "step": 13562 }, { "epoch": 2.4689178119595887, "grad_norm": 15.75, "learning_rate": 2.609062288441317e-06, "loss": 1.5439484119415283, "step": 13564 }, { "epoch": 2.469281878583781, "grad_norm": 56.0, "learning_rate": 2.6082497747094416e-06, "loss": 1.1055101156234741, "step": 13566 }, { "epoch": 2.469645945207973, "grad_norm": 6.875, "learning_rate": 2.6074377587031663e-06, "loss": 1.164146900177002, "step": 13568 }, { "epoch": 2.4700100118321653, "grad_norm": 18.125, "learning_rate": 2.6066262405416514e-06, "loss": 1.1970126628875732, "step": 13570 }, { "epoch": 2.4703740784563575, "grad_norm": 34.75, "learning_rate": 2.6058152203439833e-06, "loss": 1.2932348251342773, "step": 13572 }, { "epoch": 2.4707381450805497, "grad_norm": 34.75, "learning_rate": 2.6050046982291766e-06, "loss": 1.0211294889450073, "step": 13574 }, { "epoch": 2.471102211704742, "grad_norm": 38.75, "learning_rate": 2.604194674316171e-06, "loss": 1.4936089515686035, "step": 13576 }, { "epoch": 2.471466278328934, "grad_norm": 15.375, "learning_rate": 2.6033851487238352e-06, "loss": 1.3037607669830322, "step": 13578 }, { "epoch": 2.4718303449531263, "grad_norm": 4.4375, "learning_rate": 2.6025761215709633e-06, "loss": 1.1959121227264404, "step": 13580 }, { "epoch": 2.472194411577319, "grad_norm": 3.59375, "learning_rate": 2.6017675929762775e-06, "loss": 1.0831255912780762, "step": 13582 }, { "epoch": 2.4725584782015106, "grad_norm": 4.40625, "learning_rate": 2.6009595630584255e-06, "loss": 1.3629963397979736, "step": 13584 }, { "epoch": 2.4729225448257033, "grad_norm": 18.75, "learning_rate": 2.6001520319359823e-06, "loss": 1.3672566413879395, "step": 13586 }, { "epoch": 2.4732866114498955, "grad_norm": 12.6875, "learning_rate": 2.5993449997274506e-06, "loss": 1.143486499786377, "step": 13588 }, { "epoch": 2.4736506780740877, "grad_norm": 4.71875, "learning_rate": 2.598538466551258e-06, "loss": 1.1877418756484985, "step": 13590 }, { "epoch": 2.47401474469828, "grad_norm": 2.265625, "learning_rate": 2.5977324325257606e-06, "loss": 0.8492780923843384, "step": 13592 }, { "epoch": 2.474378811322472, "grad_norm": 10.375, "learning_rate": 2.5969268977692407e-06, "loss": 0.29919368028640747, "step": 13594 }, { "epoch": 2.4747428779466643, "grad_norm": 8.0, "learning_rate": 2.596121862399907e-06, "loss": 0.8483954071998596, "step": 13596 }, { "epoch": 2.4751069445708564, "grad_norm": 7.28125, "learning_rate": 2.5953173265358956e-06, "loss": 1.230735182762146, "step": 13598 }, { "epoch": 2.4754710111950486, "grad_norm": 12.1875, "learning_rate": 2.594513290295268e-06, "loss": 1.4282658100128174, "step": 13600 }, { "epoch": 2.475835077819241, "grad_norm": 26.5, "learning_rate": 2.5937097537960136e-06, "loss": 1.5406758785247803, "step": 13602 }, { "epoch": 2.476199144443433, "grad_norm": 10.3125, "learning_rate": 2.592906717156049e-06, "loss": 1.286152720451355, "step": 13604 }, { "epoch": 2.4765632110676252, "grad_norm": 4.28125, "learning_rate": 2.5921041804932155e-06, "loss": 0.8500762581825256, "step": 13606 }, { "epoch": 2.4769272776918174, "grad_norm": 9.0, "learning_rate": 2.5913021439252826e-06, "loss": 1.3473135232925415, "step": 13608 }, { "epoch": 2.4772913443160096, "grad_norm": 62.75, "learning_rate": 2.5905006075699462e-06, "loss": 1.3492763042449951, "step": 13610 }, { "epoch": 2.4776554109402023, "grad_norm": 25.375, "learning_rate": 2.589699571544828e-06, "loss": 1.372023105621338, "step": 13612 }, { "epoch": 2.4780194775643944, "grad_norm": 25.125, "learning_rate": 2.5888990359674767e-06, "loss": 1.7664756774902344, "step": 13614 }, { "epoch": 2.4783835441885866, "grad_norm": 74.0, "learning_rate": 2.588099000955368e-06, "loss": 1.2682101726531982, "step": 13616 }, { "epoch": 2.478747610812779, "grad_norm": 7.28125, "learning_rate": 2.5872994666259037e-06, "loss": 1.0988736152648926, "step": 13618 }, { "epoch": 2.479111677436971, "grad_norm": 10.5625, "learning_rate": 2.5865004330964126e-06, "loss": 1.2914401292800903, "step": 13620 }, { "epoch": 2.4794757440611632, "grad_norm": 9.4375, "learning_rate": 2.585701900484149e-06, "loss": 1.4311060905456543, "step": 13622 }, { "epoch": 2.4798398106853554, "grad_norm": 5.34375, "learning_rate": 2.5849038689062944e-06, "loss": 0.9893988370895386, "step": 13624 }, { "epoch": 2.4802038773095476, "grad_norm": 19.375, "learning_rate": 2.5841063384799563e-06, "loss": 0.9233954548835754, "step": 13626 }, { "epoch": 2.48056794393374, "grad_norm": 8.625, "learning_rate": 2.5833093093221708e-06, "loss": 1.1942216157913208, "step": 13628 }, { "epoch": 2.480932010557932, "grad_norm": 5.9375, "learning_rate": 2.5825127815498967e-06, "loss": 1.435471773147583, "step": 13630 }, { "epoch": 2.481296077182124, "grad_norm": 6.46875, "learning_rate": 2.581716755280022e-06, "loss": 1.4254781007766724, "step": 13632 }, { "epoch": 2.4816601438063164, "grad_norm": 8.25, "learning_rate": 2.5809212306293606e-06, "loss": 1.3009958267211914, "step": 13634 }, { "epoch": 2.4820242104305086, "grad_norm": 12.5, "learning_rate": 2.5801262077146527e-06, "loss": 1.5462100505828857, "step": 13636 }, { "epoch": 2.482388277054701, "grad_norm": 18.375, "learning_rate": 2.5793316866525635e-06, "loss": 1.1356589794158936, "step": 13638 }, { "epoch": 2.4827523436788934, "grad_norm": 22.625, "learning_rate": 2.5785376675596875e-06, "loss": 1.202012062072754, "step": 13640 }, { "epoch": 2.4831164103030856, "grad_norm": 16.625, "learning_rate": 2.577744150552542e-06, "loss": 1.3646906614303589, "step": 13642 }, { "epoch": 2.483480476927278, "grad_norm": 9.0, "learning_rate": 2.5769511357475734e-06, "loss": 1.3165546655654907, "step": 13644 }, { "epoch": 2.48384454355147, "grad_norm": 21.0, "learning_rate": 2.5761586232611533e-06, "loss": 1.2491192817687988, "step": 13646 }, { "epoch": 2.484208610175662, "grad_norm": 10.0625, "learning_rate": 2.57536661320958e-06, "loss": 1.4154661893844604, "step": 13648 }, { "epoch": 2.4845726767998544, "grad_norm": 11.25, "learning_rate": 2.5745751057090764e-06, "loss": 1.2536170482635498, "step": 13650 }, { "epoch": 2.4849367434240466, "grad_norm": 14.625, "learning_rate": 2.5737841008757945e-06, "loss": 1.5065913200378418, "step": 13652 }, { "epoch": 2.4853008100482388, "grad_norm": 22.125, "learning_rate": 2.572993598825811e-06, "loss": 1.4974873065948486, "step": 13654 }, { "epoch": 2.485664876672431, "grad_norm": 15.4375, "learning_rate": 2.572203599675128e-06, "loss": 1.5454620122909546, "step": 13656 }, { "epoch": 2.486028943296623, "grad_norm": 19.75, "learning_rate": 2.5714141035396757e-06, "loss": 1.4531348943710327, "step": 13658 }, { "epoch": 2.4863930099208154, "grad_norm": 12.875, "learning_rate": 2.570625110535308e-06, "loss": 1.4217097759246826, "step": 13660 }, { "epoch": 2.4867570765450075, "grad_norm": 21.125, "learning_rate": 2.5698366207778073e-06, "loss": 1.3445327281951904, "step": 13662 }, { "epoch": 2.4871211431692, "grad_norm": 20.0, "learning_rate": 2.5690486343828817e-06, "loss": 1.351833701133728, "step": 13664 }, { "epoch": 2.4874852097933924, "grad_norm": 15.1875, "learning_rate": 2.568261151466165e-06, "loss": 1.2386531829833984, "step": 13666 }, { "epoch": 2.4878492764175846, "grad_norm": 14.125, "learning_rate": 2.5674741721432162e-06, "loss": 1.2128888368606567, "step": 13668 }, { "epoch": 2.4882133430417768, "grad_norm": 16.25, "learning_rate": 2.5666876965295216e-06, "loss": 0.8626888990402222, "step": 13670 }, { "epoch": 2.488577409665969, "grad_norm": 6.875, "learning_rate": 2.5659017247404938e-06, "loss": 1.3152241706848145, "step": 13672 }, { "epoch": 2.488941476290161, "grad_norm": 48.0, "learning_rate": 2.5651162568914707e-06, "loss": 1.4157578945159912, "step": 13674 }, { "epoch": 2.4893055429143534, "grad_norm": 21.125, "learning_rate": 2.5643312930977168e-06, "loss": 1.6922650337219238, "step": 13676 }, { "epoch": 2.4896696095385455, "grad_norm": 21.375, "learning_rate": 2.563546833474421e-06, "loss": 1.460087537765503, "step": 13678 }, { "epoch": 2.4900336761627377, "grad_norm": 150.0, "learning_rate": 2.562762878136702e-06, "loss": 1.1895815134048462, "step": 13680 }, { "epoch": 2.49039774278693, "grad_norm": 30.125, "learning_rate": 2.5619794271996e-06, "loss": 0.7901835441589355, "step": 13682 }, { "epoch": 2.490761809411122, "grad_norm": 8.4375, "learning_rate": 2.561196480778084e-06, "loss": 1.2408534288406372, "step": 13684 }, { "epoch": 2.4911258760353143, "grad_norm": 8.8125, "learning_rate": 2.5604140389870487e-06, "loss": 1.0823071002960205, "step": 13686 }, { "epoch": 2.4914899426595065, "grad_norm": 11.875, "learning_rate": 2.5596321019413135e-06, "loss": 1.4040659666061401, "step": 13688 }, { "epoch": 2.491854009283699, "grad_norm": 15.75, "learning_rate": 2.5588506697556244e-06, "loss": 1.3021875619888306, "step": 13690 }, { "epoch": 2.492218075907891, "grad_norm": 6.8125, "learning_rate": 2.558069742544654e-06, "loss": 1.1599243879318237, "step": 13692 }, { "epoch": 2.4925821425320835, "grad_norm": 9.5625, "learning_rate": 2.5572893204229996e-06, "loss": 1.4490139484405518, "step": 13694 }, { "epoch": 2.4929462091562757, "grad_norm": 8.375, "learning_rate": 2.5565094035051856e-06, "loss": 1.3817355632781982, "step": 13696 }, { "epoch": 2.493310275780468, "grad_norm": 12.8125, "learning_rate": 2.555729991905662e-06, "loss": 1.1381415128707886, "step": 13698 }, { "epoch": 2.49367434240466, "grad_norm": 11.5625, "learning_rate": 2.5549510857388033e-06, "loss": 1.4469672441482544, "step": 13700 }, { "epoch": 2.4940384090288523, "grad_norm": 15.375, "learning_rate": 2.5541726851189107e-06, "loss": 1.2025855779647827, "step": 13702 }, { "epoch": 2.4944024756530445, "grad_norm": 12.1875, "learning_rate": 2.5533947901602124e-06, "loss": 0.6385471820831299, "step": 13704 }, { "epoch": 2.4947665422772367, "grad_norm": 19.25, "learning_rate": 2.5526174009768606e-06, "loss": 1.451906681060791, "step": 13706 }, { "epoch": 2.495130608901429, "grad_norm": 8.9375, "learning_rate": 2.551840517682934e-06, "loss": 1.4848006963729858, "step": 13708 }, { "epoch": 2.495494675525621, "grad_norm": 7.40625, "learning_rate": 2.551064140392438e-06, "loss": 1.1459434032440186, "step": 13710 }, { "epoch": 2.4958587421498133, "grad_norm": 10.0625, "learning_rate": 2.550288269219301e-06, "loss": 1.2502771615982056, "step": 13712 }, { "epoch": 2.4962228087740055, "grad_norm": 9.25, "learning_rate": 2.549512904277381e-06, "loss": 1.2116224765777588, "step": 13714 }, { "epoch": 2.4965868753981977, "grad_norm": 10.9375, "learning_rate": 2.5487380456804585e-06, "loss": 1.691530466079712, "step": 13716 }, { "epoch": 2.49695094202239, "grad_norm": 15.3125, "learning_rate": 2.5479636935422403e-06, "loss": 1.3164284229278564, "step": 13718 }, { "epoch": 2.4973150086465825, "grad_norm": 10.375, "learning_rate": 2.5471898479763614e-06, "loss": 1.2102293968200684, "step": 13720 }, { "epoch": 2.4976790752707747, "grad_norm": 4.40625, "learning_rate": 2.5464165090963783e-06, "loss": 1.0726637840270996, "step": 13722 }, { "epoch": 2.498043141894967, "grad_norm": 7.53125, "learning_rate": 2.5456436770157766e-06, "loss": 1.4328835010528564, "step": 13724 }, { "epoch": 2.498407208519159, "grad_norm": 11.3125, "learning_rate": 2.5448713518479663e-06, "loss": 1.1862092018127441, "step": 13726 }, { "epoch": 2.4987712751433513, "grad_norm": 14.0, "learning_rate": 2.5440995337062817e-06, "loss": 1.467098593711853, "step": 13728 }, { "epoch": 2.4991353417675435, "grad_norm": 9.3125, "learning_rate": 2.5433282227039864e-06, "loss": 1.6445116996765137, "step": 13730 }, { "epoch": 2.4994994083917357, "grad_norm": 15.75, "learning_rate": 2.542557418954265e-06, "loss": 1.1949834823608398, "step": 13732 }, { "epoch": 2.499863475015928, "grad_norm": 28.0, "learning_rate": 2.5417871225702307e-06, "loss": 1.7359752655029297, "step": 13734 }, { "epoch": 2.50022754164012, "grad_norm": 17.75, "learning_rate": 2.5410173336649213e-06, "loss": 1.80339777469635, "step": 13736 }, { "epoch": 2.5005916082643123, "grad_norm": 6.9375, "learning_rate": 2.5402480523512994e-06, "loss": 1.3936741352081299, "step": 13738 }, { "epoch": 2.5009556748885045, "grad_norm": 16.125, "learning_rate": 2.5394792787422562e-06, "loss": 1.1109888553619385, "step": 13740 }, { "epoch": 2.501319741512697, "grad_norm": 26.125, "learning_rate": 2.538711012950603e-06, "loss": 1.4367766380310059, "step": 13742 }, { "epoch": 2.501683808136889, "grad_norm": 16.375, "learning_rate": 2.5379432550890826e-06, "loss": 1.556336760520935, "step": 13744 }, { "epoch": 2.5020478747610815, "grad_norm": 17.25, "learning_rate": 2.5371760052703586e-06, "loss": 1.6233019828796387, "step": 13746 }, { "epoch": 2.5024119413852732, "grad_norm": 11.25, "learning_rate": 2.536409263607022e-06, "loss": 1.3923618793487549, "step": 13748 }, { "epoch": 2.502776008009466, "grad_norm": 21.375, "learning_rate": 2.535643030211589e-06, "loss": 1.0545963048934937, "step": 13750 }, { "epoch": 2.503140074633658, "grad_norm": 15.3125, "learning_rate": 2.534877305196502e-06, "loss": 1.2005292177200317, "step": 13752 }, { "epoch": 2.5035041412578503, "grad_norm": 64.5, "learning_rate": 2.534112088674128e-06, "loss": 1.2051119804382324, "step": 13754 }, { "epoch": 2.5038682078820425, "grad_norm": 17.25, "learning_rate": 2.5333473807567577e-06, "loss": 1.2427839040756226, "step": 13756 }, { "epoch": 2.5042322745062346, "grad_norm": 4.5625, "learning_rate": 2.532583181556611e-06, "loss": 1.0875390768051147, "step": 13758 }, { "epoch": 2.504596341130427, "grad_norm": 23.375, "learning_rate": 2.5318194911858294e-06, "loss": 1.4024766683578491, "step": 13760 }, { "epoch": 2.504960407754619, "grad_norm": 10.75, "learning_rate": 2.5310563097564834e-06, "loss": 1.7428451776504517, "step": 13762 }, { "epoch": 2.5053244743788112, "grad_norm": 4.25, "learning_rate": 2.530293637380565e-06, "loss": 1.353786587715149, "step": 13764 }, { "epoch": 2.5056885410030034, "grad_norm": 4.25, "learning_rate": 2.5295314741699933e-06, "loss": 0.9711217880249023, "step": 13766 }, { "epoch": 2.5060526076271956, "grad_norm": 18.75, "learning_rate": 2.528769820236614e-06, "loss": 1.3038735389709473, "step": 13768 }, { "epoch": 2.506416674251388, "grad_norm": 25.5, "learning_rate": 2.528008675692195e-06, "loss": 1.3974357843399048, "step": 13770 }, { "epoch": 2.5067807408755804, "grad_norm": 13.4375, "learning_rate": 2.527248040648433e-06, "loss": 1.3673768043518066, "step": 13772 }, { "epoch": 2.507144807499772, "grad_norm": 5.1875, "learning_rate": 2.526487915216947e-06, "loss": 1.086998701095581, "step": 13774 }, { "epoch": 2.507508874123965, "grad_norm": 13.875, "learning_rate": 2.5257282995092824e-06, "loss": 1.2242774963378906, "step": 13776 }, { "epoch": 2.507872940748157, "grad_norm": 7.71875, "learning_rate": 2.52496919363691e-06, "loss": 1.471954584121704, "step": 13778 }, { "epoch": 2.5082370073723492, "grad_norm": 10.0625, "learning_rate": 2.5242105977112253e-06, "loss": 1.5057036876678467, "step": 13780 }, { "epoch": 2.5086010739965414, "grad_norm": 9.375, "learning_rate": 2.523452511843549e-06, "loss": 1.245390772819519, "step": 13782 }, { "epoch": 2.5089651406207336, "grad_norm": 10.4375, "learning_rate": 2.522694936145127e-06, "loss": 1.4163994789123535, "step": 13784 }, { "epoch": 2.509329207244926, "grad_norm": 9.8125, "learning_rate": 2.5219378707271315e-06, "loss": 1.2796659469604492, "step": 13786 }, { "epoch": 2.509693273869118, "grad_norm": 7.25, "learning_rate": 2.5211813157006582e-06, "loss": 1.1654201745986938, "step": 13788 }, { "epoch": 2.51005734049331, "grad_norm": 20.625, "learning_rate": 2.520425271176728e-06, "loss": 0.8108351230621338, "step": 13790 }, { "epoch": 2.5104214071175024, "grad_norm": 11.8125, "learning_rate": 2.519669737266288e-06, "loss": 1.2694549560546875, "step": 13792 }, { "epoch": 2.5107854737416946, "grad_norm": 18.25, "learning_rate": 2.5189147140802093e-06, "loss": 1.4437546730041504, "step": 13794 }, { "epoch": 2.511149540365887, "grad_norm": 18.875, "learning_rate": 2.518160201729289e-06, "loss": 1.7303444147109985, "step": 13796 }, { "epoch": 2.5115136069900794, "grad_norm": 8.1875, "learning_rate": 2.5174062003242483e-06, "loss": 1.0157277584075928, "step": 13798 }, { "epoch": 2.511877673614271, "grad_norm": 16.125, "learning_rate": 2.516652709975734e-06, "loss": 1.3200199604034424, "step": 13800 }, { "epoch": 2.512241740238464, "grad_norm": 11.75, "learning_rate": 2.515899730794318e-06, "loss": 0.8717083930969238, "step": 13802 }, { "epoch": 2.512605806862656, "grad_norm": 31.625, "learning_rate": 2.515147262890496e-06, "loss": 0.9612295627593994, "step": 13804 }, { "epoch": 2.512969873486848, "grad_norm": 10.1875, "learning_rate": 2.5143953063746907e-06, "loss": 1.7548043727874756, "step": 13806 }, { "epoch": 2.5133339401110404, "grad_norm": 47.0, "learning_rate": 2.5136438613572486e-06, "loss": 1.640061378479004, "step": 13808 }, { "epoch": 2.5136980067352326, "grad_norm": 19.25, "learning_rate": 2.5128929279484406e-06, "loss": 1.9909260272979736, "step": 13810 }, { "epoch": 2.5140620733594248, "grad_norm": 29.0, "learning_rate": 2.512142506258463e-06, "loss": 1.9370267391204834, "step": 13812 }, { "epoch": 2.514426139983617, "grad_norm": 13.5625, "learning_rate": 2.5113925963974376e-06, "loss": 1.7296578884124756, "step": 13814 }, { "epoch": 2.514790206607809, "grad_norm": 37.75, "learning_rate": 2.5106431984754107e-06, "loss": 1.53657066822052, "step": 13816 }, { "epoch": 2.5151542732320014, "grad_norm": 19.25, "learning_rate": 2.509894312602354e-06, "loss": 0.5026533007621765, "step": 13818 }, { "epoch": 2.5155183398561936, "grad_norm": 30.75, "learning_rate": 2.509145938888162e-06, "loss": 1.3250442743301392, "step": 13820 }, { "epoch": 2.5158824064803857, "grad_norm": 16.375, "learning_rate": 2.508398077442657e-06, "loss": 1.3676886558532715, "step": 13822 }, { "epoch": 2.5162464731045784, "grad_norm": 11.375, "learning_rate": 2.507650728375583e-06, "loss": 1.4402050971984863, "step": 13824 }, { "epoch": 2.51661053972877, "grad_norm": 23.125, "learning_rate": 2.506903891796612e-06, "loss": 1.4431074857711792, "step": 13826 }, { "epoch": 2.5169746063529628, "grad_norm": 47.5, "learning_rate": 2.5061575678153384e-06, "loss": 0.8885664939880371, "step": 13828 }, { "epoch": 2.517338672977155, "grad_norm": 14.5, "learning_rate": 2.505411756541282e-06, "loss": 1.5238444805145264, "step": 13830 }, { "epoch": 2.517702739601347, "grad_norm": 4.59375, "learning_rate": 2.5046664580838885e-06, "loss": 1.3137186765670776, "step": 13832 }, { "epoch": 2.5180668062255394, "grad_norm": 6.40625, "learning_rate": 2.5039216725525273e-06, "loss": 1.3961814641952515, "step": 13834 }, { "epoch": 2.5184308728497315, "grad_norm": 10.0, "learning_rate": 2.5031774000564914e-06, "loss": 0.9666022062301636, "step": 13836 }, { "epoch": 2.5187949394739237, "grad_norm": 19.5, "learning_rate": 2.5024336407050016e-06, "loss": 1.367753267288208, "step": 13838 }, { "epoch": 2.519159006098116, "grad_norm": 13.375, "learning_rate": 2.5016903946071996e-06, "loss": 1.3994977474212646, "step": 13840 }, { "epoch": 2.519523072722308, "grad_norm": 9.1875, "learning_rate": 2.500947661872155e-06, "loss": 0.9966473579406738, "step": 13842 }, { "epoch": 2.5198871393465003, "grad_norm": 17.125, "learning_rate": 2.500205442608861e-06, "loss": 1.293057918548584, "step": 13844 }, { "epoch": 2.5202512059706925, "grad_norm": 21.375, "learning_rate": 2.499463736926235e-06, "loss": 1.063389539718628, "step": 13846 }, { "epoch": 2.5206152725948847, "grad_norm": 35.25, "learning_rate": 2.4987225449331185e-06, "loss": 1.5489585399627686, "step": 13848 }, { "epoch": 2.5209793392190774, "grad_norm": 18.125, "learning_rate": 2.49798186673828e-06, "loss": 1.410095453262329, "step": 13850 }, { "epoch": 2.521343405843269, "grad_norm": 12.1875, "learning_rate": 2.4972417024504096e-06, "loss": 1.4894192218780518, "step": 13852 }, { "epoch": 2.5217074724674617, "grad_norm": 15.125, "learning_rate": 2.496502052178124e-06, "loss": 1.558536171913147, "step": 13854 }, { "epoch": 2.522071539091654, "grad_norm": 11.0625, "learning_rate": 2.495762916029964e-06, "loss": 1.6361110210418701, "step": 13856 }, { "epoch": 2.522435605715846, "grad_norm": 16.5, "learning_rate": 2.4950242941143944e-06, "loss": 1.4143916368484497, "step": 13858 }, { "epoch": 2.5227996723400383, "grad_norm": 19.75, "learning_rate": 2.494286186539805e-06, "loss": 1.42330002784729, "step": 13860 }, { "epoch": 2.5231637389642305, "grad_norm": 16.625, "learning_rate": 2.493548593414511e-06, "loss": 1.4504057168960571, "step": 13862 }, { "epoch": 2.5235278055884227, "grad_norm": 14.8125, "learning_rate": 2.4928115148467498e-06, "loss": 1.4021843671798706, "step": 13864 }, { "epoch": 2.523891872212615, "grad_norm": 11.625, "learning_rate": 2.4920749509446855e-06, "loss": 1.0065877437591553, "step": 13866 }, { "epoch": 2.524255938836807, "grad_norm": 22.75, "learning_rate": 2.491338901816406e-06, "loss": 1.8538141250610352, "step": 13868 }, { "epoch": 2.5246200054609993, "grad_norm": 70.5, "learning_rate": 2.4906033675699235e-06, "loss": 1.7632840871810913, "step": 13870 }, { "epoch": 2.5249840720851915, "grad_norm": 15.9375, "learning_rate": 2.489868348313174e-06, "loss": 1.097691535949707, "step": 13872 }, { "epoch": 2.5253481387093837, "grad_norm": 104.5, "learning_rate": 2.4891338441540194e-06, "loss": 1.4180630445480347, "step": 13874 }, { "epoch": 2.5257122053335763, "grad_norm": 21.875, "learning_rate": 2.488399855200245e-06, "loss": 1.52070152759552, "step": 13876 }, { "epoch": 2.526076271957768, "grad_norm": 6.78125, "learning_rate": 2.4876663815595604e-06, "loss": 1.246095895767212, "step": 13878 }, { "epoch": 2.5264403385819607, "grad_norm": 16.625, "learning_rate": 2.4869334233395997e-06, "loss": 0.9933915138244629, "step": 13880 }, { "epoch": 2.5268044052061525, "grad_norm": 108.5, "learning_rate": 2.486200980647922e-06, "loss": 0.8729953765869141, "step": 13882 }, { "epoch": 2.527168471830345, "grad_norm": 6.5, "learning_rate": 2.485469053592011e-06, "loss": 1.2897939682006836, "step": 13884 }, { "epoch": 2.5275325384545373, "grad_norm": 7.28125, "learning_rate": 2.4847376422792723e-06, "loss": 1.262458086013794, "step": 13886 }, { "epoch": 2.5278966050787295, "grad_norm": 34.5, "learning_rate": 2.4840067468170386e-06, "loss": 1.4337372779846191, "step": 13888 }, { "epoch": 2.5282606717029217, "grad_norm": 12.25, "learning_rate": 2.483276367312566e-06, "loss": 1.4279046058654785, "step": 13890 }, { "epoch": 2.528624738327114, "grad_norm": 18.5, "learning_rate": 2.4825465038730345e-06, "loss": 1.6506714820861816, "step": 13892 }, { "epoch": 2.528988804951306, "grad_norm": 23.375, "learning_rate": 2.4818171566055486e-06, "loss": 1.5873976945877075, "step": 13894 }, { "epoch": 2.5293528715754983, "grad_norm": 13.875, "learning_rate": 2.481088325617137e-06, "loss": 1.840796709060669, "step": 13896 }, { "epoch": 2.5297169381996905, "grad_norm": 12.25, "learning_rate": 2.4803600110147527e-06, "loss": 1.4852625131607056, "step": 13898 }, { "epoch": 2.5300810048238827, "grad_norm": 9.75, "learning_rate": 2.479632212905273e-06, "loss": 1.3497776985168457, "step": 13900 }, { "epoch": 2.530445071448075, "grad_norm": 10.4375, "learning_rate": 2.4789049313954986e-06, "loss": 1.345975637435913, "step": 13902 }, { "epoch": 2.530809138072267, "grad_norm": 14.0625, "learning_rate": 2.4781781665921565e-06, "loss": 1.3405169248580933, "step": 13904 }, { "epoch": 2.5311732046964597, "grad_norm": 10.5625, "learning_rate": 2.4774519186018955e-06, "loss": 1.3927009105682373, "step": 13906 }, { "epoch": 2.5315372713206514, "grad_norm": 14.6875, "learning_rate": 2.4767261875312897e-06, "loss": 1.4216716289520264, "step": 13908 }, { "epoch": 2.531901337944844, "grad_norm": 20.125, "learning_rate": 2.4760009734868374e-06, "loss": 1.4923580884933472, "step": 13910 }, { "epoch": 2.5322654045690363, "grad_norm": 9.625, "learning_rate": 2.475276276574961e-06, "loss": 1.4852474927902222, "step": 13912 }, { "epoch": 2.5326294711932285, "grad_norm": 5.21875, "learning_rate": 2.4745520969020065e-06, "loss": 0.798814058303833, "step": 13914 }, { "epoch": 2.5329935378174206, "grad_norm": 17.625, "learning_rate": 2.4738284345742442e-06, "loss": 0.3050364851951599, "step": 13916 }, { "epoch": 2.533357604441613, "grad_norm": 18.0, "learning_rate": 2.4731052896978684e-06, "loss": 0.5007586479187012, "step": 13918 }, { "epoch": 2.533721671065805, "grad_norm": 22.125, "learning_rate": 2.4723826623789988e-06, "loss": 0.6491674184799194, "step": 13920 }, { "epoch": 2.5340857376899972, "grad_norm": 5.0625, "learning_rate": 2.471660552723677e-06, "loss": 1.091306447982788, "step": 13922 }, { "epoch": 2.5344498043141894, "grad_norm": 10.1875, "learning_rate": 2.47093896083787e-06, "loss": 1.2955987453460693, "step": 13924 }, { "epoch": 2.5348138709383816, "grad_norm": 16.75, "learning_rate": 2.4702178868274686e-06, "loss": 1.3831698894500732, "step": 13926 }, { "epoch": 2.535177937562574, "grad_norm": 12.0, "learning_rate": 2.469497330798287e-06, "loss": 1.4622896909713745, "step": 13928 }, { "epoch": 2.535542004186766, "grad_norm": 27.375, "learning_rate": 2.468777292856064e-06, "loss": 1.553640365600586, "step": 13930 }, { "epoch": 2.5359060708109586, "grad_norm": 22.875, "learning_rate": 2.4680577731064637e-06, "loss": 1.3646631240844727, "step": 13932 }, { "epoch": 2.5362701374351504, "grad_norm": 9.3125, "learning_rate": 2.46733877165507e-06, "loss": 1.591979742050171, "step": 13934 }, { "epoch": 2.536634204059343, "grad_norm": 11.6875, "learning_rate": 2.466620288607396e-06, "loss": 1.199021816253662, "step": 13936 }, { "epoch": 2.5369982706835352, "grad_norm": 24.625, "learning_rate": 2.4659023240688747e-06, "loss": 1.5112149715423584, "step": 13938 }, { "epoch": 2.5373623373077274, "grad_norm": 6.8125, "learning_rate": 2.465184878144865e-06, "loss": 1.3368182182312012, "step": 13940 }, { "epoch": 2.5377264039319196, "grad_norm": 30.0, "learning_rate": 2.464467950940649e-06, "loss": 1.5485413074493408, "step": 13942 }, { "epoch": 2.538090470556112, "grad_norm": 40.0, "learning_rate": 2.4637515425614327e-06, "loss": 1.6032359600067139, "step": 13944 }, { "epoch": 2.538454537180304, "grad_norm": 9.0625, "learning_rate": 2.4630356531123467e-06, "loss": 1.4440189599990845, "step": 13946 }, { "epoch": 2.538818603804496, "grad_norm": 9.875, "learning_rate": 2.4623202826984445e-06, "loss": 1.2808880805969238, "step": 13948 }, { "epoch": 2.5391826704286884, "grad_norm": 17.625, "learning_rate": 2.4616054314247038e-06, "loss": 1.329640507698059, "step": 13950 }, { "epoch": 2.5395467370528806, "grad_norm": 24.125, "learning_rate": 2.4608910993960265e-06, "loss": 1.365883708000183, "step": 13952 }, { "epoch": 2.539910803677073, "grad_norm": 6.84375, "learning_rate": 2.460177286717237e-06, "loss": 1.0846781730651855, "step": 13954 }, { "epoch": 2.540274870301265, "grad_norm": 17.875, "learning_rate": 2.4594639934930855e-06, "loss": 1.2535054683685303, "step": 13956 }, { "epoch": 2.5406389369254576, "grad_norm": 20.875, "learning_rate": 2.4587512198282443e-06, "loss": 1.6685168743133545, "step": 13958 }, { "epoch": 2.5410030035496494, "grad_norm": 53.5, "learning_rate": 2.45803896582731e-06, "loss": 1.5431749820709229, "step": 13960 }, { "epoch": 2.541367070173842, "grad_norm": 45.75, "learning_rate": 2.4573272315948034e-06, "loss": 1.725101113319397, "step": 13962 }, { "epoch": 2.541731136798034, "grad_norm": 12.0, "learning_rate": 2.4566160172351684e-06, "loss": 1.432145595550537, "step": 13964 }, { "epoch": 2.5420952034222264, "grad_norm": 11.3125, "learning_rate": 2.4559053228527725e-06, "loss": 1.4086716175079346, "step": 13966 }, { "epoch": 2.5424592700464186, "grad_norm": 11.9375, "learning_rate": 2.4551951485519076e-06, "loss": 1.4589173793792725, "step": 13968 }, { "epoch": 2.542823336670611, "grad_norm": 16.75, "learning_rate": 2.4544854944367887e-06, "loss": 1.5004044771194458, "step": 13970 }, { "epoch": 2.543187403294803, "grad_norm": 23.0, "learning_rate": 2.453776360611555e-06, "loss": 1.9516510963439941, "step": 13972 }, { "epoch": 2.543551469918995, "grad_norm": 24.875, "learning_rate": 2.453067747180269e-06, "loss": 1.7078602313995361, "step": 13974 }, { "epoch": 2.5439155365431874, "grad_norm": 9.8125, "learning_rate": 2.4523596542469164e-06, "loss": 1.1005921363830566, "step": 13976 }, { "epoch": 2.5442796031673796, "grad_norm": 17.0, "learning_rate": 2.451652081915407e-06, "loss": 1.4509215354919434, "step": 13978 }, { "epoch": 2.5446436697915717, "grad_norm": 8.4375, "learning_rate": 2.4509450302895745e-06, "loss": 1.3434514999389648, "step": 13980 }, { "epoch": 2.545007736415764, "grad_norm": 20.375, "learning_rate": 2.4502384994731757e-06, "loss": 1.1130987405776978, "step": 13982 }, { "epoch": 2.5453718030399566, "grad_norm": 7.34375, "learning_rate": 2.4495324895698914e-06, "loss": 0.3280452489852905, "step": 13984 }, { "epoch": 2.5457358696641483, "grad_norm": 21.5, "learning_rate": 2.4488270006833255e-06, "loss": 1.099259853363037, "step": 13986 }, { "epoch": 2.546099936288341, "grad_norm": 9.8125, "learning_rate": 2.4481220329170057e-06, "loss": 1.2947473526000977, "step": 13988 }, { "epoch": 2.5464640029125327, "grad_norm": 10.6875, "learning_rate": 2.4474175863743823e-06, "loss": 1.319547176361084, "step": 13990 }, { "epoch": 2.5468280695367254, "grad_norm": 9.5625, "learning_rate": 2.4467136611588315e-06, "loss": 1.5339314937591553, "step": 13992 }, { "epoch": 2.5471921361609176, "grad_norm": 18.0, "learning_rate": 2.4460102573736506e-06, "loss": 1.2120921611785889, "step": 13994 }, { "epoch": 2.5475562027851097, "grad_norm": 9.3125, "learning_rate": 2.445307375122061e-06, "loss": 0.8374010324478149, "step": 13996 }, { "epoch": 2.547920269409302, "grad_norm": 9.5, "learning_rate": 2.4446050145072085e-06, "loss": 1.2683812379837036, "step": 13998 }, { "epoch": 2.548284336033494, "grad_norm": 7.5, "learning_rate": 2.4439031756321612e-06, "loss": 1.372105360031128, "step": 14000 }, { "epoch": 2.5486484026576863, "grad_norm": 5.9375, "learning_rate": 2.443201858599912e-06, "loss": 1.2243845462799072, "step": 14002 }, { "epoch": 2.5490124692818785, "grad_norm": 25.625, "learning_rate": 2.4425010635133744e-06, "loss": 1.2761088609695435, "step": 14004 }, { "epoch": 2.5493765359060707, "grad_norm": 16.25, "learning_rate": 2.4418007904753894e-06, "loss": 1.9006787538528442, "step": 14006 }, { "epoch": 2.549740602530263, "grad_norm": 8.6875, "learning_rate": 2.441101039588718e-06, "loss": 1.3298379182815552, "step": 14008 }, { "epoch": 2.550104669154455, "grad_norm": 11.5625, "learning_rate": 2.4404018109560456e-06, "loss": 1.4144887924194336, "step": 14010 }, { "epoch": 2.5504687357786473, "grad_norm": 5.9375, "learning_rate": 2.4397031046799823e-06, "loss": 1.3036308288574219, "step": 14012 }, { "epoch": 2.55083280240284, "grad_norm": 11.4375, "learning_rate": 2.4390049208630596e-06, "loss": 1.601028323173523, "step": 14014 }, { "epoch": 2.5511968690270317, "grad_norm": 93.5, "learning_rate": 2.4383072596077328e-06, "loss": 1.2932220697402954, "step": 14016 }, { "epoch": 2.5515609356512243, "grad_norm": 6.34375, "learning_rate": 2.437610121016382e-06, "loss": 1.045025110244751, "step": 14018 }, { "epoch": 2.5519250022754165, "grad_norm": 7.5, "learning_rate": 2.436913505191309e-06, "loss": 1.05776047706604, "step": 14020 }, { "epoch": 2.5522890688996087, "grad_norm": 13.875, "learning_rate": 2.436217412234739e-06, "loss": 1.5952341556549072, "step": 14022 }, { "epoch": 2.552653135523801, "grad_norm": 24.375, "learning_rate": 2.4355218422488202e-06, "loss": 1.5637341737747192, "step": 14024 }, { "epoch": 2.553017202147993, "grad_norm": 16.0, "learning_rate": 2.4348267953356265e-06, "loss": 1.3550633192062378, "step": 14026 }, { "epoch": 2.5533812687721853, "grad_norm": 8.375, "learning_rate": 2.4341322715971514e-06, "loss": 1.3722586631774902, "step": 14028 }, { "epoch": 2.5537453353963775, "grad_norm": 13.1875, "learning_rate": 2.4334382711353147e-06, "loss": 1.1840168237686157, "step": 14030 }, { "epoch": 2.5541094020205697, "grad_norm": 11.5, "learning_rate": 2.432744794051958e-06, "loss": 0.8268307447433472, "step": 14032 }, { "epoch": 2.554473468644762, "grad_norm": 7.0, "learning_rate": 2.4320518404488455e-06, "loss": 1.0226351022720337, "step": 14034 }, { "epoch": 2.554837535268954, "grad_norm": 16.25, "learning_rate": 2.431359410427666e-06, "loss": 1.4597655534744263, "step": 14036 }, { "epoch": 2.5552016018931463, "grad_norm": 13.875, "learning_rate": 2.430667504090031e-06, "loss": 1.644943118095398, "step": 14038 }, { "epoch": 2.555565668517339, "grad_norm": 17.125, "learning_rate": 2.429976121537474e-06, "loss": 1.3708240985870361, "step": 14040 }, { "epoch": 2.5559297351415307, "grad_norm": 8.5, "learning_rate": 2.4292852628714524e-06, "loss": 1.067210078239441, "step": 14042 }, { "epoch": 2.5562938017657233, "grad_norm": 9.125, "learning_rate": 2.4285949281933486e-06, "loss": 0.957159161567688, "step": 14044 }, { "epoch": 2.5566578683899155, "grad_norm": 58.25, "learning_rate": 2.427905117604465e-06, "loss": 0.7425721883773804, "step": 14046 }, { "epoch": 2.5570219350141077, "grad_norm": 6.59375, "learning_rate": 2.4272158312060295e-06, "loss": 0.9386372566223145, "step": 14048 }, { "epoch": 2.5573860016383, "grad_norm": 26.625, "learning_rate": 2.426527069099191e-06, "loss": 0.9818754196166992, "step": 14050 }, { "epoch": 2.557750068262492, "grad_norm": 27.875, "learning_rate": 2.4258388313850236e-06, "loss": 1.4963124990463257, "step": 14052 }, { "epoch": 2.5581141348866843, "grad_norm": 13.3125, "learning_rate": 2.4251511181645226e-06, "loss": 1.4882919788360596, "step": 14054 }, { "epoch": 2.5584782015108765, "grad_norm": 15.25, "learning_rate": 2.4244639295386072e-06, "loss": 1.495132327079773, "step": 14056 }, { "epoch": 2.5588422681350687, "grad_norm": 21.625, "learning_rate": 2.42377726560812e-06, "loss": 1.30695641040802, "step": 14058 }, { "epoch": 2.559206334759261, "grad_norm": 14.875, "learning_rate": 2.423091126473826e-06, "loss": 1.5260798931121826, "step": 14060 }, { "epoch": 2.559570401383453, "grad_norm": 8.4375, "learning_rate": 2.4224055122364132e-06, "loss": 1.3462334871292114, "step": 14062 }, { "epoch": 2.5599344680076452, "grad_norm": 42.5, "learning_rate": 2.4217204229964926e-06, "loss": 2.029184579849243, "step": 14064 }, { "epoch": 2.560298534631838, "grad_norm": 8.875, "learning_rate": 2.4210358588545987e-06, "loss": 1.4047307968139648, "step": 14066 }, { "epoch": 2.5606626012560296, "grad_norm": 31.0, "learning_rate": 2.4203518199111876e-06, "loss": 1.3947217464447021, "step": 14068 }, { "epoch": 2.5610266678802223, "grad_norm": 10.1875, "learning_rate": 2.4196683062666404e-06, "loss": 1.3592097759246826, "step": 14070 }, { "epoch": 2.5613907345044145, "grad_norm": 12.3125, "learning_rate": 2.4189853180212596e-06, "loss": 1.3318440914154053, "step": 14072 }, { "epoch": 2.5617548011286067, "grad_norm": 17.5, "learning_rate": 2.418302855275271e-06, "loss": 0.7932029962539673, "step": 14074 }, { "epoch": 2.562118867752799, "grad_norm": 14.4375, "learning_rate": 2.417620918128822e-06, "loss": 0.417538046836853, "step": 14076 }, { "epoch": 2.562482934376991, "grad_norm": 16.5, "learning_rate": 2.4169395066819857e-06, "loss": 1.5427486896514893, "step": 14078 }, { "epoch": 2.5628470010011832, "grad_norm": 165.0, "learning_rate": 2.4162586210347565e-06, "loss": 1.2161442041397095, "step": 14080 }, { "epoch": 2.5632110676253754, "grad_norm": 3.375, "learning_rate": 2.41557826128705e-06, "loss": 1.4107484817504883, "step": 14082 }, { "epoch": 2.5635751342495676, "grad_norm": 10.0625, "learning_rate": 2.4148984275387077e-06, "loss": 1.0203148126602173, "step": 14084 }, { "epoch": 2.56393920087376, "grad_norm": 9.0, "learning_rate": 2.4142191198894927e-06, "loss": 1.4793447256088257, "step": 14086 }, { "epoch": 2.564303267497952, "grad_norm": 6.28125, "learning_rate": 2.4135403384390886e-06, "loss": 1.3543294668197632, "step": 14088 }, { "epoch": 2.564667334122144, "grad_norm": 5.5, "learning_rate": 2.4128620832871065e-06, "loss": 1.1586647033691406, "step": 14090 }, { "epoch": 2.565031400746337, "grad_norm": 6.40625, "learning_rate": 2.4121843545330757e-06, "loss": 1.2547489404678345, "step": 14092 }, { "epoch": 2.5653954673705286, "grad_norm": 21.375, "learning_rate": 2.4115071522764506e-06, "loss": 1.4677958488464355, "step": 14094 }, { "epoch": 2.5657595339947212, "grad_norm": 10.125, "learning_rate": 2.410830476616608e-06, "loss": 1.2225617170333862, "step": 14096 }, { "epoch": 2.5661236006189134, "grad_norm": 36.25, "learning_rate": 2.410154327652848e-06, "loss": 1.591138482093811, "step": 14098 }, { "epoch": 2.5664876672431056, "grad_norm": 37.5, "learning_rate": 2.409478705484391e-06, "loss": 1.7120736837387085, "step": 14100 }, { "epoch": 2.566851733867298, "grad_norm": 6.0625, "learning_rate": 2.408803610210384e-06, "loss": 1.0847187042236328, "step": 14102 }, { "epoch": 2.56721580049149, "grad_norm": 10.4375, "learning_rate": 2.4081290419298923e-06, "loss": 1.3927597999572754, "step": 14104 }, { "epoch": 2.567579867115682, "grad_norm": 9.0625, "learning_rate": 2.4074550007419077e-06, "loss": 1.3700611591339111, "step": 14106 }, { "epoch": 2.5679439337398744, "grad_norm": 3.84375, "learning_rate": 2.406781486745342e-06, "loss": 0.9951527118682861, "step": 14108 }, { "epoch": 2.5683080003640666, "grad_norm": 4.3125, "learning_rate": 2.4061085000390318e-06, "loss": 1.1989988088607788, "step": 14110 }, { "epoch": 2.568672066988259, "grad_norm": 17.125, "learning_rate": 2.4054360407217336e-06, "loss": 1.2150812149047852, "step": 14112 }, { "epoch": 2.569036133612451, "grad_norm": 4.59375, "learning_rate": 2.4047641088921295e-06, "loss": 1.2451732158660889, "step": 14114 }, { "epoch": 2.569400200236643, "grad_norm": 14.1875, "learning_rate": 2.4040927046488224e-06, "loss": 1.0359206199645996, "step": 14116 }, { "epoch": 2.5697642668608354, "grad_norm": 14.0625, "learning_rate": 2.4034218280903375e-06, "loss": 1.558552622795105, "step": 14118 }, { "epoch": 2.5701283334850276, "grad_norm": 8.4375, "learning_rate": 2.4027514793151237e-06, "loss": 0.9711041450500488, "step": 14120 }, { "epoch": 2.57049240010922, "grad_norm": 7.65625, "learning_rate": 2.402081658421552e-06, "loss": 1.195290207862854, "step": 14122 }, { "epoch": 2.570856466733412, "grad_norm": 8.8125, "learning_rate": 2.401412365507916e-06, "loss": 1.167384386062622, "step": 14124 }, { "epoch": 2.5712205333576046, "grad_norm": 21.0, "learning_rate": 2.400743600672431e-06, "loss": 1.945046067237854, "step": 14126 }, { "epoch": 2.571584599981797, "grad_norm": 15.4375, "learning_rate": 2.4000753640132367e-06, "loss": 1.9064695835113525, "step": 14128 }, { "epoch": 2.571948666605989, "grad_norm": 14.125, "learning_rate": 2.399407655628393e-06, "loss": 1.0812196731567383, "step": 14130 }, { "epoch": 2.572312733230181, "grad_norm": 100.0, "learning_rate": 2.3987404756158844e-06, "loss": 1.1352931261062622, "step": 14132 }, { "epoch": 2.5726767998543734, "grad_norm": 16.75, "learning_rate": 2.3980738240736164e-06, "loss": 1.2162847518920898, "step": 14134 }, { "epoch": 2.5730408664785656, "grad_norm": 15.125, "learning_rate": 2.3974077010994175e-06, "loss": 1.5836262702941895, "step": 14136 }, { "epoch": 2.5734049331027578, "grad_norm": 7.78125, "learning_rate": 2.396742106791038e-06, "loss": 1.4147813320159912, "step": 14138 }, { "epoch": 2.57376899972695, "grad_norm": 3.421875, "learning_rate": 2.396077041246152e-06, "loss": 1.1749075651168823, "step": 14140 }, { "epoch": 2.574133066351142, "grad_norm": 25.5, "learning_rate": 2.3954125045623537e-06, "loss": 0.823738157749176, "step": 14142 }, { "epoch": 2.5744971329753343, "grad_norm": 27.125, "learning_rate": 2.3947484968371636e-06, "loss": 0.9977008104324341, "step": 14144 }, { "epoch": 2.5748611995995265, "grad_norm": 8.9375, "learning_rate": 2.3940850181680197e-06, "loss": 1.2254635095596313, "step": 14146 }, { "epoch": 2.575225266223719, "grad_norm": 31.0, "learning_rate": 2.3934220686522868e-06, "loss": 1.5278208255767822, "step": 14148 }, { "epoch": 2.575589332847911, "grad_norm": 17.0, "learning_rate": 2.392759648387249e-06, "loss": 1.3942779302597046, "step": 14150 }, { "epoch": 2.5759533994721036, "grad_norm": 32.25, "learning_rate": 2.392097757470113e-06, "loss": 0.5594972968101501, "step": 14152 }, { "epoch": 2.5763174660962957, "grad_norm": 11.375, "learning_rate": 2.3914363959980107e-06, "loss": 1.4911736249923706, "step": 14154 }, { "epoch": 2.576681532720488, "grad_norm": 7.8125, "learning_rate": 2.390775564067993e-06, "loss": 1.3684496879577637, "step": 14156 }, { "epoch": 2.57704559934468, "grad_norm": 2.796875, "learning_rate": 2.3901152617770333e-06, "loss": 1.2593903541564941, "step": 14158 }, { "epoch": 2.5774096659688723, "grad_norm": 9.125, "learning_rate": 2.38945548922203e-06, "loss": 1.3786157369613647, "step": 14160 }, { "epoch": 2.5777737325930645, "grad_norm": 21.75, "learning_rate": 2.3887962464998016e-06, "loss": 1.3256924152374268, "step": 14162 }, { "epoch": 2.5781377992172567, "grad_norm": 16.625, "learning_rate": 2.388137533707089e-06, "loss": 1.7599164247512817, "step": 14164 }, { "epoch": 2.578501865841449, "grad_norm": 33.5, "learning_rate": 2.3874793509405554e-06, "loss": 1.1446453332901, "step": 14166 }, { "epoch": 2.578865932465641, "grad_norm": 11.5625, "learning_rate": 2.3868216982967875e-06, "loss": 1.5041176080703735, "step": 14168 }, { "epoch": 2.5792299990898333, "grad_norm": 6.5625, "learning_rate": 2.3861645758722915e-06, "loss": 1.199200987815857, "step": 14170 }, { "epoch": 2.5795940657140255, "grad_norm": 8.5625, "learning_rate": 2.385507983763499e-06, "loss": 1.264521598815918, "step": 14172 }, { "epoch": 2.579958132338218, "grad_norm": 5.84375, "learning_rate": 2.384851922066761e-06, "loss": 1.0853348970413208, "step": 14174 }, { "epoch": 2.58032219896241, "grad_norm": 14.0, "learning_rate": 2.384196390878354e-06, "loss": 1.4397194385528564, "step": 14176 }, { "epoch": 2.5806862655866025, "grad_norm": 3.734375, "learning_rate": 2.3835413902944716e-06, "loss": 1.045330286026001, "step": 14178 }, { "epoch": 2.5810503322107947, "grad_norm": 20.0, "learning_rate": 2.382886920411234e-06, "loss": 1.0101420879364014, "step": 14180 }, { "epoch": 2.581414398834987, "grad_norm": 14.625, "learning_rate": 2.382232981324683e-06, "loss": 0.5374385714530945, "step": 14182 }, { "epoch": 2.581778465459179, "grad_norm": 13.375, "learning_rate": 2.3815795731307795e-06, "loss": 1.5702306032180786, "step": 14184 }, { "epoch": 2.5821425320833713, "grad_norm": 6.65625, "learning_rate": 2.38092669592541e-06, "loss": 1.2941797971725464, "step": 14186 }, { "epoch": 2.5825065987075635, "grad_norm": 28.75, "learning_rate": 2.380274349804381e-06, "loss": 1.126169204711914, "step": 14188 }, { "epoch": 2.5828706653317557, "grad_norm": 11.4375, "learning_rate": 2.379622534863421e-06, "loss": 1.514399766921997, "step": 14190 }, { "epoch": 2.583234731955948, "grad_norm": 7.71875, "learning_rate": 2.378971251198183e-06, "loss": 1.7874150276184082, "step": 14192 }, { "epoch": 2.58359879858014, "grad_norm": 3.875, "learning_rate": 2.3783204989042384e-06, "loss": 1.0225834846496582, "step": 14194 }, { "epoch": 2.5839628652043323, "grad_norm": 15.5625, "learning_rate": 2.3776702780770835e-06, "loss": 1.6737661361694336, "step": 14196 }, { "epoch": 2.5843269318285245, "grad_norm": 10.375, "learning_rate": 2.377020588812135e-06, "loss": 1.4150168895721436, "step": 14198 }, { "epoch": 2.584690998452717, "grad_norm": 13.6875, "learning_rate": 2.376371431204733e-06, "loss": 1.61765456199646, "step": 14200 }, { "epoch": 2.585055065076909, "grad_norm": 21.25, "learning_rate": 2.3757228053501376e-06, "loss": 1.3891081809997559, "step": 14202 }, { "epoch": 2.5854191317011015, "grad_norm": 9.1875, "learning_rate": 2.375074711343533e-06, "loss": 1.3046094179153442, "step": 14204 }, { "epoch": 2.5857831983252937, "grad_norm": 9.75, "learning_rate": 2.374427149280024e-06, "loss": 1.4639339447021484, "step": 14206 }, { "epoch": 2.586147264949486, "grad_norm": 25.0, "learning_rate": 2.373780119254637e-06, "loss": 1.2532970905303955, "step": 14208 }, { "epoch": 2.586511331573678, "grad_norm": 10.875, "learning_rate": 2.3731336213623222e-06, "loss": 1.312552571296692, "step": 14210 }, { "epoch": 2.5868753981978703, "grad_norm": 21.625, "learning_rate": 2.37248765569795e-06, "loss": 1.4278985261917114, "step": 14212 }, { "epoch": 2.5872394648220625, "grad_norm": 20.625, "learning_rate": 2.3718422223563137e-06, "loss": 1.3898577690124512, "step": 14214 }, { "epoch": 2.5876035314462547, "grad_norm": 18.0, "learning_rate": 2.371197321432127e-06, "loss": 1.5639362335205078, "step": 14216 }, { "epoch": 2.587967598070447, "grad_norm": 15.75, "learning_rate": 2.370552953020028e-06, "loss": 1.7671689987182617, "step": 14218 }, { "epoch": 2.588331664694639, "grad_norm": 17.125, "learning_rate": 2.3699091172145732e-06, "loss": 1.5203633308410645, "step": 14220 }, { "epoch": 2.5886957313188312, "grad_norm": 9.0, "learning_rate": 2.369265814110244e-06, "loss": 1.2983410358428955, "step": 14222 }, { "epoch": 2.5890597979430234, "grad_norm": 6.53125, "learning_rate": 2.3686230438014434e-06, "loss": 1.10750150680542, "step": 14224 }, { "epoch": 2.589423864567216, "grad_norm": 8.5625, "learning_rate": 2.3679808063824943e-06, "loss": 1.3014956712722778, "step": 14226 }, { "epoch": 2.589787931191408, "grad_norm": 64.5, "learning_rate": 2.3673391019476423e-06, "loss": 1.2168763875961304, "step": 14228 }, { "epoch": 2.5901519978156005, "grad_norm": 32.0, "learning_rate": 2.366697930591055e-06, "loss": 1.2090435028076172, "step": 14230 }, { "epoch": 2.590516064439792, "grad_norm": 13.25, "learning_rate": 2.3660572924068225e-06, "loss": 0.7218501567840576, "step": 14232 }, { "epoch": 2.590880131063985, "grad_norm": 12.9375, "learning_rate": 2.365417187488954e-06, "loss": 0.9979938268661499, "step": 14234 }, { "epoch": 2.591244197688177, "grad_norm": 9.75, "learning_rate": 2.364777615931385e-06, "loss": 1.5098721981048584, "step": 14236 }, { "epoch": 2.5916082643123692, "grad_norm": 12.25, "learning_rate": 2.3641385778279675e-06, "loss": 1.4654102325439453, "step": 14238 }, { "epoch": 2.5919723309365614, "grad_norm": 20.375, "learning_rate": 2.3635000732724795e-06, "loss": 1.600963830947876, "step": 14240 }, { "epoch": 2.5923363975607536, "grad_norm": 11.875, "learning_rate": 2.3628621023586183e-06, "loss": 1.5925166606903076, "step": 14242 }, { "epoch": 2.592700464184946, "grad_norm": 18.625, "learning_rate": 2.3622246651800034e-06, "loss": 1.7492650747299194, "step": 14244 }, { "epoch": 2.593064530809138, "grad_norm": 11.5, "learning_rate": 2.3615877618301765e-06, "loss": 1.6882426738739014, "step": 14246 }, { "epoch": 2.59342859743333, "grad_norm": 10.0625, "learning_rate": 2.3609513924026e-06, "loss": 1.4019925594329834, "step": 14248 }, { "epoch": 2.5937926640575224, "grad_norm": 10.1875, "learning_rate": 2.360315556990659e-06, "loss": 1.3696213960647583, "step": 14250 }, { "epoch": 2.5941567306817146, "grad_norm": 10.5625, "learning_rate": 2.3596802556876596e-06, "loss": 1.0603593587875366, "step": 14252 }, { "epoch": 2.594520797305907, "grad_norm": 16.75, "learning_rate": 2.35904548858683e-06, "loss": 0.5949654579162598, "step": 14254 }, { "epoch": 2.5948848639300994, "grad_norm": 6.59375, "learning_rate": 2.358411255781319e-06, "loss": 1.5676074028015137, "step": 14256 }, { "epoch": 2.595248930554291, "grad_norm": 7.96875, "learning_rate": 2.3577775573641987e-06, "loss": 1.106121301651001, "step": 14258 }, { "epoch": 2.595612997178484, "grad_norm": 15.0, "learning_rate": 2.357144393428461e-06, "loss": 1.4317102432250977, "step": 14260 }, { "epoch": 2.595977063802676, "grad_norm": 12.875, "learning_rate": 2.35651176406702e-06, "loss": 1.816224455833435, "step": 14262 }, { "epoch": 2.596341130426868, "grad_norm": 6.34375, "learning_rate": 2.355879669372712e-06, "loss": 0.8903669714927673, "step": 14264 }, { "epoch": 2.5967051970510604, "grad_norm": 6.25, "learning_rate": 2.355248109438295e-06, "loss": 1.159472942352295, "step": 14266 }, { "epoch": 2.5970692636752526, "grad_norm": 12.8125, "learning_rate": 2.354617084356446e-06, "loss": 1.5132120847702026, "step": 14268 }, { "epoch": 2.597433330299445, "grad_norm": 15.25, "learning_rate": 2.353986594219767e-06, "loss": 1.4413330554962158, "step": 14270 }, { "epoch": 2.597797396923637, "grad_norm": 26.875, "learning_rate": 2.353356639120779e-06, "loss": 0.8578499555587769, "step": 14272 }, { "epoch": 2.598161463547829, "grad_norm": 16.375, "learning_rate": 2.3527272191519256e-06, "loss": 1.7673108577728271, "step": 14274 }, { "epoch": 2.5985255301720214, "grad_norm": 11.8125, "learning_rate": 2.352098334405572e-06, "loss": 1.3355708122253418, "step": 14276 }, { "epoch": 2.5988895967962136, "grad_norm": 26.75, "learning_rate": 2.3514699849740043e-06, "loss": 1.2164437770843506, "step": 14278 }, { "epoch": 2.5992536634204058, "grad_norm": 52.25, "learning_rate": 2.35084217094943e-06, "loss": 1.2727967500686646, "step": 14280 }, { "epoch": 2.5996177300445984, "grad_norm": 10.9375, "learning_rate": 2.350214892423978e-06, "loss": 1.511594295501709, "step": 14282 }, { "epoch": 2.59998179666879, "grad_norm": 28.75, "learning_rate": 2.3495881494896994e-06, "loss": 1.2684283256530762, "step": 14284 }, { "epoch": 2.600345863292983, "grad_norm": 59.0, "learning_rate": 2.348961942238566e-06, "loss": 1.7031049728393555, "step": 14286 }, { "epoch": 2.600709929917175, "grad_norm": 10.0625, "learning_rate": 2.3483362707624716e-06, "loss": 1.4205446243286133, "step": 14288 }, { "epoch": 2.601073996541367, "grad_norm": 7.71875, "learning_rate": 2.34771113515323e-06, "loss": 1.084086298942566, "step": 14290 }, { "epoch": 2.6014380631655594, "grad_norm": 7.6875, "learning_rate": 2.347086535502578e-06, "loss": 1.4121547937393188, "step": 14292 }, { "epoch": 2.6018021297897516, "grad_norm": 9.3125, "learning_rate": 2.3464624719021733e-06, "loss": 1.0999360084533691, "step": 14294 }, { "epoch": 2.6021661964139438, "grad_norm": 13.5, "learning_rate": 2.3458389444435944e-06, "loss": 1.4395822286605835, "step": 14296 }, { "epoch": 2.602530263038136, "grad_norm": 12.6875, "learning_rate": 2.345215953218341e-06, "loss": 1.4034985303878784, "step": 14298 }, { "epoch": 2.602894329662328, "grad_norm": 19.125, "learning_rate": 2.344593498317835e-06, "loss": 1.598644733428955, "step": 14300 }, { "epoch": 2.6032583962865203, "grad_norm": 13.4375, "learning_rate": 2.3439715798334193e-06, "loss": 1.7083077430725098, "step": 14302 }, { "epoch": 2.6036224629107125, "grad_norm": 2.734375, "learning_rate": 2.3433501978563575e-06, "loss": 0.9617128968238831, "step": 14304 }, { "epoch": 2.6039865295349047, "grad_norm": 14.5625, "learning_rate": 2.3427293524778348e-06, "loss": 0.9819358587265015, "step": 14306 }, { "epoch": 2.6043505961590974, "grad_norm": 7.625, "learning_rate": 2.342109043788959e-06, "loss": 1.1405773162841797, "step": 14308 }, { "epoch": 2.604714662783289, "grad_norm": 23.75, "learning_rate": 2.341489271880756e-06, "loss": 1.4422760009765625, "step": 14310 }, { "epoch": 2.6050787294074818, "grad_norm": 26.0, "learning_rate": 2.340870036844176e-06, "loss": 1.445676565170288, "step": 14312 }, { "epoch": 2.605442796031674, "grad_norm": 27.625, "learning_rate": 2.3402513387700886e-06, "loss": 1.2540534734725952, "step": 14314 }, { "epoch": 2.605806862655866, "grad_norm": 17.5, "learning_rate": 2.3396331777492853e-06, "loss": 1.7215461730957031, "step": 14316 }, { "epoch": 2.6061709292800583, "grad_norm": 14.75, "learning_rate": 2.3390155538724795e-06, "loss": 1.525050401687622, "step": 14318 }, { "epoch": 2.6065349959042505, "grad_norm": 167.0, "learning_rate": 2.338398467230305e-06, "loss": 1.8181389570236206, "step": 14320 }, { "epoch": 2.6068990625284427, "grad_norm": 10.3125, "learning_rate": 2.3377819179133156e-06, "loss": 1.5395398139953613, "step": 14322 }, { "epoch": 2.607263129152635, "grad_norm": 21.375, "learning_rate": 2.337165906011988e-06, "loss": 1.333439588546753, "step": 14324 }, { "epoch": 2.607627195776827, "grad_norm": 11.5, "learning_rate": 2.3365504316167197e-06, "loss": 1.4189414978027344, "step": 14326 }, { "epoch": 2.6079912624010193, "grad_norm": 23.25, "learning_rate": 2.335935494817829e-06, "loss": 1.6621311902999878, "step": 14328 }, { "epoch": 2.6083553290252115, "grad_norm": 33.25, "learning_rate": 2.3353210957055554e-06, "loss": 1.5448209047317505, "step": 14330 }, { "epoch": 2.6087193956494037, "grad_norm": 18.25, "learning_rate": 2.334707234370059e-06, "loss": 0.9667041301727295, "step": 14332 }, { "epoch": 2.6090834622735963, "grad_norm": 9.3125, "learning_rate": 2.3340939109014217e-06, "loss": 0.906665563583374, "step": 14334 }, { "epoch": 2.609447528897788, "grad_norm": 9.0, "learning_rate": 2.333481125389647e-06, "loss": 0.5902478694915771, "step": 14336 }, { "epoch": 2.6098115955219807, "grad_norm": 34.5, "learning_rate": 2.332868877924658e-06, "loss": 1.4425134658813477, "step": 14338 }, { "epoch": 2.610175662146173, "grad_norm": 12.375, "learning_rate": 2.332257168596299e-06, "loss": 1.6120284795761108, "step": 14340 }, { "epoch": 2.610539728770365, "grad_norm": 12.5625, "learning_rate": 2.3316459974943366e-06, "loss": 1.3588635921478271, "step": 14342 }, { "epoch": 2.6109037953945573, "grad_norm": 7.84375, "learning_rate": 2.331035364708458e-06, "loss": 1.2055678367614746, "step": 14344 }, { "epoch": 2.6112678620187495, "grad_norm": 16.875, "learning_rate": 2.33042527032827e-06, "loss": 1.2236871719360352, "step": 14346 }, { "epoch": 2.6116319286429417, "grad_norm": 22.75, "learning_rate": 2.3298157144433025e-06, "loss": 1.3296234607696533, "step": 14348 }, { "epoch": 2.611995995267134, "grad_norm": 11.6875, "learning_rate": 2.3292066971430047e-06, "loss": 1.363838791847229, "step": 14350 }, { "epoch": 2.612360061891326, "grad_norm": 12.6875, "learning_rate": 2.328598218516748e-06, "loss": 1.4381417036056519, "step": 14352 }, { "epoch": 2.6127241285155183, "grad_norm": 16.75, "learning_rate": 2.3279902786538235e-06, "loss": 1.3195042610168457, "step": 14354 }, { "epoch": 2.6130881951397105, "grad_norm": 5.71875, "learning_rate": 2.3273828776434447e-06, "loss": 1.3307335376739502, "step": 14356 }, { "epoch": 2.6134522617639027, "grad_norm": 13.25, "learning_rate": 2.3267760155747443e-06, "loss": 1.5191245079040527, "step": 14358 }, { "epoch": 2.613816328388095, "grad_norm": 15.8125, "learning_rate": 2.326169692536777e-06, "loss": 1.4936727285385132, "step": 14360 }, { "epoch": 2.614180395012287, "grad_norm": 17.25, "learning_rate": 2.3255639086185193e-06, "loss": 1.4101324081420898, "step": 14362 }, { "epoch": 2.6145444616364797, "grad_norm": 17.0, "learning_rate": 2.324958663908867e-06, "loss": 1.464810848236084, "step": 14364 }, { "epoch": 2.6149085282606714, "grad_norm": 5.9375, "learning_rate": 2.3243539584966364e-06, "loss": 0.9426772594451904, "step": 14366 }, { "epoch": 2.615272594884864, "grad_norm": 13.5625, "learning_rate": 2.3237497924705667e-06, "loss": 1.0723059177398682, "step": 14368 }, { "epoch": 2.6156366615090563, "grad_norm": 21.625, "learning_rate": 2.3231461659193165e-06, "loss": 1.020393967628479, "step": 14370 }, { "epoch": 2.6160007281332485, "grad_norm": 16.75, "learning_rate": 2.322543078931465e-06, "loss": 1.4742343425750732, "step": 14372 }, { "epoch": 2.6163647947574407, "grad_norm": 34.0, "learning_rate": 2.3219405315955136e-06, "loss": 1.6413697004318237, "step": 14374 }, { "epoch": 2.616728861381633, "grad_norm": 11.125, "learning_rate": 2.3213385239998836e-06, "loss": 1.2438894510269165, "step": 14376 }, { "epoch": 2.617092928005825, "grad_norm": 10.625, "learning_rate": 2.320737056232917e-06, "loss": 1.5618762969970703, "step": 14378 }, { "epoch": 2.6174569946300172, "grad_norm": 31.5, "learning_rate": 2.320136128382876e-06, "loss": 1.7725112438201904, "step": 14380 }, { "epoch": 2.6178210612542094, "grad_norm": 21.375, "learning_rate": 2.3195357405379447e-06, "loss": 1.1901895999908447, "step": 14382 }, { "epoch": 2.6181851278784016, "grad_norm": 10.1875, "learning_rate": 2.3189358927862284e-06, "loss": 0.6279973983764648, "step": 14384 }, { "epoch": 2.618549194502594, "grad_norm": 12.4375, "learning_rate": 2.3183365852157524e-06, "loss": 1.2559270858764648, "step": 14386 }, { "epoch": 2.618913261126786, "grad_norm": 11.4375, "learning_rate": 2.317737817914461e-06, "loss": 1.6523911952972412, "step": 14388 }, { "epoch": 2.6192773277509787, "grad_norm": 18.75, "learning_rate": 2.3171395909702225e-06, "loss": 0.824800968170166, "step": 14390 }, { "epoch": 2.6196413943751704, "grad_norm": 7.78125, "learning_rate": 2.3165419044708234e-06, "loss": 1.4402625560760498, "step": 14392 }, { "epoch": 2.620005460999363, "grad_norm": 3.53125, "learning_rate": 2.315944758503972e-06, "loss": 0.9888216257095337, "step": 14394 }, { "epoch": 2.6203695276235552, "grad_norm": 4.375, "learning_rate": 2.3153481531572976e-06, "loss": 1.024646282196045, "step": 14396 }, { "epoch": 2.6207335942477474, "grad_norm": 8.1875, "learning_rate": 2.3147520885183483e-06, "loss": 1.1572277545928955, "step": 14398 }, { "epoch": 2.6210976608719396, "grad_norm": 8.1875, "learning_rate": 2.314156564674596e-06, "loss": 1.1977852582931519, "step": 14400 }, { "epoch": 2.621461727496132, "grad_norm": 7.09375, "learning_rate": 2.3135615817134296e-06, "loss": 1.1601250171661377, "step": 14402 }, { "epoch": 2.621825794120324, "grad_norm": 8.5625, "learning_rate": 2.3129671397221617e-06, "loss": 1.4369096755981445, "step": 14404 }, { "epoch": 2.622189860744516, "grad_norm": 3.484375, "learning_rate": 2.3123732387880238e-06, "loss": 1.1548892259597778, "step": 14406 }, { "epoch": 2.6225539273687084, "grad_norm": 3.34375, "learning_rate": 2.3117798789981683e-06, "loss": 0.8749016523361206, "step": 14408 }, { "epoch": 2.6229179939929006, "grad_norm": 12.0, "learning_rate": 2.3111870604396686e-06, "loss": 1.1647199392318726, "step": 14410 }, { "epoch": 2.623282060617093, "grad_norm": 16.25, "learning_rate": 2.3105947831995184e-06, "loss": 1.4007911682128906, "step": 14412 }, { "epoch": 2.623646127241285, "grad_norm": 7.8125, "learning_rate": 2.3100030473646316e-06, "loss": 1.462831735610962, "step": 14414 }, { "epoch": 2.6240101938654776, "grad_norm": 8.5625, "learning_rate": 2.309411853021844e-06, "loss": 1.3439569473266602, "step": 14416 }, { "epoch": 2.6243742604896694, "grad_norm": 7.46875, "learning_rate": 2.3088212002579097e-06, "loss": 1.0835633277893066, "step": 14418 }, { "epoch": 2.624738327113862, "grad_norm": 9.5, "learning_rate": 2.3082310891595054e-06, "loss": 0.754274845123291, "step": 14420 }, { "epoch": 2.625102393738054, "grad_norm": 13.25, "learning_rate": 2.3076415198132275e-06, "loss": 1.5140894651412964, "step": 14422 }, { "epoch": 2.6254664603622464, "grad_norm": 28.5, "learning_rate": 2.3070524923055925e-06, "loss": 1.3967156410217285, "step": 14424 }, { "epoch": 2.6258305269864386, "grad_norm": 12.9375, "learning_rate": 2.3064640067230383e-06, "loss": 1.8292180299758911, "step": 14426 }, { "epoch": 2.626194593610631, "grad_norm": 13.6875, "learning_rate": 2.305876063151922e-06, "loss": 1.3166353702545166, "step": 14428 }, { "epoch": 2.626558660234823, "grad_norm": 38.0, "learning_rate": 2.305288661678523e-06, "loss": 1.9137250185012817, "step": 14430 }, { "epoch": 2.626922726859015, "grad_norm": 17.75, "learning_rate": 2.30470180238904e-06, "loss": 1.2852834463119507, "step": 14432 }, { "epoch": 2.6272867934832074, "grad_norm": 21.5, "learning_rate": 2.3041154853695904e-06, "loss": 1.461565613746643, "step": 14434 }, { "epoch": 2.6276508601073996, "grad_norm": 19.75, "learning_rate": 2.303529710706216e-06, "loss": 1.2691551446914673, "step": 14436 }, { "epoch": 2.6280149267315918, "grad_norm": 7.21875, "learning_rate": 2.302944478484876e-06, "loss": 0.8443024754524231, "step": 14438 }, { "epoch": 2.628378993355784, "grad_norm": 8.6875, "learning_rate": 2.302359788791451e-06, "loss": 1.1387990713119507, "step": 14440 }, { "epoch": 2.6287430599799766, "grad_norm": 7.0, "learning_rate": 2.301775641711742e-06, "loss": 1.3775629997253418, "step": 14442 }, { "epoch": 2.6291071266041683, "grad_norm": 14.5625, "learning_rate": 2.3011920373314697e-06, "loss": 1.2274903059005737, "step": 14444 }, { "epoch": 2.629471193228361, "grad_norm": 7.8125, "learning_rate": 2.300608975736276e-06, "loss": 1.4638116359710693, "step": 14446 }, { "epoch": 2.629835259852553, "grad_norm": 6.90625, "learning_rate": 2.3000264570117227e-06, "loss": 1.4709136486053467, "step": 14448 }, { "epoch": 2.6301993264767454, "grad_norm": 14.1875, "learning_rate": 2.2994444812432927e-06, "loss": 1.0979920625686646, "step": 14450 }, { "epoch": 2.6305633931009376, "grad_norm": 15.0625, "learning_rate": 2.298863048516387e-06, "loss": 1.4460787773132324, "step": 14452 }, { "epoch": 2.6309274597251298, "grad_norm": 7.25, "learning_rate": 2.29828215891633e-06, "loss": 1.2614566087722778, "step": 14454 }, { "epoch": 2.631291526349322, "grad_norm": 3.84375, "learning_rate": 2.297701812528365e-06, "loss": 1.0139660835266113, "step": 14456 }, { "epoch": 2.631655592973514, "grad_norm": 7.5625, "learning_rate": 2.297122009437654e-06, "loss": 1.4179061651229858, "step": 14458 }, { "epoch": 2.6320196595977063, "grad_norm": 15.0, "learning_rate": 2.296542749729282e-06, "loss": 1.7426719665527344, "step": 14460 }, { "epoch": 2.6323837262218985, "grad_norm": 13.875, "learning_rate": 2.295964033488253e-06, "loss": 1.5685440301895142, "step": 14462 }, { "epoch": 2.6327477928460907, "grad_norm": 12.125, "learning_rate": 2.2953858607994907e-06, "loss": 1.7342426776885986, "step": 14464 }, { "epoch": 2.633111859470283, "grad_norm": 190.0, "learning_rate": 2.2948082317478402e-06, "loss": 1.4392192363739014, "step": 14466 }, { "epoch": 2.6334759260944756, "grad_norm": 25.625, "learning_rate": 2.294231146418065e-06, "loss": 1.4335014820098877, "step": 14468 }, { "epoch": 2.6338399927186673, "grad_norm": 17.5, "learning_rate": 2.2936546048948516e-06, "loss": 1.2903966903686523, "step": 14470 }, { "epoch": 2.63420405934286, "grad_norm": 4.9375, "learning_rate": 2.2930786072628044e-06, "loss": 1.3931260108947754, "step": 14472 }, { "epoch": 2.6345681259670517, "grad_norm": 4.4375, "learning_rate": 2.292503153606448e-06, "loss": 0.9813063144683838, "step": 14474 }, { "epoch": 2.6349321925912443, "grad_norm": 92.5, "learning_rate": 2.2919282440102296e-06, "loss": 1.2947978973388672, "step": 14476 }, { "epoch": 2.6352962592154365, "grad_norm": 11.5, "learning_rate": 2.2913538785585136e-06, "loss": 0.8180490732192993, "step": 14478 }, { "epoch": 2.6356603258396287, "grad_norm": 15.0, "learning_rate": 2.290780057335586e-06, "loss": 1.3983210325241089, "step": 14480 }, { "epoch": 2.636024392463821, "grad_norm": 7.5, "learning_rate": 2.290206780425653e-06, "loss": 1.0530699491500854, "step": 14482 }, { "epoch": 2.636388459088013, "grad_norm": 11.8125, "learning_rate": 2.2896340479128402e-06, "loss": 1.1618951559066772, "step": 14484 }, { "epoch": 2.6367525257122053, "grad_norm": 12.125, "learning_rate": 2.2890618598811943e-06, "loss": 1.4503509998321533, "step": 14486 }, { "epoch": 2.6371165923363975, "grad_norm": 8.0, "learning_rate": 2.288490216414681e-06, "loss": 1.3243434429168701, "step": 14488 }, { "epoch": 2.6374806589605897, "grad_norm": 14.25, "learning_rate": 2.2879191175971874e-06, "loss": 1.1167292594909668, "step": 14490 }, { "epoch": 2.637844725584782, "grad_norm": 28.5, "learning_rate": 2.287348563512519e-06, "loss": 1.273335576057434, "step": 14492 }, { "epoch": 2.638208792208974, "grad_norm": 12.375, "learning_rate": 2.2867785542444035e-06, "loss": 1.3807586431503296, "step": 14494 }, { "epoch": 2.6385728588331663, "grad_norm": 10.6875, "learning_rate": 2.2862090898764865e-06, "loss": 1.3990294933319092, "step": 14496 }, { "epoch": 2.638936925457359, "grad_norm": 9.75, "learning_rate": 2.285640170492335e-06, "loss": 1.1912405490875244, "step": 14498 }, { "epoch": 2.6393009920815507, "grad_norm": 7.25, "learning_rate": 2.2850717961754355e-06, "loss": 1.3534977436065674, "step": 14500 }, { "epoch": 2.6396650587057433, "grad_norm": 9.3125, "learning_rate": 2.284503967009194e-06, "loss": 1.2397384643554688, "step": 14502 }, { "epoch": 2.6400291253299355, "grad_norm": 11.5, "learning_rate": 2.2839366830769386e-06, "loss": 1.3599773645401, "step": 14504 }, { "epoch": 2.6403931919541277, "grad_norm": 10.6875, "learning_rate": 2.283369944461915e-06, "loss": 1.5993993282318115, "step": 14506 }, { "epoch": 2.64075725857832, "grad_norm": 16.25, "learning_rate": 2.2828037512472893e-06, "loss": 1.2562651634216309, "step": 14508 }, { "epoch": 2.641121325202512, "grad_norm": 52.0, "learning_rate": 2.282238103516149e-06, "loss": 1.7715880870819092, "step": 14510 }, { "epoch": 2.6414853918267043, "grad_norm": 11.25, "learning_rate": 2.2816730013515008e-06, "loss": 1.5312063694000244, "step": 14512 }, { "epoch": 2.6418494584508965, "grad_norm": 7.53125, "learning_rate": 2.28110844483627e-06, "loss": 1.1991188526153564, "step": 14514 }, { "epoch": 2.6422135250750887, "grad_norm": 7.875, "learning_rate": 2.2805444340533034e-06, "loss": 1.2983596324920654, "step": 14516 }, { "epoch": 2.642577591699281, "grad_norm": 8.9375, "learning_rate": 2.2799809690853675e-06, "loss": 1.3489129543304443, "step": 14518 }, { "epoch": 2.642941658323473, "grad_norm": 22.375, "learning_rate": 2.2794180500151485e-06, "loss": 1.1990947723388672, "step": 14520 }, { "epoch": 2.6433057249476652, "grad_norm": 11.6875, "learning_rate": 2.2788556769252527e-06, "loss": 1.138641595840454, "step": 14522 }, { "epoch": 2.643669791571858, "grad_norm": 12.25, "learning_rate": 2.2782938498982055e-06, "loss": 1.4047293663024902, "step": 14524 }, { "epoch": 2.6440338581960496, "grad_norm": 14.1875, "learning_rate": 2.2777325690164533e-06, "loss": 1.4530627727508545, "step": 14526 }, { "epoch": 2.6443979248202423, "grad_norm": 10.125, "learning_rate": 2.277171834362361e-06, "loss": 1.341822624206543, "step": 14528 }, { "epoch": 2.6447619914444345, "grad_norm": 8.625, "learning_rate": 2.2766116460182155e-06, "loss": 1.4875551462173462, "step": 14530 }, { "epoch": 2.6451260580686267, "grad_norm": 7.28125, "learning_rate": 2.2760520040662215e-06, "loss": 1.2909774780273438, "step": 14532 }, { "epoch": 2.645490124692819, "grad_norm": 10.0625, "learning_rate": 2.2754929085885034e-06, "loss": 1.2349538803100586, "step": 14534 }, { "epoch": 2.645854191317011, "grad_norm": 12.5, "learning_rate": 2.274934359667107e-06, "loss": 1.3877525329589844, "step": 14536 }, { "epoch": 2.6462182579412032, "grad_norm": 50.25, "learning_rate": 2.274376357383997e-06, "loss": 1.4723800420761108, "step": 14538 }, { "epoch": 2.6465823245653954, "grad_norm": 8.875, "learning_rate": 2.2738189018210587e-06, "loss": 1.580697774887085, "step": 14540 }, { "epoch": 2.6469463911895876, "grad_norm": 7.25, "learning_rate": 2.273261993060095e-06, "loss": 1.3183248043060303, "step": 14542 }, { "epoch": 2.64731045781378, "grad_norm": 7.1875, "learning_rate": 2.27270563118283e-06, "loss": 1.2986781597137451, "step": 14544 }, { "epoch": 2.647674524437972, "grad_norm": 4.28125, "learning_rate": 2.272149816270909e-06, "loss": 1.0991648435592651, "step": 14546 }, { "epoch": 2.648038591062164, "grad_norm": 6.21875, "learning_rate": 2.2715945484058945e-06, "loss": 1.0388941764831543, "step": 14548 }, { "epoch": 2.648402657686357, "grad_norm": 10.125, "learning_rate": 2.27103982766927e-06, "loss": 0.7706663608551025, "step": 14550 }, { "epoch": 2.6487667243105486, "grad_norm": 13.25, "learning_rate": 2.2704856541424396e-06, "loss": 1.0283136367797852, "step": 14552 }, { "epoch": 2.6491307909347412, "grad_norm": 20.75, "learning_rate": 2.269932027906724e-06, "loss": 1.2795251607894897, "step": 14554 }, { "epoch": 2.6494948575589334, "grad_norm": 20.0, "learning_rate": 2.2693789490433672e-06, "loss": 1.2816487550735474, "step": 14556 }, { "epoch": 2.6498589241831256, "grad_norm": 13.9375, "learning_rate": 2.2688264176335305e-06, "loss": 1.2676740884780884, "step": 14558 }, { "epoch": 2.650222990807318, "grad_norm": 3.59375, "learning_rate": 2.2682744337582964e-06, "loss": 1.0327903032302856, "step": 14560 }, { "epoch": 2.65058705743151, "grad_norm": 12.6875, "learning_rate": 2.2677229974986646e-06, "loss": 0.953514039516449, "step": 14562 }, { "epoch": 2.650951124055702, "grad_norm": 10.125, "learning_rate": 2.267172108935558e-06, "loss": 1.427741527557373, "step": 14564 }, { "epoch": 2.6513151906798944, "grad_norm": 9.125, "learning_rate": 2.266621768149817e-06, "loss": 1.3594467639923096, "step": 14566 }, { "epoch": 2.6516792573040866, "grad_norm": 37.0, "learning_rate": 2.266071975222201e-06, "loss": 1.4734126329421997, "step": 14568 }, { "epoch": 2.652043323928279, "grad_norm": 16.0, "learning_rate": 2.26552273023339e-06, "loss": 1.6638107299804688, "step": 14570 }, { "epoch": 2.652407390552471, "grad_norm": 12.0625, "learning_rate": 2.2649740332639847e-06, "loss": 1.2713983058929443, "step": 14572 }, { "epoch": 2.652771457176663, "grad_norm": 27.5, "learning_rate": 2.264425884394503e-06, "loss": 1.333370566368103, "step": 14574 }, { "epoch": 2.653135523800856, "grad_norm": 32.5, "learning_rate": 2.2638782837053833e-06, "loss": 1.5306910276412964, "step": 14576 }, { "epoch": 2.6534995904250476, "grad_norm": 12.0625, "learning_rate": 2.263331231276985e-06, "loss": 1.5224599838256836, "step": 14578 }, { "epoch": 2.65386365704924, "grad_norm": 8.1875, "learning_rate": 2.262784727189584e-06, "loss": 1.6478886604309082, "step": 14580 }, { "epoch": 2.6542277236734324, "grad_norm": 15.1875, "learning_rate": 2.2622387715233802e-06, "loss": 1.4007929563522339, "step": 14582 }, { "epoch": 2.6545917902976246, "grad_norm": 7.71875, "learning_rate": 2.261693364358488e-06, "loss": 0.9857863187789917, "step": 14584 }, { "epoch": 2.654955856921817, "grad_norm": 35.75, "learning_rate": 2.261148505774945e-06, "loss": 1.383042812347412, "step": 14586 }, { "epoch": 2.655319923546009, "grad_norm": 5.53125, "learning_rate": 2.260604195852706e-06, "loss": 1.1225652694702148, "step": 14588 }, { "epoch": 2.655683990170201, "grad_norm": 11.625, "learning_rate": 2.2600604346716463e-06, "loss": 1.3695437908172607, "step": 14590 }, { "epoch": 2.6560480567943934, "grad_norm": 26.875, "learning_rate": 2.2595172223115626e-06, "loss": 1.3275632858276367, "step": 14592 }, { "epoch": 2.6564121234185856, "grad_norm": 72.5, "learning_rate": 2.258974558852167e-06, "loss": 1.5609420537948608, "step": 14594 }, { "epoch": 2.6567761900427778, "grad_norm": 22.25, "learning_rate": 2.2584324443730937e-06, "loss": 1.7811380624771118, "step": 14596 }, { "epoch": 2.65714025666697, "grad_norm": 9.125, "learning_rate": 2.257890878953896e-06, "loss": 1.324152946472168, "step": 14598 }, { "epoch": 2.657504323291162, "grad_norm": 3.3125, "learning_rate": 2.2573498626740457e-06, "loss": 1.0647938251495361, "step": 14600 }, { "epoch": 2.6578683899153543, "grad_norm": 5.34375, "learning_rate": 2.2568093956129368e-06, "loss": 0.8082306385040283, "step": 14602 }, { "epoch": 2.6582324565395465, "grad_norm": 10.1875, "learning_rate": 2.2562694778498786e-06, "loss": 1.3415851593017578, "step": 14604 }, { "epoch": 2.658596523163739, "grad_norm": 23.0, "learning_rate": 2.2557301094641026e-06, "loss": 1.5772604942321777, "step": 14606 }, { "epoch": 2.658960589787931, "grad_norm": 20.375, "learning_rate": 2.2551912905347586e-06, "loss": 1.470680832862854, "step": 14608 }, { "epoch": 2.6593246564121236, "grad_norm": 4.21875, "learning_rate": 2.2546530211409157e-06, "loss": 0.18548749387264252, "step": 14610 }, { "epoch": 2.6596887230363158, "grad_norm": 21.5, "learning_rate": 2.254115301361565e-06, "loss": 1.3066803216934204, "step": 14612 }, { "epoch": 2.660052789660508, "grad_norm": 13.3125, "learning_rate": 2.253578131275612e-06, "loss": 1.705047607421875, "step": 14614 }, { "epoch": 2.6604168562847, "grad_norm": 13.625, "learning_rate": 2.2530415109618863e-06, "loss": 1.3961238861083984, "step": 14616 }, { "epoch": 2.6607809229088923, "grad_norm": 10.8125, "learning_rate": 2.2525054404991327e-06, "loss": 1.5302186012268066, "step": 14618 }, { "epoch": 2.6611449895330845, "grad_norm": 12.3125, "learning_rate": 2.2519699199660182e-06, "loss": 1.515799641609192, "step": 14620 }, { "epoch": 2.6615090561572767, "grad_norm": 17.125, "learning_rate": 2.251434949441129e-06, "loss": 1.6144614219665527, "step": 14622 }, { "epoch": 2.661873122781469, "grad_norm": 19.0, "learning_rate": 2.2509005290029697e-06, "loss": 1.8386269807815552, "step": 14624 }, { "epoch": 2.662237189405661, "grad_norm": 12.625, "learning_rate": 2.2503666587299637e-06, "loss": 1.4178409576416016, "step": 14626 }, { "epoch": 2.6626012560298533, "grad_norm": 11.0625, "learning_rate": 2.249833338700455e-06, "loss": 1.9339309930801392, "step": 14628 }, { "epoch": 2.6629653226540455, "grad_norm": 36.75, "learning_rate": 2.2493005689927046e-06, "loss": 1.367071270942688, "step": 14630 }, { "epoch": 2.663329389278238, "grad_norm": 9.6875, "learning_rate": 2.248768349684897e-06, "loss": 1.1875, "step": 14632 }, { "epoch": 2.66369345590243, "grad_norm": 28.5, "learning_rate": 2.2482366808551306e-06, "loss": 0.8653788566589355, "step": 14634 }, { "epoch": 2.6640575225266225, "grad_norm": 30.625, "learning_rate": 2.2477055625814273e-06, "loss": 1.4966187477111816, "step": 14636 }, { "epoch": 2.6644215891508147, "grad_norm": 18.0, "learning_rate": 2.2471749949417253e-06, "loss": 1.96842360496521, "step": 14638 }, { "epoch": 2.664785655775007, "grad_norm": 32.5, "learning_rate": 2.246644978013884e-06, "loss": 1.4189033508300781, "step": 14640 }, { "epoch": 2.665149722399199, "grad_norm": 11.8125, "learning_rate": 2.246115511875682e-06, "loss": 1.377279281616211, "step": 14642 }, { "epoch": 2.6655137890233913, "grad_norm": 13.375, "learning_rate": 2.2455865966048152e-06, "loss": 1.3651355504989624, "step": 14644 }, { "epoch": 2.6658778556475835, "grad_norm": 29.0, "learning_rate": 2.2450582322788996e-06, "loss": 1.9583160877227783, "step": 14646 }, { "epoch": 2.6662419222717757, "grad_norm": 13.4375, "learning_rate": 2.2445304189754714e-06, "loss": 1.6047117710113525, "step": 14648 }, { "epoch": 2.666605988895968, "grad_norm": 3.578125, "learning_rate": 2.2440031567719833e-06, "loss": 0.9793223738670349, "step": 14650 }, { "epoch": 2.66697005552016, "grad_norm": 27.625, "learning_rate": 2.243476445745812e-06, "loss": 1.208122730255127, "step": 14652 }, { "epoch": 2.6673341221443523, "grad_norm": 7.84375, "learning_rate": 2.242950285974248e-06, "loss": 1.2500379085540771, "step": 14654 }, { "epoch": 2.6676981887685445, "grad_norm": 14.5, "learning_rate": 2.242424677534503e-06, "loss": 1.81507408618927, "step": 14656 }, { "epoch": 2.668062255392737, "grad_norm": 19.75, "learning_rate": 2.241899620503709e-06, "loss": 1.8425989151000977, "step": 14658 }, { "epoch": 2.668426322016929, "grad_norm": 13.125, "learning_rate": 2.2413751149589145e-06, "loss": 1.1693949699401855, "step": 14660 }, { "epoch": 2.6687903886411215, "grad_norm": 12.4375, "learning_rate": 2.240851160977091e-06, "loss": 1.3427404165267944, "step": 14662 }, { "epoch": 2.6691544552653137, "grad_norm": 8.625, "learning_rate": 2.2403277586351236e-06, "loss": 1.2425905466079712, "step": 14664 }, { "epoch": 2.669518521889506, "grad_norm": 17.625, "learning_rate": 2.2398049080098215e-06, "loss": 1.4040961265563965, "step": 14666 }, { "epoch": 2.669882588513698, "grad_norm": 10.6875, "learning_rate": 2.2392826091779106e-06, "loss": 1.386849045753479, "step": 14668 }, { "epoch": 2.6702466551378903, "grad_norm": 10.125, "learning_rate": 2.238760862216036e-06, "loss": 1.525034785270691, "step": 14670 }, { "epoch": 2.6706107217620825, "grad_norm": 21.875, "learning_rate": 2.238239667200762e-06, "loss": 1.605167031288147, "step": 14672 }, { "epoch": 2.6709747883862747, "grad_norm": 22.625, "learning_rate": 2.237719024208572e-06, "loss": 2.1560471057891846, "step": 14674 }, { "epoch": 2.671338855010467, "grad_norm": 19.25, "learning_rate": 2.2371989333158673e-06, "loss": 1.6465058326721191, "step": 14676 }, { "epoch": 2.671702921634659, "grad_norm": 42.75, "learning_rate": 2.2366793945989706e-06, "loss": 1.6958762407302856, "step": 14678 }, { "epoch": 2.6720669882588513, "grad_norm": 14.625, "learning_rate": 2.2361604081341203e-06, "loss": 0.8031466007232666, "step": 14680 }, { "epoch": 2.6724310548830434, "grad_norm": 9.875, "learning_rate": 2.2356419739974774e-06, "loss": 1.3648557662963867, "step": 14682 }, { "epoch": 2.672795121507236, "grad_norm": 8.625, "learning_rate": 2.235124092265119e-06, "loss": 0.6042052507400513, "step": 14684 }, { "epoch": 2.673159188131428, "grad_norm": 21.25, "learning_rate": 2.234606763013042e-06, "loss": 1.4619104862213135, "step": 14686 }, { "epoch": 2.6735232547556205, "grad_norm": 13.0625, "learning_rate": 2.2340899863171632e-06, "loss": 1.5344159603118896, "step": 14688 }, { "epoch": 2.6738873213798127, "grad_norm": 14.75, "learning_rate": 2.2335737622533166e-06, "loss": 1.0969370603561401, "step": 14690 }, { "epoch": 2.674251388004005, "grad_norm": 34.0, "learning_rate": 2.2330580908972574e-06, "loss": 0.5326094627380371, "step": 14692 }, { "epoch": 2.674615454628197, "grad_norm": 20.25, "learning_rate": 2.232542972324656e-06, "loss": 1.4828294515609741, "step": 14694 }, { "epoch": 2.6749795212523892, "grad_norm": 22.5, "learning_rate": 2.232028406611106e-06, "loss": 1.7330148220062256, "step": 14696 }, { "epoch": 2.6753435878765814, "grad_norm": 7.09375, "learning_rate": 2.2315143938321173e-06, "loss": 1.4993782043457031, "step": 14698 }, { "epoch": 2.6757076545007736, "grad_norm": 11.4375, "learning_rate": 2.2310009340631176e-06, "loss": 1.0553703308105469, "step": 14700 }, { "epoch": 2.676071721124966, "grad_norm": 17.0, "learning_rate": 2.230488027379458e-06, "loss": 1.1226357221603394, "step": 14702 }, { "epoch": 2.676435787749158, "grad_norm": 27.25, "learning_rate": 2.2299756738564037e-06, "loss": 0.956444263458252, "step": 14704 }, { "epoch": 2.67679985437335, "grad_norm": 13.75, "learning_rate": 2.2294638735691398e-06, "loss": 1.2135436534881592, "step": 14706 }, { "epoch": 2.6771639209975424, "grad_norm": 13.1875, "learning_rate": 2.2289526265927724e-06, "loss": 1.5399620532989502, "step": 14708 }, { "epoch": 2.677527987621735, "grad_norm": 27.25, "learning_rate": 2.228441933002323e-06, "loss": 2.1901984214782715, "step": 14710 }, { "epoch": 2.677892054245927, "grad_norm": 4.25, "learning_rate": 2.2279317928727374e-06, "loss": 1.040196418762207, "step": 14712 }, { "epoch": 2.6782561208701194, "grad_norm": 6.09375, "learning_rate": 2.2274222062788732e-06, "loss": 1.4260969161987305, "step": 14714 }, { "epoch": 2.678620187494311, "grad_norm": 10.375, "learning_rate": 2.226913173295511e-06, "loss": 1.3539091348648071, "step": 14716 }, { "epoch": 2.678984254118504, "grad_norm": 16.125, "learning_rate": 2.2264046939973503e-06, "loss": 1.5118415355682373, "step": 14718 }, { "epoch": 2.679348320742696, "grad_norm": 14.875, "learning_rate": 2.225896768459007e-06, "loss": 1.3771758079528809, "step": 14720 }, { "epoch": 2.679712387366888, "grad_norm": 7.15625, "learning_rate": 2.225389396755019e-06, "loss": 1.0954978466033936, "step": 14722 }, { "epoch": 2.6800764539910804, "grad_norm": 15.875, "learning_rate": 2.2248825789598384e-06, "loss": 1.1442478895187378, "step": 14724 }, { "epoch": 2.6804405206152726, "grad_norm": 10.0, "learning_rate": 2.2243763151478415e-06, "loss": 0.6619093418121338, "step": 14726 }, { "epoch": 2.680804587239465, "grad_norm": 17.0, "learning_rate": 2.223870605393318e-06, "loss": 1.2649319171905518, "step": 14728 }, { "epoch": 2.681168653863657, "grad_norm": 9.8125, "learning_rate": 2.2233654497704795e-06, "loss": 0.7245772480964661, "step": 14730 }, { "epoch": 2.681532720487849, "grad_norm": 14.9375, "learning_rate": 2.2228608483534573e-06, "loss": 1.4337693452835083, "step": 14732 }, { "epoch": 2.6818967871120414, "grad_norm": 6.28125, "learning_rate": 2.222356801216298e-06, "loss": 1.0649837255477905, "step": 14734 }, { "epoch": 2.6822608537362336, "grad_norm": 19.375, "learning_rate": 2.2218533084329676e-06, "loss": 1.519266963005066, "step": 14736 }, { "epoch": 2.6826249203604258, "grad_norm": 8.6875, "learning_rate": 2.221350370077354e-06, "loss": 0.9926698207855225, "step": 14738 }, { "epoch": 2.6829889869846184, "grad_norm": 7.09375, "learning_rate": 2.2208479862232586e-06, "loss": 1.3184170722961426, "step": 14740 }, { "epoch": 2.68335305360881, "grad_norm": 12.875, "learning_rate": 2.220346156944407e-06, "loss": 1.365871787071228, "step": 14742 }, { "epoch": 2.683717120233003, "grad_norm": 14.3125, "learning_rate": 2.2198448823144384e-06, "loss": 1.2982304096221924, "step": 14744 }, { "epoch": 2.684081186857195, "grad_norm": 14.3125, "learning_rate": 2.219344162406914e-06, "loss": 1.4157578945159912, "step": 14746 }, { "epoch": 2.684445253481387, "grad_norm": 17.375, "learning_rate": 2.218843997295312e-06, "loss": 1.2325361967086792, "step": 14748 }, { "epoch": 2.6848093201055794, "grad_norm": 6.46875, "learning_rate": 2.2183443870530295e-06, "loss": 1.0730057954788208, "step": 14750 }, { "epoch": 2.6851733867297716, "grad_norm": 7.21875, "learning_rate": 2.2178453317533833e-06, "loss": 1.3177622556686401, "step": 14752 }, { "epoch": 2.6855374533539638, "grad_norm": 10.8125, "learning_rate": 2.2173468314696066e-06, "loss": 1.1811798810958862, "step": 14754 }, { "epoch": 2.685901519978156, "grad_norm": 8.1875, "learning_rate": 2.2168488862748522e-06, "loss": 1.1338566541671753, "step": 14756 }, { "epoch": 2.686265586602348, "grad_norm": 14.8125, "learning_rate": 2.2163514962421924e-06, "loss": 1.6747138500213623, "step": 14758 }, { "epoch": 2.6866296532265403, "grad_norm": 11.75, "learning_rate": 2.215854661444616e-06, "loss": 1.6357932090759277, "step": 14760 }, { "epoch": 2.6869937198507325, "grad_norm": 5.0625, "learning_rate": 2.2153583819550336e-06, "loss": 1.193169355392456, "step": 14762 }, { "epoch": 2.6873577864749247, "grad_norm": 8.25, "learning_rate": 2.21486265784627e-06, "loss": 0.9699419736862183, "step": 14764 }, { "epoch": 2.6877218530991174, "grad_norm": 10.25, "learning_rate": 2.214367489191071e-06, "loss": 1.4303457736968994, "step": 14766 }, { "epoch": 2.688085919723309, "grad_norm": 19.0, "learning_rate": 2.213872876062102e-06, "loss": 1.7218208312988281, "step": 14768 }, { "epoch": 2.6884499863475018, "grad_norm": 12.375, "learning_rate": 2.2133788185319438e-06, "loss": 1.265617847442627, "step": 14770 }, { "epoch": 2.688814052971694, "grad_norm": 6.125, "learning_rate": 2.2128853166730995e-06, "loss": 1.4886524677276611, "step": 14772 }, { "epoch": 2.689178119595886, "grad_norm": 8.875, "learning_rate": 2.2123923705579866e-06, "loss": 1.4232155084609985, "step": 14774 }, { "epoch": 2.6895421862200783, "grad_norm": 8.625, "learning_rate": 2.2118999802589425e-06, "loss": 1.5835509300231934, "step": 14776 }, { "epoch": 2.6899062528442705, "grad_norm": 12.4375, "learning_rate": 2.2114081458482255e-06, "loss": 1.3214442729949951, "step": 14778 }, { "epoch": 2.6902703194684627, "grad_norm": 11.3125, "learning_rate": 2.2109168673980087e-06, "loss": 1.3034852743148804, "step": 14780 }, { "epoch": 2.690634386092655, "grad_norm": 11.375, "learning_rate": 2.2104261449803864e-06, "loss": 1.1885371208190918, "step": 14782 }, { "epoch": 2.690998452716847, "grad_norm": 28.625, "learning_rate": 2.2099359786673693e-06, "loss": 1.3588523864746094, "step": 14784 }, { "epoch": 2.6913625193410393, "grad_norm": 4.78125, "learning_rate": 2.2094463685308873e-06, "loss": 1.1375269889831543, "step": 14786 }, { "epoch": 2.6917265859652315, "grad_norm": 16.5, "learning_rate": 2.208957314642789e-06, "loss": 1.2421104907989502, "step": 14788 }, { "epoch": 2.6920906525894237, "grad_norm": 8.625, "learning_rate": 2.2084688170748404e-06, "loss": 0.9773150682449341, "step": 14790 }, { "epoch": 2.6924547192136163, "grad_norm": 15.3125, "learning_rate": 2.2079808758987287e-06, "loss": 1.2801337242126465, "step": 14792 }, { "epoch": 2.692818785837808, "grad_norm": 10.4375, "learning_rate": 2.2074934911860544e-06, "loss": 1.7359005212783813, "step": 14794 }, { "epoch": 2.6931828524620007, "grad_norm": 19.25, "learning_rate": 2.207006663008341e-06, "loss": 1.1615086793899536, "step": 14796 }, { "epoch": 2.693546919086193, "grad_norm": 21.25, "learning_rate": 2.2065203914370287e-06, "loss": 0.834223747253418, "step": 14798 }, { "epoch": 2.693910985710385, "grad_norm": 9.5, "learning_rate": 2.2060346765434743e-06, "loss": 1.5428526401519775, "step": 14800 }, { "epoch": 2.6942750523345773, "grad_norm": 4.90625, "learning_rate": 2.2055495183989565e-06, "loss": 1.3263686895370483, "step": 14802 }, { "epoch": 2.6946391189587695, "grad_norm": 10.1875, "learning_rate": 2.205064917074669e-06, "loss": 1.4751248359680176, "step": 14804 }, { "epoch": 2.6950031855829617, "grad_norm": 10.9375, "learning_rate": 2.2045808726417254e-06, "loss": 1.3308625221252441, "step": 14806 }, { "epoch": 2.695367252207154, "grad_norm": 13.25, "learning_rate": 2.204097385171157e-06, "loss": 1.5199995040893555, "step": 14808 }, { "epoch": 2.695731318831346, "grad_norm": 27.25, "learning_rate": 2.2036144547339135e-06, "loss": 1.3691648244857788, "step": 14810 }, { "epoch": 2.6960953854555383, "grad_norm": 22.25, "learning_rate": 2.2031320814008646e-06, "loss": 1.6099021434783936, "step": 14812 }, { "epoch": 2.6964594520797305, "grad_norm": 11.25, "learning_rate": 2.2026502652427944e-06, "loss": 1.3485583066940308, "step": 14814 }, { "epoch": 2.6968235187039227, "grad_norm": 13.25, "learning_rate": 2.202169006330409e-06, "loss": 1.1106489896774292, "step": 14816 }, { "epoch": 2.6971875853281153, "grad_norm": 44.25, "learning_rate": 2.2016883047343305e-06, "loss": 0.8135997653007507, "step": 14818 }, { "epoch": 2.697551651952307, "grad_norm": 23.5, "learning_rate": 2.201208160525099e-06, "loss": 1.2263531684875488, "step": 14820 }, { "epoch": 2.6979157185764997, "grad_norm": 6.21875, "learning_rate": 2.2007285737731765e-06, "loss": 1.3085579872131348, "step": 14822 }, { "epoch": 2.6982797852006914, "grad_norm": 4.21875, "learning_rate": 2.200249544548938e-06, "loss": 1.15388023853302, "step": 14824 }, { "epoch": 2.698643851824884, "grad_norm": 52.75, "learning_rate": 2.19977107292268e-06, "loss": 0.8058637380599976, "step": 14826 }, { "epoch": 2.6990079184490763, "grad_norm": 56.25, "learning_rate": 2.1992931589646153e-06, "loss": 0.7274093627929688, "step": 14828 }, { "epoch": 2.6993719850732685, "grad_norm": 5.09375, "learning_rate": 2.198815802744877e-06, "loss": 1.1417689323425293, "step": 14830 }, { "epoch": 2.6997360516974607, "grad_norm": 9.4375, "learning_rate": 2.1983390043335152e-06, "loss": 1.6141223907470703, "step": 14832 }, { "epoch": 2.700100118321653, "grad_norm": 9.25, "learning_rate": 2.1978627638004977e-06, "loss": 1.4747462272644043, "step": 14834 }, { "epoch": 2.700464184945845, "grad_norm": 8.3125, "learning_rate": 2.1973870812157105e-06, "loss": 1.1974679231643677, "step": 14836 }, { "epoch": 2.7008282515700373, "grad_norm": 16.25, "learning_rate": 2.1969119566489584e-06, "loss": 1.1827150583267212, "step": 14838 }, { "epoch": 2.7011923181942294, "grad_norm": 7.4375, "learning_rate": 2.196437390169964e-06, "loss": 1.1084064245224, "step": 14840 }, { "epoch": 2.7015563848184216, "grad_norm": 7.78125, "learning_rate": 2.195963381848369e-06, "loss": 1.2836636304855347, "step": 14842 }, { "epoch": 2.701920451442614, "grad_norm": 11.3125, "learning_rate": 2.1954899317537306e-06, "loss": 1.534792184829712, "step": 14844 }, { "epoch": 2.702284518066806, "grad_norm": 23.75, "learning_rate": 2.1950170399555267e-06, "loss": 1.5105608701705933, "step": 14846 }, { "epoch": 2.7026485846909987, "grad_norm": 17.75, "learning_rate": 2.1945447065231518e-06, "loss": 1.5451600551605225, "step": 14848 }, { "epoch": 2.7030126513151904, "grad_norm": 8.125, "learning_rate": 2.194072931525918e-06, "loss": 1.1832053661346436, "step": 14850 }, { "epoch": 2.703376717939383, "grad_norm": 7.53125, "learning_rate": 2.1936017150330593e-06, "loss": 1.4322609901428223, "step": 14852 }, { "epoch": 2.7037407845635753, "grad_norm": 22.5, "learning_rate": 2.193131057113722e-06, "loss": 1.5028266906738281, "step": 14854 }, { "epoch": 2.7041048511877674, "grad_norm": 11.875, "learning_rate": 2.1926609578369744e-06, "loss": 1.5083743333816528, "step": 14856 }, { "epoch": 2.7044689178119596, "grad_norm": 19.75, "learning_rate": 2.192191417271801e-06, "loss": 1.633476972579956, "step": 14858 }, { "epoch": 2.704832984436152, "grad_norm": 10.0, "learning_rate": 2.1917224354871052e-06, "loss": 0.9893850684165955, "step": 14860 }, { "epoch": 2.705197051060344, "grad_norm": 12.25, "learning_rate": 2.1912540125517095e-06, "loss": 1.5509364604949951, "step": 14862 }, { "epoch": 2.705561117684536, "grad_norm": 43.5, "learning_rate": 2.1907861485343516e-06, "loss": 1.4076491594314575, "step": 14864 }, { "epoch": 2.7059251843087284, "grad_norm": 11.4375, "learning_rate": 2.1903188435036884e-06, "loss": 1.3426060676574707, "step": 14866 }, { "epoch": 2.7062892509329206, "grad_norm": 14.8125, "learning_rate": 2.189852097528296e-06, "loss": 1.2959814071655273, "step": 14868 }, { "epoch": 2.706653317557113, "grad_norm": 5.0625, "learning_rate": 2.1893859106766668e-06, "loss": 1.3426402807235718, "step": 14870 }, { "epoch": 2.707017384181305, "grad_norm": 9.0, "learning_rate": 2.188920283017213e-06, "loss": 1.2335952520370483, "step": 14872 }, { "epoch": 2.7073814508054976, "grad_norm": 11.5625, "learning_rate": 2.1884552146182623e-06, "loss": 1.3386529684066772, "step": 14874 }, { "epoch": 2.7077455174296894, "grad_norm": 18.0, "learning_rate": 2.1879907055480618e-06, "loss": 1.4073715209960938, "step": 14876 }, { "epoch": 2.708109584053882, "grad_norm": 15.4375, "learning_rate": 2.1875267558747766e-06, "loss": 1.9418431520462036, "step": 14878 }, { "epoch": 2.708473650678074, "grad_norm": 8.8125, "learning_rate": 2.1870633656664885e-06, "loss": 1.32002854347229, "step": 14880 }, { "epoch": 2.7088377173022664, "grad_norm": 8.5625, "learning_rate": 2.186600534991201e-06, "loss": 1.1419707536697388, "step": 14882 }, { "epoch": 2.7092017839264586, "grad_norm": 13.375, "learning_rate": 2.1861382639168294e-06, "loss": 1.194664478302002, "step": 14884 }, { "epoch": 2.709565850550651, "grad_norm": 20.75, "learning_rate": 2.1856765525112112e-06, "loss": 1.3358380794525146, "step": 14886 }, { "epoch": 2.709929917174843, "grad_norm": 16.75, "learning_rate": 2.1852154008421013e-06, "loss": 1.4162893295288086, "step": 14888 }, { "epoch": 2.710293983799035, "grad_norm": 20.0, "learning_rate": 2.184754808977171e-06, "loss": 1.0267837047576904, "step": 14890 }, { "epoch": 2.7106580504232274, "grad_norm": 8.25, "learning_rate": 2.1842947769840106e-06, "loss": 0.7245466113090515, "step": 14892 }, { "epoch": 2.7110221170474196, "grad_norm": 16.375, "learning_rate": 2.1838353049301285e-06, "loss": 1.1227068901062012, "step": 14894 }, { "epoch": 2.7113861836716118, "grad_norm": 15.3125, "learning_rate": 2.1833763928829497e-06, "loss": 1.5669455528259277, "step": 14896 }, { "epoch": 2.711750250295804, "grad_norm": 8.375, "learning_rate": 2.1829180409098173e-06, "loss": 1.2360657453536987, "step": 14898 }, { "epoch": 2.7121143169199966, "grad_norm": 9.5625, "learning_rate": 2.182460249077993e-06, "loss": 1.4627820253372192, "step": 14900 }, { "epoch": 2.7124783835441884, "grad_norm": 14.6875, "learning_rate": 2.182003017454657e-06, "loss": 1.2225757837295532, "step": 14902 }, { "epoch": 2.712842450168381, "grad_norm": 10.3125, "learning_rate": 2.181546346106905e-06, "loss": 1.5045427083969116, "step": 14904 }, { "epoch": 2.713206516792573, "grad_norm": 29.625, "learning_rate": 2.181090235101751e-06, "loss": 1.0104668140411377, "step": 14906 }, { "epoch": 2.7135705834167654, "grad_norm": 14.0, "learning_rate": 2.180634684506129e-06, "loss": 1.2800325155258179, "step": 14908 }, { "epoch": 2.7139346500409576, "grad_norm": 37.0, "learning_rate": 2.180179694386888e-06, "loss": 0.8998664021492004, "step": 14910 }, { "epoch": 2.7142987166651498, "grad_norm": 14.75, "learning_rate": 2.179725264810797e-06, "loss": 1.3930096626281738, "step": 14912 }, { "epoch": 2.714662783289342, "grad_norm": 8.9375, "learning_rate": 2.179271395844541e-06, "loss": 1.2303917407989502, "step": 14914 }, { "epoch": 2.715026849913534, "grad_norm": 8.625, "learning_rate": 2.178818087554724e-06, "loss": 1.38310706615448, "step": 14916 }, { "epoch": 2.7153909165377264, "grad_norm": 7.9375, "learning_rate": 2.178365340007866e-06, "loss": 1.2885059118270874, "step": 14918 }, { "epoch": 2.7157549831619185, "grad_norm": 11.375, "learning_rate": 2.177913153270407e-06, "loss": 0.9565805196762085, "step": 14920 }, { "epoch": 2.7161190497861107, "grad_norm": 11.125, "learning_rate": 2.1774615274087033e-06, "loss": 1.658626675605774, "step": 14922 }, { "epoch": 2.716483116410303, "grad_norm": 11.0625, "learning_rate": 2.1770104624890287e-06, "loss": 1.3765509128570557, "step": 14924 }, { "epoch": 2.7168471830344956, "grad_norm": 13.0625, "learning_rate": 2.176559958577576e-06, "loss": 1.358661413192749, "step": 14926 }, { "epoch": 2.7172112496586873, "grad_norm": 4.0625, "learning_rate": 2.176110015740454e-06, "loss": 0.9088307619094849, "step": 14928 }, { "epoch": 2.71757531628288, "grad_norm": 18.5, "learning_rate": 2.17566063404369e-06, "loss": 0.9821543097496033, "step": 14930 }, { "epoch": 2.717939382907072, "grad_norm": 17.75, "learning_rate": 2.1752118135532297e-06, "loss": 0.6211255192756653, "step": 14932 }, { "epoch": 2.7183034495312643, "grad_norm": 12.125, "learning_rate": 2.1747635543349355e-06, "loss": 1.5896955728530884, "step": 14934 }, { "epoch": 2.7186675161554565, "grad_norm": 15.625, "learning_rate": 2.1743158564545873e-06, "loss": 1.669480800628662, "step": 14936 }, { "epoch": 2.7190315827796487, "grad_norm": 7.90625, "learning_rate": 2.173868719977883e-06, "loss": 1.278374195098877, "step": 14938 }, { "epoch": 2.719395649403841, "grad_norm": 6.4375, "learning_rate": 2.173422144970437e-06, "loss": 1.2889635562896729, "step": 14940 }, { "epoch": 2.719759716028033, "grad_norm": 11.3125, "learning_rate": 2.172976131497785e-06, "loss": 1.3702778816223145, "step": 14942 }, { "epoch": 2.7201237826522253, "grad_norm": 8.0, "learning_rate": 2.1725306796253754e-06, "loss": 1.725634217262268, "step": 14944 }, { "epoch": 2.7204878492764175, "grad_norm": 6.875, "learning_rate": 2.172085789418577e-06, "loss": 1.1504065990447998, "step": 14946 }, { "epoch": 2.7208519159006097, "grad_norm": 7.46875, "learning_rate": 2.1716414609426762e-06, "loss": 1.1584641933441162, "step": 14948 }, { "epoch": 2.721215982524802, "grad_norm": 17.5, "learning_rate": 2.1711976942628754e-06, "loss": 1.146274209022522, "step": 14950 }, { "epoch": 2.7215800491489945, "grad_norm": 7.03125, "learning_rate": 2.1707544894442967e-06, "loss": 1.547098994255066, "step": 14952 }, { "epoch": 2.7219441157731863, "grad_norm": 9.9375, "learning_rate": 2.1703118465519785e-06, "loss": 1.1408462524414062, "step": 14954 }, { "epoch": 2.722308182397379, "grad_norm": 18.125, "learning_rate": 2.169869765650876e-06, "loss": 1.8340272903442383, "step": 14956 }, { "epoch": 2.7226722490215707, "grad_norm": 7.0625, "learning_rate": 2.1694282468058634e-06, "loss": 1.5290647745132446, "step": 14958 }, { "epoch": 2.7230363156457633, "grad_norm": 86.5, "learning_rate": 2.1689872900817312e-06, "loss": 1.17153799533844, "step": 14960 }, { "epoch": 2.7234003822699555, "grad_norm": 8.8125, "learning_rate": 2.168546895543189e-06, "loss": 0.8713878393173218, "step": 14962 }, { "epoch": 2.7237644488941477, "grad_norm": 6.03125, "learning_rate": 2.168107063254862e-06, "loss": 1.2060374021530151, "step": 14964 }, { "epoch": 2.72412851551834, "grad_norm": 18.125, "learning_rate": 2.1676677932812945e-06, "loss": 1.2441211938858032, "step": 14966 }, { "epoch": 2.724492582142532, "grad_norm": 18.625, "learning_rate": 2.1672290856869472e-06, "loss": 1.272569179534912, "step": 14968 }, { "epoch": 2.7248566487667243, "grad_norm": 9.1875, "learning_rate": 2.166790940536198e-06, "loss": 1.4582860469818115, "step": 14970 }, { "epoch": 2.7252207153909165, "grad_norm": 25.625, "learning_rate": 2.1663533578933447e-06, "loss": 1.0654188394546509, "step": 14972 }, { "epoch": 2.7255847820151087, "grad_norm": 16.0, "learning_rate": 2.165916337822599e-06, "loss": 1.414611577987671, "step": 14974 }, { "epoch": 2.725948848639301, "grad_norm": 17.875, "learning_rate": 2.165479880388093e-06, "loss": 1.3049029111862183, "step": 14976 }, { "epoch": 2.726312915263493, "grad_norm": 16.125, "learning_rate": 2.165043985653874e-06, "loss": 1.8345887660980225, "step": 14978 }, { "epoch": 2.7266769818876853, "grad_norm": 15.6875, "learning_rate": 2.1646086536839083e-06, "loss": 1.536637544631958, "step": 14980 }, { "epoch": 2.727041048511878, "grad_norm": 13.0625, "learning_rate": 2.16417388454208e-06, "loss": 1.3506499528884888, "step": 14982 }, { "epoch": 2.7274051151360696, "grad_norm": 14.1875, "learning_rate": 2.1637396782921885e-06, "loss": 1.3078205585479736, "step": 14984 }, { "epoch": 2.7277691817602623, "grad_norm": 20.375, "learning_rate": 2.1633060349979524e-06, "loss": 1.5477087497711182, "step": 14986 }, { "epoch": 2.7281332483844545, "grad_norm": 12.75, "learning_rate": 2.1628729547230066e-06, "loss": 1.5537257194519043, "step": 14988 }, { "epoch": 2.7284973150086467, "grad_norm": 15.6875, "learning_rate": 2.162440437530904e-06, "loss": 1.5752886533737183, "step": 14990 }, { "epoch": 2.728861381632839, "grad_norm": 9.5625, "learning_rate": 2.162008483485116e-06, "loss": 1.7336336374282837, "step": 14992 }, { "epoch": 2.729225448257031, "grad_norm": 8.875, "learning_rate": 2.161577092649028e-06, "loss": 1.469306230545044, "step": 14994 }, { "epoch": 2.7295895148812233, "grad_norm": 7.53125, "learning_rate": 2.1611462650859463e-06, "loss": 1.1119840145111084, "step": 14996 }, { "epoch": 2.7299535815054154, "grad_norm": 11.0625, "learning_rate": 2.1607160008590925e-06, "loss": 1.259823203086853, "step": 14998 }, { "epoch": 2.7303176481296076, "grad_norm": 14.75, "learning_rate": 2.160286300031606e-06, "loss": 1.3366429805755615, "step": 15000 }, { "epoch": 2.7306817147538, "grad_norm": 11.25, "learning_rate": 2.1598571626665447e-06, "loss": 1.6711106300354004, "step": 15002 }, { "epoch": 2.731045781377992, "grad_norm": 20.25, "learning_rate": 2.1594285888268816e-06, "loss": 1.5893425941467285, "step": 15004 }, { "epoch": 2.7314098480021842, "grad_norm": 21.875, "learning_rate": 2.1590005785755087e-06, "loss": 0.6128767728805542, "step": 15006 }, { "epoch": 2.731773914626377, "grad_norm": 11.8125, "learning_rate": 2.1585731319752344e-06, "loss": 0.9804597496986389, "step": 15008 }, { "epoch": 2.7321379812505686, "grad_norm": 27.25, "learning_rate": 2.158146249088785e-06, "loss": 1.5301352739334106, "step": 15010 }, { "epoch": 2.7325020478747613, "grad_norm": 10.625, "learning_rate": 2.1577199299788045e-06, "loss": 1.9731130599975586, "step": 15012 }, { "epoch": 2.7328661144989534, "grad_norm": 10.9375, "learning_rate": 2.1572941747078526e-06, "loss": 1.2514795064926147, "step": 15014 }, { "epoch": 2.7332301811231456, "grad_norm": 14.125, "learning_rate": 2.1568689833384077e-06, "loss": 1.0623831748962402, "step": 15016 }, { "epoch": 2.733594247747338, "grad_norm": 15.3125, "learning_rate": 2.1564443559328644e-06, "loss": 1.384691596031189, "step": 15018 }, { "epoch": 2.73395831437153, "grad_norm": 6.15625, "learning_rate": 2.1560202925535344e-06, "loss": 0.8401814699172974, "step": 15020 }, { "epoch": 2.7343223809957222, "grad_norm": 7.65625, "learning_rate": 2.15559679326265e-06, "loss": 1.2028660774230957, "step": 15022 }, { "epoch": 2.7346864476199144, "grad_norm": 18.75, "learning_rate": 2.1551738581223547e-06, "loss": 0.48643553256988525, "step": 15024 }, { "epoch": 2.7350505142441066, "grad_norm": 9.5, "learning_rate": 2.1547514871947147e-06, "loss": 1.4175217151641846, "step": 15026 }, { "epoch": 2.735414580868299, "grad_norm": 21.125, "learning_rate": 2.15432968054171e-06, "loss": 1.3999290466308594, "step": 15028 }, { "epoch": 2.735778647492491, "grad_norm": 13.6875, "learning_rate": 2.1539084382252398e-06, "loss": 1.3097772598266602, "step": 15030 }, { "epoch": 2.736142714116683, "grad_norm": 7.03125, "learning_rate": 2.15348776030712e-06, "loss": 1.4213893413543701, "step": 15032 }, { "epoch": 2.736506780740876, "grad_norm": 18.0, "learning_rate": 2.1530676468490823e-06, "loss": 1.4243406057357788, "step": 15034 }, { "epoch": 2.7368708473650676, "grad_norm": 21.125, "learning_rate": 2.152648097912777e-06, "loss": 1.4513845443725586, "step": 15036 }, { "epoch": 2.73723491398926, "grad_norm": 100.0, "learning_rate": 2.152229113559772e-06, "loss": 1.3756598234176636, "step": 15038 }, { "epoch": 2.7375989806134524, "grad_norm": 11.3125, "learning_rate": 2.1518106938515493e-06, "loss": 1.209734320640564, "step": 15040 }, { "epoch": 2.7379630472376446, "grad_norm": 28.0, "learning_rate": 2.1513928388495142e-06, "loss": 1.4093501567840576, "step": 15042 }, { "epoch": 2.738327113861837, "grad_norm": 60.5, "learning_rate": 2.150975548614982e-06, "loss": 1.954169511795044, "step": 15044 }, { "epoch": 2.738691180486029, "grad_norm": 23.375, "learning_rate": 2.150558823209189e-06, "loss": 1.0156046152114868, "step": 15046 }, { "epoch": 2.739055247110221, "grad_norm": 9.25, "learning_rate": 2.1501426626932888e-06, "loss": 1.1951603889465332, "step": 15048 }, { "epoch": 2.7394193137344134, "grad_norm": 11.125, "learning_rate": 2.14972706712835e-06, "loss": 1.8005973100662231, "step": 15050 }, { "epoch": 2.7397833803586056, "grad_norm": 15.25, "learning_rate": 2.149312036575361e-06, "loss": 1.4487876892089844, "step": 15052 }, { "epoch": 2.7401474469827978, "grad_norm": 19.25, "learning_rate": 2.148897571095225e-06, "loss": 1.4537475109100342, "step": 15054 }, { "epoch": 2.74051151360699, "grad_norm": 24.25, "learning_rate": 2.1484836707487633e-06, "loss": 1.4401055574417114, "step": 15056 }, { "epoch": 2.740875580231182, "grad_norm": 11.375, "learning_rate": 2.1480703355967134e-06, "loss": 1.3841688632965088, "step": 15058 }, { "epoch": 2.741239646855375, "grad_norm": 11.4375, "learning_rate": 2.1476575656997313e-06, "loss": 1.1530070304870605, "step": 15060 }, { "epoch": 2.7416037134795666, "grad_norm": 16.375, "learning_rate": 2.1472453611183903e-06, "loss": 1.2965025901794434, "step": 15062 }, { "epoch": 2.741967780103759, "grad_norm": 16.125, "learning_rate": 2.1468337219131783e-06, "loss": 1.4502832889556885, "step": 15064 }, { "epoch": 2.742331846727951, "grad_norm": 8.9375, "learning_rate": 2.146422648144502e-06, "loss": 1.5623334646224976, "step": 15066 }, { "epoch": 2.7426959133521436, "grad_norm": 16.375, "learning_rate": 2.1460121398726853e-06, "loss": 1.527327537536621, "step": 15068 }, { "epoch": 2.7430599799763358, "grad_norm": 25.125, "learning_rate": 2.145602197157967e-06, "loss": 1.4480284452438354, "step": 15070 }, { "epoch": 2.743424046600528, "grad_norm": 5.1875, "learning_rate": 2.145192820060507e-06, "loss": 0.4914882779121399, "step": 15072 }, { "epoch": 2.74378811322472, "grad_norm": 18.5, "learning_rate": 2.1447840086403783e-06, "loss": 1.370298147201538, "step": 15074 }, { "epoch": 2.7441521798489124, "grad_norm": 15.9375, "learning_rate": 2.144375762957572e-06, "loss": 1.6641881465911865, "step": 15076 }, { "epoch": 2.7445162464731045, "grad_norm": 7.09375, "learning_rate": 2.143968083071998e-06, "loss": 1.306210994720459, "step": 15078 }, { "epoch": 2.7448803130972967, "grad_norm": 17.875, "learning_rate": 2.143560969043479e-06, "loss": 1.4841179847717285, "step": 15080 }, { "epoch": 2.745244379721489, "grad_norm": 12.0, "learning_rate": 2.1431544209317603e-06, "loss": 1.9717035293579102, "step": 15082 }, { "epoch": 2.745608446345681, "grad_norm": 21.625, "learning_rate": 2.1427484387964994e-06, "loss": 1.2555760145187378, "step": 15084 }, { "epoch": 2.7459725129698733, "grad_norm": 8.4375, "learning_rate": 2.1423430226972735e-06, "loss": 1.5314890146255493, "step": 15086 }, { "epoch": 2.7463365795940655, "grad_norm": 8.625, "learning_rate": 2.141938172693575e-06, "loss": 1.3276894092559814, "step": 15088 }, { "epoch": 2.746700646218258, "grad_norm": 21.75, "learning_rate": 2.141533888844814e-06, "loss": 1.4084265232086182, "step": 15090 }, { "epoch": 2.74706471284245, "grad_norm": 16.0, "learning_rate": 2.1411301712103183e-06, "loss": 1.401522159576416, "step": 15092 }, { "epoch": 2.7474287794666425, "grad_norm": 13.75, "learning_rate": 2.1407270198493313e-06, "loss": 1.3489482402801514, "step": 15094 }, { "epoch": 2.7477928460908347, "grad_norm": 6.78125, "learning_rate": 2.1403244348210138e-06, "loss": 1.3110331296920776, "step": 15096 }, { "epoch": 2.748156912715027, "grad_norm": 4.375, "learning_rate": 2.1399224161844436e-06, "loss": 0.9076242446899414, "step": 15098 }, { "epoch": 2.748520979339219, "grad_norm": 26.5, "learning_rate": 2.139520963998615e-06, "loss": 1.1531896591186523, "step": 15100 }, { "epoch": 2.7488850459634113, "grad_norm": 6.03125, "learning_rate": 2.1391200783224402e-06, "loss": 0.8014776706695557, "step": 15102 }, { "epoch": 2.7492491125876035, "grad_norm": 12.0, "learning_rate": 2.1387197592147467e-06, "loss": 1.4162440299987793, "step": 15104 }, { "epoch": 2.7496131792117957, "grad_norm": 116.5, "learning_rate": 2.13832000673428e-06, "loss": 1.578439474105835, "step": 15106 }, { "epoch": 2.749977245835988, "grad_norm": 11.625, "learning_rate": 2.137920820939703e-06, "loss": 1.4433608055114746, "step": 15108 }, { "epoch": 2.75034131246018, "grad_norm": 10.0625, "learning_rate": 2.1375222018895932e-06, "loss": 1.4136310815811157, "step": 15110 }, { "epoch": 2.7507053790843723, "grad_norm": 15.0625, "learning_rate": 2.137124149642448e-06, "loss": 1.2016401290893555, "step": 15112 }, { "epoch": 2.7510694457085645, "grad_norm": 33.25, "learning_rate": 2.1367266642566785e-06, "loss": 1.7396986484527588, "step": 15114 }, { "epoch": 2.751433512332757, "grad_norm": 24.125, "learning_rate": 2.136329745790614e-06, "loss": 1.8357279300689697, "step": 15116 }, { "epoch": 2.751797578956949, "grad_norm": 8.0625, "learning_rate": 2.1359333943025017e-06, "loss": 1.2419112920761108, "step": 15118 }, { "epoch": 2.7521616455811415, "grad_norm": 7.5625, "learning_rate": 2.1355376098505033e-06, "loss": 0.870661735534668, "step": 15120 }, { "epoch": 2.7525257122053337, "grad_norm": 21.0, "learning_rate": 2.1351423924927006e-06, "loss": 1.0458451509475708, "step": 15122 }, { "epoch": 2.752889778829526, "grad_norm": 18.25, "learning_rate": 2.1347477422870885e-06, "loss": 1.3755351305007935, "step": 15124 }, { "epoch": 2.753253845453718, "grad_norm": 12.4375, "learning_rate": 2.1343536592915805e-06, "loss": 1.3855971097946167, "step": 15126 }, { "epoch": 2.7536179120779103, "grad_norm": 3.921875, "learning_rate": 2.133960143564007e-06, "loss": 1.1703510284423828, "step": 15128 }, { "epoch": 2.7539819787021025, "grad_norm": 14.625, "learning_rate": 2.133567195162114e-06, "loss": 1.7142263650894165, "step": 15130 }, { "epoch": 2.7543460453262947, "grad_norm": 10.75, "learning_rate": 2.1331748141435675e-06, "loss": 1.4029278755187988, "step": 15132 }, { "epoch": 2.754710111950487, "grad_norm": 11.625, "learning_rate": 2.1327830005659454e-06, "loss": 1.249886393547058, "step": 15134 }, { "epoch": 2.755074178574679, "grad_norm": 18.0, "learning_rate": 2.132391754486745e-06, "loss": 1.6176830530166626, "step": 15136 }, { "epoch": 2.7554382451988713, "grad_norm": 7.78125, "learning_rate": 2.1320010759633812e-06, "loss": 0.8771883845329285, "step": 15138 }, { "epoch": 2.7558023118230635, "grad_norm": 23.125, "learning_rate": 2.1316109650531826e-06, "loss": 1.2134993076324463, "step": 15140 }, { "epoch": 2.756166378447256, "grad_norm": 7.53125, "learning_rate": 2.131221421813399e-06, "loss": 1.7754442691802979, "step": 15142 }, { "epoch": 2.756530445071448, "grad_norm": 9.4375, "learning_rate": 2.1308324463011932e-06, "loss": 1.2062547206878662, "step": 15144 }, { "epoch": 2.7568945116956405, "grad_norm": 5.9375, "learning_rate": 2.130444038573645e-06, "loss": 1.3467555046081543, "step": 15146 }, { "epoch": 2.7572585783198327, "grad_norm": 10.9375, "learning_rate": 2.1300561986877517e-06, "loss": 1.3722015619277954, "step": 15148 }, { "epoch": 2.757622644944025, "grad_norm": 12.5625, "learning_rate": 2.129668926700428e-06, "loss": 1.748478651046753, "step": 15150 }, { "epoch": 2.757986711568217, "grad_norm": 11.25, "learning_rate": 2.129282222668505e-06, "loss": 1.450814962387085, "step": 15152 }, { "epoch": 2.7583507781924093, "grad_norm": 16.625, "learning_rate": 2.128896086648728e-06, "loss": 1.5398958921432495, "step": 15154 }, { "epoch": 2.7587148448166015, "grad_norm": 14.75, "learning_rate": 2.1285105186977627e-06, "loss": 1.3081355094909668, "step": 15156 }, { "epoch": 2.7590789114407936, "grad_norm": 7.90625, "learning_rate": 2.1281255188721894e-06, "loss": 1.2844856977462769, "step": 15158 }, { "epoch": 2.759442978064986, "grad_norm": 9.0, "learning_rate": 2.1277410872285037e-06, "loss": 1.192986011505127, "step": 15160 }, { "epoch": 2.759807044689178, "grad_norm": 10.625, "learning_rate": 2.1273572238231217e-06, "loss": 1.519608736038208, "step": 15162 }, { "epoch": 2.7601711113133702, "grad_norm": 10.6875, "learning_rate": 2.126973928712372e-06, "loss": 1.3468029499053955, "step": 15164 }, { "epoch": 2.7605351779375624, "grad_norm": 11.125, "learning_rate": 2.126591201952503e-06, "loss": 1.2082629203796387, "step": 15166 }, { "epoch": 2.760899244561755, "grad_norm": 13.125, "learning_rate": 2.126209043599677e-06, "loss": 1.1842734813690186, "step": 15168 }, { "epoch": 2.761263311185947, "grad_norm": 13.125, "learning_rate": 2.125827453709974e-06, "loss": 1.920096755027771, "step": 15170 }, { "epoch": 2.7616273778101394, "grad_norm": 23.375, "learning_rate": 2.125446432339393e-06, "loss": 1.5306665897369385, "step": 15172 }, { "epoch": 2.7619914444343316, "grad_norm": 14.75, "learning_rate": 2.1250659795438453e-06, "loss": 1.4640711545944214, "step": 15174 }, { "epoch": 2.762355511058524, "grad_norm": 20.5, "learning_rate": 2.124686095379161e-06, "loss": 1.20827317237854, "step": 15176 }, { "epoch": 2.762719577682716, "grad_norm": 7.40625, "learning_rate": 2.1243067799010875e-06, "loss": 1.2888175249099731, "step": 15178 }, { "epoch": 2.7630836443069082, "grad_norm": 8.0, "learning_rate": 2.1239280331652862e-06, "loss": 1.035913348197937, "step": 15180 }, { "epoch": 2.7634477109311004, "grad_norm": 12.8125, "learning_rate": 2.123549855227339e-06, "loss": 1.4175491333007812, "step": 15182 }, { "epoch": 2.7638117775552926, "grad_norm": 12.25, "learning_rate": 2.12317224614274e-06, "loss": 1.025011658668518, "step": 15184 }, { "epoch": 2.764175844179485, "grad_norm": 18.75, "learning_rate": 2.1227952059669026e-06, "loss": 1.5971038341522217, "step": 15186 }, { "epoch": 2.764539910803677, "grad_norm": 13.0, "learning_rate": 2.122418734755156e-06, "loss": 1.4588137865066528, "step": 15188 }, { "epoch": 2.764903977427869, "grad_norm": 20.125, "learning_rate": 2.1220428325627447e-06, "loss": 1.587701439857483, "step": 15190 }, { "epoch": 2.7652680440520614, "grad_norm": 12.75, "learning_rate": 2.121667499444833e-06, "loss": 1.4294147491455078, "step": 15192 }, { "epoch": 2.765632110676254, "grad_norm": 10.8125, "learning_rate": 2.1212927354564973e-06, "loss": 1.398268699645996, "step": 15194 }, { "epoch": 2.765996177300446, "grad_norm": 11.0, "learning_rate": 2.120918540652734e-06, "loss": 1.4866583347320557, "step": 15196 }, { "epoch": 2.7663602439246384, "grad_norm": 19.875, "learning_rate": 2.1205449150884542e-06, "loss": 1.1561260223388672, "step": 15198 }, { "epoch": 2.76672431054883, "grad_norm": 12.125, "learning_rate": 2.120171858818486e-06, "loss": 1.3582969903945923, "step": 15200 }, { "epoch": 2.767088377173023, "grad_norm": 11.875, "learning_rate": 2.119799371897574e-06, "loss": 1.1560012102127075, "step": 15202 }, { "epoch": 2.767452443797215, "grad_norm": 8.9375, "learning_rate": 2.11942745438038e-06, "loss": 1.0556871891021729, "step": 15204 }, { "epoch": 2.767816510421407, "grad_norm": 6.53125, "learning_rate": 2.1190561063214795e-06, "loss": 1.2058480978012085, "step": 15206 }, { "epoch": 2.7681805770455994, "grad_norm": 12.4375, "learning_rate": 2.118685327775367e-06, "loss": 1.141218900680542, "step": 15208 }, { "epoch": 2.7685446436697916, "grad_norm": 9.875, "learning_rate": 2.1183151187964533e-06, "loss": 1.6277103424072266, "step": 15210 }, { "epoch": 2.7689087102939838, "grad_norm": 27.875, "learning_rate": 2.117945479439066e-06, "loss": 1.9199076890945435, "step": 15212 }, { "epoch": 2.769272776918176, "grad_norm": 17.0, "learning_rate": 2.117576409757446e-06, "loss": 1.3194208145141602, "step": 15214 }, { "epoch": 2.769636843542368, "grad_norm": 17.75, "learning_rate": 2.1172079098057537e-06, "loss": 1.5167080163955688, "step": 15216 }, { "epoch": 2.7700009101665604, "grad_norm": 16.5, "learning_rate": 2.116839979638065e-06, "loss": 1.3913594484329224, "step": 15218 }, { "epoch": 2.7703649767907526, "grad_norm": 11.0, "learning_rate": 2.116472619308372e-06, "loss": 1.0446672439575195, "step": 15220 }, { "epoch": 2.7707290434149447, "grad_norm": 10.1875, "learning_rate": 2.1161058288705846e-06, "loss": 0.47014302015304565, "step": 15222 }, { "epoch": 2.7710931100391374, "grad_norm": 27.125, "learning_rate": 2.1157396083785263e-06, "loss": 1.5051355361938477, "step": 15224 }, { "epoch": 2.771457176663329, "grad_norm": 11.3125, "learning_rate": 2.1153739578859384e-06, "loss": 1.4453892707824707, "step": 15226 }, { "epoch": 2.7718212432875218, "grad_norm": 5.90625, "learning_rate": 2.1150088774464795e-06, "loss": 1.5794672966003418, "step": 15228 }, { "epoch": 2.772185309911714, "grad_norm": 7.9375, "learning_rate": 2.114644367113723e-06, "loss": 1.2086212635040283, "step": 15230 }, { "epoch": 2.772549376535906, "grad_norm": 14.625, "learning_rate": 2.114280426941161e-06, "loss": 1.2649345397949219, "step": 15232 }, { "epoch": 2.7729134431600984, "grad_norm": 7.96875, "learning_rate": 2.1139170569821976e-06, "loss": 1.2124547958374023, "step": 15234 }, { "epoch": 2.7732775097842906, "grad_norm": 15.0625, "learning_rate": 2.113554257290158e-06, "loss": 1.4067473411560059, "step": 15236 }, { "epoch": 2.7736415764084827, "grad_norm": 13.1875, "learning_rate": 2.1131920279182798e-06, "loss": 1.6940354108810425, "step": 15238 }, { "epoch": 2.774005643032675, "grad_norm": 11.0, "learning_rate": 2.1128303689197198e-06, "loss": 1.4393317699432373, "step": 15240 }, { "epoch": 2.774369709656867, "grad_norm": 23.0, "learning_rate": 2.112469280347551e-06, "loss": 1.5619231462478638, "step": 15242 }, { "epoch": 2.7747337762810593, "grad_norm": 8.5, "learning_rate": 2.1121087622547594e-06, "loss": 1.5718462467193604, "step": 15244 }, { "epoch": 2.7750978429052515, "grad_norm": 18.125, "learning_rate": 2.111748814694251e-06, "loss": 1.1862127780914307, "step": 15246 }, { "epoch": 2.7754619095294437, "grad_norm": 12.875, "learning_rate": 2.1113894377188463e-06, "loss": 1.3076564073562622, "step": 15248 }, { "epoch": 2.7758259761536364, "grad_norm": 15.125, "learning_rate": 2.111030631381282e-06, "loss": 1.5666275024414062, "step": 15250 }, { "epoch": 2.776190042777828, "grad_norm": 39.75, "learning_rate": 2.1106723957342127e-06, "loss": 1.8964344263076782, "step": 15252 }, { "epoch": 2.7765541094020207, "grad_norm": 10.0625, "learning_rate": 2.1103147308302073e-06, "loss": 1.3087196350097656, "step": 15254 }, { "epoch": 2.776918176026213, "grad_norm": 9.5625, "learning_rate": 2.109957636721751e-06, "loss": 1.155623435974121, "step": 15256 }, { "epoch": 2.777282242650405, "grad_norm": 7.4375, "learning_rate": 2.109601113461247e-06, "loss": 1.3293956518173218, "step": 15258 }, { "epoch": 2.7776463092745973, "grad_norm": 14.75, "learning_rate": 2.1092451611010124e-06, "loss": 1.4465820789337158, "step": 15260 }, { "epoch": 2.7780103758987895, "grad_norm": 18.75, "learning_rate": 2.108889779693284e-06, "loss": 1.1767781972885132, "step": 15262 }, { "epoch": 2.7783744425229817, "grad_norm": 18.25, "learning_rate": 2.1085349692902103e-06, "loss": 0.9152565002441406, "step": 15264 }, { "epoch": 2.778738509147174, "grad_norm": 9.625, "learning_rate": 2.108180729943859e-06, "loss": 1.3390154838562012, "step": 15266 }, { "epoch": 2.779102575771366, "grad_norm": 5.90625, "learning_rate": 2.1078270617062135e-06, "loss": 1.2630285024642944, "step": 15268 }, { "epoch": 2.7794666423955583, "grad_norm": 14.1875, "learning_rate": 2.1074739646291733e-06, "loss": 1.1905521154403687, "step": 15270 }, { "epoch": 2.7798307090197505, "grad_norm": 18.5, "learning_rate": 2.1071214387645537e-06, "loss": 1.6768046617507935, "step": 15272 }, { "epoch": 2.7801947756439427, "grad_norm": 7.5625, "learning_rate": 2.106769484164086e-06, "loss": 1.272176742553711, "step": 15274 }, { "epoch": 2.7805588422681353, "grad_norm": 5.625, "learning_rate": 2.1064181008794195e-06, "loss": 1.0415253639221191, "step": 15276 }, { "epoch": 2.780922908892327, "grad_norm": 5.6875, "learning_rate": 2.106067288962117e-06, "loss": 1.1963510513305664, "step": 15278 }, { "epoch": 2.7812869755165197, "grad_norm": 9.5, "learning_rate": 2.1057170484636587e-06, "loss": 1.2466492652893066, "step": 15280 }, { "epoch": 2.781651042140712, "grad_norm": 64.5, "learning_rate": 2.1053673794354424e-06, "loss": 1.7249022722244263, "step": 15282 }, { "epoch": 2.782015108764904, "grad_norm": 6.21875, "learning_rate": 2.1050182819287787e-06, "loss": 1.2817299365997314, "step": 15284 }, { "epoch": 2.7823791753890963, "grad_norm": 48.5, "learning_rate": 2.1046697559948974e-06, "loss": 1.3179740905761719, "step": 15286 }, { "epoch": 2.7827432420132885, "grad_norm": 11.1875, "learning_rate": 2.104321801684943e-06, "loss": 1.5478861331939697, "step": 15288 }, { "epoch": 2.7831073086374807, "grad_norm": 10.6875, "learning_rate": 2.103974419049976e-06, "loss": 1.339264988899231, "step": 15290 }, { "epoch": 2.783471375261673, "grad_norm": 14.4375, "learning_rate": 2.1036276081409745e-06, "loss": 1.4556256532669067, "step": 15292 }, { "epoch": 2.783835441885865, "grad_norm": 25.125, "learning_rate": 2.1032813690088307e-06, "loss": 1.416735053062439, "step": 15294 }, { "epoch": 2.7841995085100573, "grad_norm": 110.0, "learning_rate": 2.102935701704354e-06, "loss": 1.2232414484024048, "step": 15296 }, { "epoch": 2.7845635751342495, "grad_norm": 23.125, "learning_rate": 2.1025906062782694e-06, "loss": 1.3057293891906738, "step": 15298 }, { "epoch": 2.7849276417584417, "grad_norm": 8.125, "learning_rate": 2.1022460827812185e-06, "loss": 1.1561237573623657, "step": 15300 }, { "epoch": 2.7852917083826343, "grad_norm": 27.75, "learning_rate": 2.1019021312637592e-06, "loss": 1.3912616968154907, "step": 15302 }, { "epoch": 2.785655775006826, "grad_norm": 3.59375, "learning_rate": 2.1015587517763645e-06, "loss": 1.1076183319091797, "step": 15304 }, { "epoch": 2.7860198416310187, "grad_norm": 12.5625, "learning_rate": 2.1012159443694234e-06, "loss": 1.2979843616485596, "step": 15306 }, { "epoch": 2.7863839082552104, "grad_norm": 12.5, "learning_rate": 2.1008737090932426e-06, "loss": 1.3886433839797974, "step": 15308 }, { "epoch": 2.786747974879403, "grad_norm": 14.5625, "learning_rate": 2.1005320459980425e-06, "loss": 1.5989450216293335, "step": 15310 }, { "epoch": 2.7871120415035953, "grad_norm": 13.5625, "learning_rate": 2.1001909551339626e-06, "loss": 1.7310841083526611, "step": 15312 }, { "epoch": 2.7874761081277875, "grad_norm": 5.15625, "learning_rate": 2.099850436551055e-06, "loss": 0.9716083407402039, "step": 15314 }, { "epoch": 2.7878401747519796, "grad_norm": 25.25, "learning_rate": 2.0995104902992895e-06, "loss": 1.184140920639038, "step": 15316 }, { "epoch": 2.788204241376172, "grad_norm": 11.5625, "learning_rate": 2.0991711164285525e-06, "loss": 1.1790357828140259, "step": 15318 }, { "epoch": 2.788568308000364, "grad_norm": 33.5, "learning_rate": 2.098832314988645e-06, "loss": 0.8578373193740845, "step": 15320 }, { "epoch": 2.7889323746245562, "grad_norm": 5.90625, "learning_rate": 2.0984940860292864e-06, "loss": 0.9887957572937012, "step": 15322 }, { "epoch": 2.7892964412487484, "grad_norm": 9.75, "learning_rate": 2.098156429600108e-06, "loss": 1.2182203531265259, "step": 15324 }, { "epoch": 2.7896605078729406, "grad_norm": 6.8125, "learning_rate": 2.0978193457506616e-06, "loss": 1.5072941780090332, "step": 15326 }, { "epoch": 2.790024574497133, "grad_norm": 7.375, "learning_rate": 2.097482834530412e-06, "loss": 0.9449323415756226, "step": 15328 }, { "epoch": 2.790388641121325, "grad_norm": 26.0, "learning_rate": 2.0971468959887405e-06, "loss": 1.5134354829788208, "step": 15330 }, { "epoch": 2.7907527077455176, "grad_norm": 110.5, "learning_rate": 2.0968115301749454e-06, "loss": 1.2343155145645142, "step": 15332 }, { "epoch": 2.7911167743697094, "grad_norm": 20.0, "learning_rate": 2.09647673713824e-06, "loss": 0.9876255989074707, "step": 15334 }, { "epoch": 2.791480840993902, "grad_norm": 24.25, "learning_rate": 2.0961425169277537e-06, "loss": 1.7146257162094116, "step": 15336 }, { "epoch": 2.7918449076180942, "grad_norm": 5.09375, "learning_rate": 2.0958088695925324e-06, "loss": 0.9566456079483032, "step": 15338 }, { "epoch": 2.7922089742422864, "grad_norm": 14.375, "learning_rate": 2.095475795181536e-06, "loss": 1.476578712463379, "step": 15340 }, { "epoch": 2.7925730408664786, "grad_norm": 6.625, "learning_rate": 2.095143293743645e-06, "loss": 1.4261420965194702, "step": 15342 }, { "epoch": 2.792937107490671, "grad_norm": 8.4375, "learning_rate": 2.0948113653276496e-06, "loss": 1.3135368824005127, "step": 15344 }, { "epoch": 2.793301174114863, "grad_norm": 7.25, "learning_rate": 2.0944800099822603e-06, "loss": 1.449081540107727, "step": 15346 }, { "epoch": 2.793665240739055, "grad_norm": 8.0625, "learning_rate": 2.0941492277561014e-06, "loss": 1.4387869834899902, "step": 15348 }, { "epoch": 2.7940293073632474, "grad_norm": 7.34375, "learning_rate": 2.0938190186977137e-06, "loss": 1.2676608562469482, "step": 15350 }, { "epoch": 2.7943933739874396, "grad_norm": 10.625, "learning_rate": 2.093489382855556e-06, "loss": 1.2819623947143555, "step": 15352 }, { "epoch": 2.794757440611632, "grad_norm": 19.875, "learning_rate": 2.0931603202779994e-06, "loss": 1.3368175029754639, "step": 15354 }, { "epoch": 2.795121507235824, "grad_norm": 5.71875, "learning_rate": 2.0928318310133324e-06, "loss": 1.376208782196045, "step": 15356 }, { "epoch": 2.7954855738600166, "grad_norm": 15.125, "learning_rate": 2.0925039151097596e-06, "loss": 1.300868272781372, "step": 15358 }, { "epoch": 2.7958496404842084, "grad_norm": 7.5, "learning_rate": 2.0921765726154014e-06, "loss": 1.3364508152008057, "step": 15360 }, { "epoch": 2.796213707108401, "grad_norm": 4.65625, "learning_rate": 2.0918498035782948e-06, "loss": 1.2598626613616943, "step": 15362 }, { "epoch": 2.796577773732593, "grad_norm": 9.1875, "learning_rate": 2.0915236080463906e-06, "loss": 1.4418838024139404, "step": 15364 }, { "epoch": 2.7969418403567854, "grad_norm": 8.8125, "learning_rate": 2.091197986067558e-06, "loss": 1.2414124011993408, "step": 15366 }, { "epoch": 2.7973059069809776, "grad_norm": 9.875, "learning_rate": 2.09087293768958e-06, "loss": 1.4556466341018677, "step": 15368 }, { "epoch": 2.79766997360517, "grad_norm": 10.5, "learning_rate": 2.090548462960155e-06, "loss": 1.3624932765960693, "step": 15370 }, { "epoch": 2.798034040229362, "grad_norm": 10.5, "learning_rate": 2.0902245619269005e-06, "loss": 1.255264163017273, "step": 15372 }, { "epoch": 2.798398106853554, "grad_norm": 11.375, "learning_rate": 2.089901234637346e-06, "loss": 1.3311280012130737, "step": 15374 }, { "epoch": 2.7987621734777464, "grad_norm": 96.5, "learning_rate": 2.0895784811389393e-06, "loss": 1.5165164470672607, "step": 15376 }, { "epoch": 2.7991262401019386, "grad_norm": 9.5625, "learning_rate": 2.0892563014790427e-06, "loss": 1.1123021841049194, "step": 15378 }, { "epoch": 2.7994903067261308, "grad_norm": 14.5, "learning_rate": 2.088934695704935e-06, "loss": 0.6490347385406494, "step": 15380 }, { "epoch": 2.799854373350323, "grad_norm": 29.875, "learning_rate": 2.088613663863811e-06, "loss": 1.233338475227356, "step": 15382 }, { "epoch": 2.8002184399745156, "grad_norm": 7.96875, "learning_rate": 2.08829320600278e-06, "loss": 1.4194599390029907, "step": 15384 }, { "epoch": 2.8005825065987073, "grad_norm": 6.78125, "learning_rate": 2.0879733221688685e-06, "loss": 1.238454818725586, "step": 15386 }, { "epoch": 2.8009465732229, "grad_norm": 18.5, "learning_rate": 2.087654012409018e-06, "loss": 1.2255334854125977, "step": 15388 }, { "epoch": 2.801310639847092, "grad_norm": 17.0, "learning_rate": 2.087335276770085e-06, "loss": 1.4678912162780762, "step": 15390 }, { "epoch": 2.8016747064712844, "grad_norm": 14.1875, "learning_rate": 2.0870171152988443e-06, "loss": 1.2598161697387695, "step": 15392 }, { "epoch": 2.8020387730954766, "grad_norm": 112.0, "learning_rate": 2.0866995280419843e-06, "loss": 1.2336199283599854, "step": 15394 }, { "epoch": 2.8024028397196687, "grad_norm": 14.1875, "learning_rate": 2.086382515046108e-06, "loss": 0.8416131734848022, "step": 15396 }, { "epoch": 2.802766906343861, "grad_norm": 8.4375, "learning_rate": 2.0860660763577384e-06, "loss": 1.6554086208343506, "step": 15398 }, { "epoch": 2.803130972968053, "grad_norm": 12.625, "learning_rate": 2.0857502120233093e-06, "loss": 1.0026733875274658, "step": 15400 }, { "epoch": 2.8034950395922453, "grad_norm": 2.609375, "learning_rate": 2.0854349220891746e-06, "loss": 0.6647732257843018, "step": 15402 }, { "epoch": 2.8038591062164375, "grad_norm": 6.5, "learning_rate": 2.0851202066016e-06, "loss": 1.1833223104476929, "step": 15404 }, { "epoch": 2.8042231728406297, "grad_norm": 8.625, "learning_rate": 2.08480606560677e-06, "loss": 1.1770908832550049, "step": 15406 }, { "epoch": 2.804587239464822, "grad_norm": 2.34375, "learning_rate": 2.084492499150782e-06, "loss": 1.1623116731643677, "step": 15408 }, { "epoch": 2.8049513060890146, "grad_norm": 11.875, "learning_rate": 2.0841795072796524e-06, "loss": 1.1141105890274048, "step": 15410 }, { "epoch": 2.8053153727132063, "grad_norm": 7.21875, "learning_rate": 2.0838670900393107e-06, "loss": 1.430346965789795, "step": 15412 }, { "epoch": 2.805679439337399, "grad_norm": 14.0625, "learning_rate": 2.083555247475603e-06, "loss": 1.2860307693481445, "step": 15414 }, { "epoch": 2.806043505961591, "grad_norm": 5.125, "learning_rate": 2.0832439796342902e-06, "loss": 1.220209002494812, "step": 15416 }, { "epoch": 2.8064075725857833, "grad_norm": 26.75, "learning_rate": 2.0829332865610503e-06, "loss": 1.7848587036132812, "step": 15418 }, { "epoch": 2.8067716392099755, "grad_norm": 16.375, "learning_rate": 2.082623168301476e-06, "loss": 1.2602730989456177, "step": 15420 }, { "epoch": 2.8071357058341677, "grad_norm": 6.65625, "learning_rate": 2.082313624901077e-06, "loss": 1.1987271308898926, "step": 15422 }, { "epoch": 2.80749977245836, "grad_norm": 9.625, "learning_rate": 2.0820046564052753e-06, "loss": 1.3231608867645264, "step": 15424 }, { "epoch": 2.807863839082552, "grad_norm": 7.28125, "learning_rate": 2.0816962628594124e-06, "loss": 1.2748762369155884, "step": 15426 }, { "epoch": 2.8082279057067443, "grad_norm": 3.828125, "learning_rate": 2.0813884443087436e-06, "loss": 1.0443068742752075, "step": 15428 }, { "epoch": 2.8085919723309365, "grad_norm": 16.375, "learning_rate": 2.0810812007984394e-06, "loss": 1.5760655403137207, "step": 15430 }, { "epoch": 2.8089560389551287, "grad_norm": 27.25, "learning_rate": 2.0807745323735877e-06, "loss": 1.6567323207855225, "step": 15432 }, { "epoch": 2.809320105579321, "grad_norm": 27.625, "learning_rate": 2.0804684390791897e-06, "loss": 1.2157567739486694, "step": 15434 }, { "epoch": 2.8096841722035135, "grad_norm": 6.25, "learning_rate": 2.080162920960164e-06, "loss": 1.5075218677520752, "step": 15436 }, { "epoch": 2.8100482388277053, "grad_norm": 4.0625, "learning_rate": 2.079857978061344e-06, "loss": 0.9266378879547119, "step": 15438 }, { "epoch": 2.810412305451898, "grad_norm": 40.0, "learning_rate": 2.079553610427478e-06, "loss": 1.0016363859176636, "step": 15440 }, { "epoch": 2.8107763720760897, "grad_norm": 13.9375, "learning_rate": 2.0792498181032326e-06, "loss": 0.7211898565292358, "step": 15442 }, { "epoch": 2.8111404387002823, "grad_norm": 12.5625, "learning_rate": 2.0789466011331863e-06, "loss": 0.9842705726623535, "step": 15444 }, { "epoch": 2.8115045053244745, "grad_norm": 21.25, "learning_rate": 2.078643959561836e-06, "loss": 1.2994048595428467, "step": 15446 }, { "epoch": 2.8118685719486667, "grad_norm": 19.25, "learning_rate": 2.0783418934335922e-06, "loss": 1.939651608467102, "step": 15448 }, { "epoch": 2.812232638572859, "grad_norm": 19.25, "learning_rate": 2.0780404027927827e-06, "loss": 1.9610989093780518, "step": 15450 }, { "epoch": 2.812596705197051, "grad_norm": 16.75, "learning_rate": 2.0777394876836503e-06, "loss": 1.9850043058395386, "step": 15452 }, { "epoch": 2.8129607718212433, "grad_norm": 7.46875, "learning_rate": 2.077439148150352e-06, "loss": 1.4576671123504639, "step": 15454 }, { "epoch": 2.8133248384454355, "grad_norm": 6.71875, "learning_rate": 2.0771393842369627e-06, "loss": 1.4522960186004639, "step": 15456 }, { "epoch": 2.8136889050696277, "grad_norm": 9.1875, "learning_rate": 2.0768401959874697e-06, "loss": 1.3378958702087402, "step": 15458 }, { "epoch": 2.81405297169382, "grad_norm": 11.5, "learning_rate": 2.0765415834457787e-06, "loss": 1.4418935775756836, "step": 15460 }, { "epoch": 2.814417038318012, "grad_norm": 6.875, "learning_rate": 2.076243546655711e-06, "loss": 1.4748448133468628, "step": 15462 }, { "epoch": 2.8147811049422042, "grad_norm": 14.3125, "learning_rate": 2.075946085661001e-06, "loss": 1.0807702541351318, "step": 15464 }, { "epoch": 2.815145171566397, "grad_norm": 11.4375, "learning_rate": 2.0756492005053e-06, "loss": 1.3025349378585815, "step": 15466 }, { "epoch": 2.8155092381905886, "grad_norm": 7.46875, "learning_rate": 2.0753528912321747e-06, "loss": 1.8363618850708008, "step": 15468 }, { "epoch": 2.8158733048147813, "grad_norm": 10.5625, "learning_rate": 2.075057157885107e-06, "loss": 1.331172227859497, "step": 15470 }, { "epoch": 2.8162373714389735, "grad_norm": 11.25, "learning_rate": 2.074762000507496e-06, "loss": 1.6024922132492065, "step": 15472 }, { "epoch": 2.8166014380631657, "grad_norm": 10.25, "learning_rate": 2.074467419142653e-06, "loss": 1.291154384613037, "step": 15474 }, { "epoch": 2.816965504687358, "grad_norm": 20.25, "learning_rate": 2.074173413833808e-06, "loss": 1.0730677843093872, "step": 15476 }, { "epoch": 2.81732957131155, "grad_norm": 17.75, "learning_rate": 2.0738799846241036e-06, "loss": 0.6087172031402588, "step": 15478 }, { "epoch": 2.8176936379357422, "grad_norm": 17.75, "learning_rate": 2.073587131556601e-06, "loss": 1.4700336456298828, "step": 15480 }, { "epoch": 2.8180577045599344, "grad_norm": 165.0, "learning_rate": 2.0732948546742745e-06, "loss": 1.5363982915878296, "step": 15482 }, { "epoch": 2.8184217711841266, "grad_norm": 12.8125, "learning_rate": 2.0730031540200142e-06, "loss": 1.5605051517486572, "step": 15484 }, { "epoch": 2.818785837808319, "grad_norm": 10.8125, "learning_rate": 2.072712029636627e-06, "loss": 1.6522586345672607, "step": 15486 }, { "epoch": 2.819149904432511, "grad_norm": 15.6875, "learning_rate": 2.072421481566833e-06, "loss": 1.505131721496582, "step": 15488 }, { "epoch": 2.819513971056703, "grad_norm": 8.375, "learning_rate": 2.072131509853269e-06, "loss": 1.3004802465438843, "step": 15490 }, { "epoch": 2.819878037680896, "grad_norm": 23.75, "learning_rate": 2.0718421145384884e-06, "loss": 1.040716290473938, "step": 15492 }, { "epoch": 2.8202421043050876, "grad_norm": 11.0625, "learning_rate": 2.0715532956649584e-06, "loss": 1.477367877960205, "step": 15494 }, { "epoch": 2.8206061709292802, "grad_norm": 19.125, "learning_rate": 2.071265053275061e-06, "loss": 1.567819595336914, "step": 15496 }, { "epoch": 2.8209702375534724, "grad_norm": 14.1875, "learning_rate": 2.0709773874110956e-06, "loss": 0.4847180247306824, "step": 15498 }, { "epoch": 2.8213343041776646, "grad_norm": 11.6875, "learning_rate": 2.070690298115275e-06, "loss": 1.4409312009811401, "step": 15500 }, { "epoch": 2.821698370801857, "grad_norm": 81.5, "learning_rate": 2.07040378542973e-06, "loss": 1.2525432109832764, "step": 15502 }, { "epoch": 2.822062437426049, "grad_norm": 14.6875, "learning_rate": 2.070117849396504e-06, "loss": 1.3137834072113037, "step": 15504 }, { "epoch": 2.822426504050241, "grad_norm": 15.0625, "learning_rate": 2.0698324900575563e-06, "loss": 1.3841185569763184, "step": 15506 }, { "epoch": 2.8227905706744334, "grad_norm": 19.875, "learning_rate": 2.069547707454764e-06, "loss": 1.2313940525054932, "step": 15508 }, { "epoch": 2.8231546372986256, "grad_norm": 8.6875, "learning_rate": 2.0692635016299163e-06, "loss": 0.4161737263202667, "step": 15510 }, { "epoch": 2.823518703922818, "grad_norm": 6.375, "learning_rate": 2.0689798726247205e-06, "loss": 1.2001042366027832, "step": 15512 }, { "epoch": 2.82388277054701, "grad_norm": 41.5, "learning_rate": 2.0686968204807968e-06, "loss": 1.2105071544647217, "step": 15514 }, { "epoch": 2.824246837171202, "grad_norm": 13.75, "learning_rate": 2.068414345239683e-06, "loss": 1.5903499126434326, "step": 15516 }, { "epoch": 2.824610903795395, "grad_norm": 16.125, "learning_rate": 2.068132446942831e-06, "loss": 1.4579613208770752, "step": 15518 }, { "epoch": 2.8249749704195866, "grad_norm": 8.125, "learning_rate": 2.067851125631607e-06, "loss": 1.1417038440704346, "step": 15520 }, { "epoch": 2.825339037043779, "grad_norm": 8.8125, "learning_rate": 2.0675703813472953e-06, "loss": 1.263839602470398, "step": 15522 }, { "epoch": 2.8257031036679714, "grad_norm": 8.6875, "learning_rate": 2.067290214131093e-06, "loss": 1.1335194110870361, "step": 15524 }, { "epoch": 2.8260671702921636, "grad_norm": 12.4375, "learning_rate": 2.067010624024114e-06, "loss": 1.3315210342407227, "step": 15526 }, { "epoch": 2.826431236916356, "grad_norm": 14.75, "learning_rate": 2.0667316110673875e-06, "loss": 1.434949278831482, "step": 15528 }, { "epoch": 2.826795303540548, "grad_norm": 30.625, "learning_rate": 2.066453175301856e-06, "loss": 1.5310791730880737, "step": 15530 }, { "epoch": 2.82715937016474, "grad_norm": 15.6875, "learning_rate": 2.0661753167683805e-06, "loss": 1.2255624532699585, "step": 15532 }, { "epoch": 2.8275234367889324, "grad_norm": 16.875, "learning_rate": 2.0658980355077346e-06, "loss": 1.3062666654586792, "step": 15534 }, { "epoch": 2.8278875034131246, "grad_norm": 11.1875, "learning_rate": 2.065621331560609e-06, "loss": 1.1019104719161987, "step": 15536 }, { "epoch": 2.8282515700373168, "grad_norm": 4.40625, "learning_rate": 2.0653452049676073e-06, "loss": 0.9320621490478516, "step": 15538 }, { "epoch": 2.828615636661509, "grad_norm": 26.375, "learning_rate": 2.0650696557692517e-06, "loss": 1.1965681314468384, "step": 15540 }, { "epoch": 2.828979703285701, "grad_norm": 19.75, "learning_rate": 2.064794684005977e-06, "loss": 1.7365366220474243, "step": 15542 }, { "epoch": 2.829343769909894, "grad_norm": 13.8125, "learning_rate": 2.0645202897181345e-06, "loss": 1.3912160396575928, "step": 15544 }, { "epoch": 2.8297078365340855, "grad_norm": 10.6875, "learning_rate": 2.0642464729459906e-06, "loss": 1.3329311609268188, "step": 15546 }, { "epoch": 2.830071903158278, "grad_norm": 11.75, "learning_rate": 2.0639732337297263e-06, "loss": 1.3877556324005127, "step": 15548 }, { "epoch": 2.83043596978247, "grad_norm": 11.5, "learning_rate": 2.0637005721094386e-06, "loss": 1.3779581785202026, "step": 15550 }, { "epoch": 2.8308000364066626, "grad_norm": 13.1875, "learning_rate": 2.06342848812514e-06, "loss": 0.9625064134597778, "step": 15552 }, { "epoch": 2.8311641030308548, "grad_norm": 14.0, "learning_rate": 2.0631569818167563e-06, "loss": 1.2871441841125488, "step": 15554 }, { "epoch": 2.831528169655047, "grad_norm": 19.625, "learning_rate": 2.062886053224132e-06, "loss": 1.640420913696289, "step": 15556 }, { "epoch": 2.831892236279239, "grad_norm": 14.6875, "learning_rate": 2.062615702387023e-06, "loss": 1.3000365495681763, "step": 15558 }, { "epoch": 2.8322563029034313, "grad_norm": 9.0, "learning_rate": 2.0623459293451026e-06, "loss": 1.299499273300171, "step": 15560 }, { "epoch": 2.8326203695276235, "grad_norm": 6.625, "learning_rate": 2.06207673413796e-06, "loss": 1.4046026468276978, "step": 15562 }, { "epoch": 2.8329844361518157, "grad_norm": 3.609375, "learning_rate": 2.0618081168050965e-06, "loss": 1.197812557220459, "step": 15564 }, { "epoch": 2.833348502776008, "grad_norm": 11.8125, "learning_rate": 2.061540077385933e-06, "loss": 1.3336005210876465, "step": 15566 }, { "epoch": 2.8337125694002, "grad_norm": 14.6875, "learning_rate": 2.0612726159198015e-06, "loss": 1.8679522275924683, "step": 15568 }, { "epoch": 2.8340766360243923, "grad_norm": 20.375, "learning_rate": 2.0610057324459504e-06, "loss": 1.4219310283660889, "step": 15570 }, { "epoch": 2.8344407026485845, "grad_norm": 24.75, "learning_rate": 2.0607394270035465e-06, "loss": 1.5261874198913574, "step": 15572 }, { "epoch": 2.834804769272777, "grad_norm": 10.4375, "learning_rate": 2.060473699631666e-06, "loss": 1.2180685997009277, "step": 15574 }, { "epoch": 2.835168835896969, "grad_norm": 19.0, "learning_rate": 2.0602085503693048e-06, "loss": 1.8519091606140137, "step": 15576 }, { "epoch": 2.8355329025211615, "grad_norm": 14.75, "learning_rate": 2.0599439792553727e-06, "loss": 1.835161805152893, "step": 15578 }, { "epoch": 2.8358969691453537, "grad_norm": 2.578125, "learning_rate": 2.0596799863286932e-06, "loss": 1.0542484521865845, "step": 15580 }, { "epoch": 2.836261035769546, "grad_norm": 3.53125, "learning_rate": 2.059416571628008e-06, "loss": 1.0575069189071655, "step": 15582 }, { "epoch": 2.836625102393738, "grad_norm": 9.1875, "learning_rate": 2.05915373519197e-06, "loss": 0.9817519187927246, "step": 15584 }, { "epoch": 2.8369891690179303, "grad_norm": 13.6875, "learning_rate": 2.058891477059151e-06, "loss": 1.3612810373306274, "step": 15586 }, { "epoch": 2.8373532356421225, "grad_norm": 17.375, "learning_rate": 2.058629797268036e-06, "loss": 1.9211276769638062, "step": 15588 }, { "epoch": 2.8377173022663147, "grad_norm": 19.0, "learning_rate": 2.0583686958570247e-06, "loss": 1.8029141426086426, "step": 15590 }, { "epoch": 2.838081368890507, "grad_norm": 7.46875, "learning_rate": 2.0581081728644346e-06, "loss": 1.4475373029708862, "step": 15592 }, { "epoch": 2.838445435514699, "grad_norm": 12.875, "learning_rate": 2.057848228328494e-06, "loss": 1.2792654037475586, "step": 15594 }, { "epoch": 2.8388095021388913, "grad_norm": 10.75, "learning_rate": 2.0575888622873496e-06, "loss": 1.319126844406128, "step": 15596 }, { "epoch": 2.8391735687630835, "grad_norm": 30.5, "learning_rate": 2.057330074779063e-06, "loss": 1.4657354354858398, "step": 15598 }, { "epoch": 2.839537635387276, "grad_norm": 9.4375, "learning_rate": 2.057071865841609e-06, "loss": 1.6511945724487305, "step": 15600 }, { "epoch": 2.839901702011468, "grad_norm": 2.796875, "learning_rate": 2.05681423551288e-06, "loss": 1.0609612464904785, "step": 15602 }, { "epoch": 2.8402657686356605, "grad_norm": 2.34375, "learning_rate": 2.0565571838306815e-06, "loss": 1.0955684185028076, "step": 15604 }, { "epoch": 2.8406298352598527, "grad_norm": 71.0, "learning_rate": 2.056300710832735e-06, "loss": 1.1401350498199463, "step": 15606 }, { "epoch": 2.840993901884045, "grad_norm": 28.625, "learning_rate": 2.0560448165566767e-06, "loss": 1.673246145248413, "step": 15608 }, { "epoch": 2.841357968508237, "grad_norm": 12.4375, "learning_rate": 2.0557895010400577e-06, "loss": 1.4311206340789795, "step": 15610 }, { "epoch": 2.8417220351324293, "grad_norm": 7.84375, "learning_rate": 2.0555347643203457e-06, "loss": 1.3651249408721924, "step": 15612 }, { "epoch": 2.8420861017566215, "grad_norm": 8.5625, "learning_rate": 2.0552806064349207e-06, "loss": 1.7251278162002563, "step": 15614 }, { "epoch": 2.8424501683808137, "grad_norm": 17.5, "learning_rate": 2.0550270274210805e-06, "loss": 1.5464675426483154, "step": 15616 }, { "epoch": 2.842814235005006, "grad_norm": 22.0, "learning_rate": 2.054774027316037e-06, "loss": 1.6187057495117188, "step": 15618 }, { "epoch": 2.843178301629198, "grad_norm": 14.5625, "learning_rate": 2.054521606156915e-06, "loss": 1.9455593824386597, "step": 15620 }, { "epoch": 2.8435423682533902, "grad_norm": 21.75, "learning_rate": 2.0542697639807596e-06, "loss": 1.276139497756958, "step": 15622 }, { "epoch": 2.8439064348775824, "grad_norm": 21.0, "learning_rate": 2.054018500824524e-06, "loss": 0.8827497959136963, "step": 15624 }, { "epoch": 2.844270501501775, "grad_norm": 10.9375, "learning_rate": 2.0537678167250825e-06, "loss": 1.4192324876785278, "step": 15626 }, { "epoch": 2.844634568125967, "grad_norm": 8.0, "learning_rate": 2.0535177117192215e-06, "loss": 1.2304869890213013, "step": 15628 }, { "epoch": 2.8449986347501595, "grad_norm": 10.9375, "learning_rate": 2.053268185843642e-06, "loss": 1.2841804027557373, "step": 15630 }, { "epoch": 2.8453627013743517, "grad_norm": 6.875, "learning_rate": 2.0530192391349617e-06, "loss": 1.3602601289749146, "step": 15632 }, { "epoch": 2.845726767998544, "grad_norm": 8.5, "learning_rate": 2.052770871629712e-06, "loss": 1.4628217220306396, "step": 15634 }, { "epoch": 2.846090834622736, "grad_norm": 12.8125, "learning_rate": 2.052523083364341e-06, "loss": 1.5020369291305542, "step": 15636 }, { "epoch": 2.8464549012469282, "grad_norm": 37.0, "learning_rate": 2.052275874375209e-06, "loss": 0.5445114374160767, "step": 15638 }, { "epoch": 2.8468189678711204, "grad_norm": 15.1875, "learning_rate": 2.0520292446985944e-06, "loss": 1.3979467153549194, "step": 15640 }, { "epoch": 2.8471830344953126, "grad_norm": 10.25, "learning_rate": 2.051783194370688e-06, "loss": 1.17988920211792, "step": 15642 }, { "epoch": 2.847547101119505, "grad_norm": 9.1875, "learning_rate": 2.051537723427597e-06, "loss": 1.3148683309555054, "step": 15644 }, { "epoch": 2.847911167743697, "grad_norm": 6.71875, "learning_rate": 2.0512928319053436e-06, "loss": 1.2289530038833618, "step": 15646 }, { "epoch": 2.848275234367889, "grad_norm": 19.0, "learning_rate": 2.0510485198398644e-06, "loss": 1.502204179763794, "step": 15648 }, { "epoch": 2.8486393009920814, "grad_norm": 12.25, "learning_rate": 2.050804787267011e-06, "loss": 1.7907421588897705, "step": 15650 }, { "epoch": 2.849003367616274, "grad_norm": 3.390625, "learning_rate": 2.050561634222551e-06, "loss": 0.8259409666061401, "step": 15652 }, { "epoch": 2.849367434240466, "grad_norm": 66.5, "learning_rate": 2.0503190607421645e-06, "loss": 0.9685641527175903, "step": 15654 }, { "epoch": 2.8497315008646584, "grad_norm": 25.0, "learning_rate": 2.05007706686145e-06, "loss": 1.569624662399292, "step": 15656 }, { "epoch": 2.8500955674888506, "grad_norm": 25.875, "learning_rate": 2.049835652615918e-06, "loss": 1.6426762342453003, "step": 15658 }, { "epoch": 2.850459634113043, "grad_norm": 11.75, "learning_rate": 2.0495948180409954e-06, "loss": 1.4154115915298462, "step": 15660 }, { "epoch": 2.850823700737235, "grad_norm": 19.75, "learning_rate": 2.0493545631720233e-06, "loss": 1.4515305757522583, "step": 15662 }, { "epoch": 2.851187767361427, "grad_norm": 18.5, "learning_rate": 2.049114888044259e-06, "loss": 1.7178009748458862, "step": 15664 }, { "epoch": 2.8515518339856194, "grad_norm": 38.75, "learning_rate": 2.048875792692873e-06, "loss": 1.2690167427062988, "step": 15666 }, { "epoch": 2.8519159006098116, "grad_norm": 21.75, "learning_rate": 2.0486372771529523e-06, "loss": 1.2907917499542236, "step": 15668 }, { "epoch": 2.852279967234004, "grad_norm": 8.25, "learning_rate": 2.048399341459497e-06, "loss": 1.3868441581726074, "step": 15670 }, { "epoch": 2.852644033858196, "grad_norm": 9.0625, "learning_rate": 2.048161985647425e-06, "loss": 1.3096508979797363, "step": 15672 }, { "epoch": 2.853008100482388, "grad_norm": 3.828125, "learning_rate": 2.0479252097515657e-06, "loss": 1.108668327331543, "step": 15674 }, { "epoch": 2.8533721671065804, "grad_norm": 34.5, "learning_rate": 2.0476890138066656e-06, "loss": 1.0276817083358765, "step": 15676 }, { "epoch": 2.853736233730773, "grad_norm": 12.6875, "learning_rate": 2.047453397847385e-06, "loss": 1.3779515027999878, "step": 15678 }, { "epoch": 2.8541003003549648, "grad_norm": 60.25, "learning_rate": 2.0472183619083e-06, "loss": 1.6853160858154297, "step": 15680 }, { "epoch": 2.8544643669791574, "grad_norm": 30.0, "learning_rate": 2.0469839060239015e-06, "loss": 1.9663193225860596, "step": 15682 }, { "epoch": 2.854828433603349, "grad_norm": 88.5, "learning_rate": 2.0467500302285945e-06, "loss": 1.6754333972930908, "step": 15684 }, { "epoch": 2.855192500227542, "grad_norm": 14.3125, "learning_rate": 2.0465167345566994e-06, "loss": 1.333894968032837, "step": 15686 }, { "epoch": 2.855556566851734, "grad_norm": 13.0, "learning_rate": 2.0462840190424515e-06, "loss": 1.3003911972045898, "step": 15688 }, { "epoch": 2.855920633475926, "grad_norm": 7.03125, "learning_rate": 2.0460518837200007e-06, "loss": 1.461794376373291, "step": 15690 }, { "epoch": 2.8562847001001184, "grad_norm": 19.0, "learning_rate": 2.0458203286234124e-06, "loss": 1.3130040168762207, "step": 15692 }, { "epoch": 2.8566487667243106, "grad_norm": 16.125, "learning_rate": 2.045589353786665e-06, "loss": 1.0638649463653564, "step": 15694 }, { "epoch": 2.8570128333485028, "grad_norm": 7.5625, "learning_rate": 2.045358959243655e-06, "loss": 1.4187135696411133, "step": 15696 }, { "epoch": 2.857376899972695, "grad_norm": 18.75, "learning_rate": 2.045129145028191e-06, "loss": 1.2999001741409302, "step": 15698 }, { "epoch": 2.857740966596887, "grad_norm": 42.0, "learning_rate": 2.044899911173997e-06, "loss": 0.8540710210800171, "step": 15700 }, { "epoch": 2.8581050332210793, "grad_norm": 22.625, "learning_rate": 2.0446712577147128e-06, "loss": 0.3583671748638153, "step": 15702 }, { "epoch": 2.8584690998452715, "grad_norm": 16.625, "learning_rate": 2.044443184683891e-06, "loss": 1.6492469310760498, "step": 15704 }, { "epoch": 2.8588331664694637, "grad_norm": 7.25, "learning_rate": 2.0442156921150025e-06, "loss": 1.2886207103729248, "step": 15706 }, { "epoch": 2.8591972330936564, "grad_norm": 15.6875, "learning_rate": 2.0439887800414294e-06, "loss": 1.9317054748535156, "step": 15708 }, { "epoch": 2.859561299717848, "grad_norm": 18.125, "learning_rate": 2.04376244849647e-06, "loss": 1.86313796043396, "step": 15710 }, { "epoch": 2.8599253663420408, "grad_norm": 13.6875, "learning_rate": 2.0435366975133384e-06, "loss": 1.291890263557434, "step": 15712 }, { "epoch": 2.860289432966233, "grad_norm": 22.75, "learning_rate": 2.0433115271251626e-06, "loss": 1.4764728546142578, "step": 15714 }, { "epoch": 2.860653499590425, "grad_norm": 16.25, "learning_rate": 2.0430869373649847e-06, "loss": 1.3750933408737183, "step": 15716 }, { "epoch": 2.8610175662146173, "grad_norm": 13.875, "learning_rate": 2.042862928265763e-06, "loss": 1.2089996337890625, "step": 15718 }, { "epoch": 2.8613816328388095, "grad_norm": 8.375, "learning_rate": 2.0426394998603694e-06, "loss": 1.3231537342071533, "step": 15720 }, { "epoch": 2.8617456994630017, "grad_norm": 9.125, "learning_rate": 2.0424166521815924e-06, "loss": 1.4454478025436401, "step": 15722 }, { "epoch": 2.862109766087194, "grad_norm": 8.8125, "learning_rate": 2.042194385262132e-06, "loss": 1.411362648010254, "step": 15724 }, { "epoch": 2.862473832711386, "grad_norm": 6.6875, "learning_rate": 2.0419726991346065e-06, "loss": 1.2141002416610718, "step": 15726 }, { "epoch": 2.8628378993355783, "grad_norm": 11.8125, "learning_rate": 2.0417515938315468e-06, "loss": 1.022182822227478, "step": 15728 }, { "epoch": 2.8632019659597705, "grad_norm": 15.4375, "learning_rate": 2.041531069385399e-06, "loss": 0.5809782147407532, "step": 15730 }, { "epoch": 2.8635660325839627, "grad_norm": 8.5625, "learning_rate": 2.0413111258285247e-06, "loss": 1.5237994194030762, "step": 15732 }, { "epoch": 2.8639300992081553, "grad_norm": 12.8125, "learning_rate": 2.0410917631931994e-06, "loss": 1.4772496223449707, "step": 15734 }, { "epoch": 2.864294165832347, "grad_norm": 11.0, "learning_rate": 2.040872981511614e-06, "loss": 1.3659594058990479, "step": 15736 }, { "epoch": 2.8646582324565397, "grad_norm": 12.125, "learning_rate": 2.040654780815874e-06, "loss": 1.2304561138153076, "step": 15738 }, { "epoch": 2.865022299080732, "grad_norm": 24.0, "learning_rate": 2.040437161137998e-06, "loss": 1.602603554725647, "step": 15740 }, { "epoch": 2.865386365704924, "grad_norm": 8.5, "learning_rate": 2.040220122509923e-06, "loss": 1.4287970066070557, "step": 15742 }, { "epoch": 2.8657504323291163, "grad_norm": 12.625, "learning_rate": 2.0400036649634967e-06, "loss": 1.388832449913025, "step": 15744 }, { "epoch": 2.8661144989533085, "grad_norm": 8.375, "learning_rate": 2.039787788530485e-06, "loss": 1.1057956218719482, "step": 15746 }, { "epoch": 2.8664785655775007, "grad_norm": 6.59375, "learning_rate": 2.0395724932425652e-06, "loss": 1.2073942422866821, "step": 15748 }, { "epoch": 2.866842632201693, "grad_norm": 9.0625, "learning_rate": 2.0393577791313314e-06, "loss": 1.4321943521499634, "step": 15750 }, { "epoch": 2.867206698825885, "grad_norm": 16.125, "learning_rate": 2.0391436462282934e-06, "loss": 1.4703847169876099, "step": 15752 }, { "epoch": 2.8675707654500773, "grad_norm": 36.75, "learning_rate": 2.0389300945648733e-06, "loss": 1.25998854637146, "step": 15754 }, { "epoch": 2.8679348320742695, "grad_norm": 11.8125, "learning_rate": 2.038717124172409e-06, "loss": 1.2880170345306396, "step": 15756 }, { "epoch": 2.8682988986984617, "grad_norm": 10.25, "learning_rate": 2.0385047350821524e-06, "loss": 1.6903637647628784, "step": 15758 }, { "epoch": 2.8686629653226543, "grad_norm": 8.75, "learning_rate": 2.0382929273252716e-06, "loss": 1.130076289176941, "step": 15760 }, { "epoch": 2.869027031946846, "grad_norm": 8.5625, "learning_rate": 2.038081700932849e-06, "loss": 1.4082366228103638, "step": 15762 }, { "epoch": 2.8693910985710387, "grad_norm": 7.625, "learning_rate": 2.0378710559358796e-06, "loss": 1.1690301895141602, "step": 15764 }, { "epoch": 2.869755165195231, "grad_norm": 26.875, "learning_rate": 2.037660992365276e-06, "loss": 1.290572166442871, "step": 15766 }, { "epoch": 2.870119231819423, "grad_norm": 10.75, "learning_rate": 2.037451510251864e-06, "loss": 1.1646549701690674, "step": 15768 }, { "epoch": 2.8704832984436153, "grad_norm": 103.5, "learning_rate": 2.037242609626384e-06, "loss": 1.3685389757156372, "step": 15770 }, { "epoch": 2.8708473650678075, "grad_norm": 11.5, "learning_rate": 2.037034290519492e-06, "loss": 1.4348698854446411, "step": 15772 }, { "epoch": 2.8712114316919997, "grad_norm": 56.0, "learning_rate": 2.036826552961756e-06, "loss": 1.3860540390014648, "step": 15774 }, { "epoch": 2.871575498316192, "grad_norm": 24.5, "learning_rate": 2.036619396983663e-06, "loss": 1.311455488204956, "step": 15776 }, { "epoch": 2.871939564940384, "grad_norm": 7.25, "learning_rate": 2.036412822615611e-06, "loss": 1.3445053100585938, "step": 15778 }, { "epoch": 2.8723036315645762, "grad_norm": 11.1875, "learning_rate": 2.0362068298879143e-06, "loss": 1.4567331075668335, "step": 15780 }, { "epoch": 2.8726676981887684, "grad_norm": 6.5625, "learning_rate": 2.0360014188308016e-06, "loss": 1.3662030696868896, "step": 15782 }, { "epoch": 2.8730317648129606, "grad_norm": 9.0, "learning_rate": 2.035796589474416e-06, "loss": 2.10536527633667, "step": 15784 }, { "epoch": 2.8733958314371533, "grad_norm": 8.3125, "learning_rate": 2.035592341848815e-06, "loss": 1.174559235572815, "step": 15786 }, { "epoch": 2.873759898061345, "grad_norm": 15.0625, "learning_rate": 2.035388675983972e-06, "loss": 1.8436174392700195, "step": 15788 }, { "epoch": 2.8741239646855377, "grad_norm": 13.3125, "learning_rate": 2.035185591909773e-06, "loss": 1.5882307291030884, "step": 15790 }, { "epoch": 2.8744880313097294, "grad_norm": 5.0, "learning_rate": 2.034983089656021e-06, "loss": 1.302262306213379, "step": 15792 }, { "epoch": 2.874852097933922, "grad_norm": 330.0, "learning_rate": 2.0347811692524312e-06, "loss": 1.3024449348449707, "step": 15794 }, { "epoch": 2.8752161645581142, "grad_norm": 20.625, "learning_rate": 2.034579830728636e-06, "loss": 1.30109441280365, "step": 15796 }, { "epoch": 2.8755802311823064, "grad_norm": 9.5, "learning_rate": 2.03437907411418e-06, "loss": 0.8696068525314331, "step": 15798 }, { "epoch": 2.8759442978064986, "grad_norm": 5.53125, "learning_rate": 2.0341788994385227e-06, "loss": 1.3312145471572876, "step": 15800 }, { "epoch": 2.876308364430691, "grad_norm": 12.3125, "learning_rate": 2.033979306731041e-06, "loss": 1.341722011566162, "step": 15802 }, { "epoch": 2.876672431054883, "grad_norm": 16.5, "learning_rate": 2.0337802960210225e-06, "loss": 1.6390020847320557, "step": 15804 }, { "epoch": 2.877036497679075, "grad_norm": 30.75, "learning_rate": 2.033581867337672e-06, "loss": 1.4610161781311035, "step": 15806 }, { "epoch": 2.8774005643032674, "grad_norm": 18.0, "learning_rate": 2.033384020710108e-06, "loss": 1.8534249067306519, "step": 15808 }, { "epoch": 2.8777646309274596, "grad_norm": 40.0, "learning_rate": 2.0331867561673636e-06, "loss": 1.7233022451400757, "step": 15810 }, { "epoch": 2.878128697551652, "grad_norm": 9.6875, "learning_rate": 2.032990073738387e-06, "loss": 1.1840872764587402, "step": 15812 }, { "epoch": 2.878492764175844, "grad_norm": 39.5, "learning_rate": 2.0327939734520398e-06, "loss": 1.0164161920547485, "step": 15814 }, { "epoch": 2.8788568308000366, "grad_norm": 21.75, "learning_rate": 2.0325984553370995e-06, "loss": 1.2551268339157104, "step": 15816 }, { "epoch": 2.8792208974242284, "grad_norm": 27.75, "learning_rate": 2.0324035194222573e-06, "loss": 1.7345207929611206, "step": 15818 }, { "epoch": 2.879584964048421, "grad_norm": 9.75, "learning_rate": 2.0322091657361194e-06, "loss": 0.8374035358428955, "step": 15820 }, { "epoch": 2.879949030672613, "grad_norm": 9.5625, "learning_rate": 2.0320153943072065e-06, "loss": 1.3922512531280518, "step": 15822 }, { "epoch": 2.8803130972968054, "grad_norm": 3.46875, "learning_rate": 2.0318222051639535e-06, "loss": 0.9179459810256958, "step": 15824 }, { "epoch": 2.8806771639209976, "grad_norm": 7.1875, "learning_rate": 2.0316295983347107e-06, "loss": 1.0979242324829102, "step": 15826 }, { "epoch": 2.88104123054519, "grad_norm": 5.84375, "learning_rate": 2.0314375738477415e-06, "loss": 1.2580231428146362, "step": 15828 }, { "epoch": 2.881405297169382, "grad_norm": 16.125, "learning_rate": 2.0312461317312248e-06, "loss": 1.3663318157196045, "step": 15830 }, { "epoch": 2.881769363793574, "grad_norm": 15.4375, "learning_rate": 2.031055272013255e-06, "loss": 0.878201425075531, "step": 15832 }, { "epoch": 2.8821334304177664, "grad_norm": 9.3125, "learning_rate": 2.030864994721839e-06, "loss": 1.3123582601547241, "step": 15834 }, { "epoch": 2.8824974970419586, "grad_norm": 13.6875, "learning_rate": 2.0306752998849e-06, "loss": 0.7969541549682617, "step": 15836 }, { "epoch": 2.8828615636661508, "grad_norm": 6.40625, "learning_rate": 2.030486187530274e-06, "loss": 1.2317688465118408, "step": 15838 }, { "epoch": 2.883225630290343, "grad_norm": 13.25, "learning_rate": 2.0302976576857127e-06, "loss": 1.588843584060669, "step": 15840 }, { "epoch": 2.8835896969145356, "grad_norm": 12.0625, "learning_rate": 2.030109710378883e-06, "loss": 0.7127867341041565, "step": 15842 }, { "epoch": 2.8839537635387273, "grad_norm": 15.125, "learning_rate": 2.029922345637364e-06, "loss": 1.6774282455444336, "step": 15844 }, { "epoch": 2.88431783016292, "grad_norm": 13.9375, "learning_rate": 2.029735563488652e-06, "loss": 1.494886875152588, "step": 15846 }, { "epoch": 2.884681896787112, "grad_norm": 20.125, "learning_rate": 2.029549363960156e-06, "loss": 1.8388550281524658, "step": 15848 }, { "epoch": 2.8850459634113044, "grad_norm": 10.4375, "learning_rate": 2.0293637470791996e-06, "loss": 1.740090250968933, "step": 15850 }, { "epoch": 2.8854100300354966, "grad_norm": 3.671875, "learning_rate": 2.0291787128730223e-06, "loss": 1.2536745071411133, "step": 15852 }, { "epoch": 2.8857740966596888, "grad_norm": 4.46875, "learning_rate": 2.028994261368776e-06, "loss": 0.7723858952522278, "step": 15854 }, { "epoch": 2.886138163283881, "grad_norm": 16.375, "learning_rate": 2.028810392593529e-06, "loss": 0.9452016949653625, "step": 15856 }, { "epoch": 2.886502229908073, "grad_norm": 7.96875, "learning_rate": 2.028627106574263e-06, "loss": 0.9506535530090332, "step": 15858 }, { "epoch": 2.8868662965322653, "grad_norm": 7.25, "learning_rate": 2.0284444033378744e-06, "loss": 1.289560317993164, "step": 15860 }, { "epoch": 2.8872303631564575, "grad_norm": 52.75, "learning_rate": 2.0282622829111753e-06, "loss": 1.4744064807891846, "step": 15862 }, { "epoch": 2.8875944297806497, "grad_norm": 19.375, "learning_rate": 2.0280807453208887e-06, "loss": 1.450692892074585, "step": 15864 }, { "epoch": 2.887958496404842, "grad_norm": 8.1875, "learning_rate": 2.0278997905936566e-06, "loss": 1.568651795387268, "step": 15866 }, { "epoch": 2.8883225630290346, "grad_norm": 5.9375, "learning_rate": 2.0277194187560332e-06, "loss": 1.1895782947540283, "step": 15868 }, { "epoch": 2.8886866296532263, "grad_norm": 8.6875, "learning_rate": 2.0275396298344856e-06, "loss": 1.176336407661438, "step": 15870 }, { "epoch": 2.889050696277419, "grad_norm": 8.6875, "learning_rate": 2.0273604238554e-06, "loss": 1.067319393157959, "step": 15872 }, { "epoch": 2.889414762901611, "grad_norm": 3.984375, "learning_rate": 2.027181800845071e-06, "loss": 1.488231897354126, "step": 15874 }, { "epoch": 2.8897788295258033, "grad_norm": 11.4375, "learning_rate": 2.027003760829713e-06, "loss": 1.062765121459961, "step": 15876 }, { "epoch": 2.8901428961499955, "grad_norm": 14.8125, "learning_rate": 2.026826303835452e-06, "loss": 1.4547913074493408, "step": 15878 }, { "epoch": 2.8905069627741877, "grad_norm": 24.0, "learning_rate": 2.0266494298883286e-06, "loss": 1.5140360593795776, "step": 15880 }, { "epoch": 2.89087102939838, "grad_norm": 5.21875, "learning_rate": 2.0264731390142997e-06, "loss": 1.241590976715088, "step": 15882 }, { "epoch": 2.891235096022572, "grad_norm": 15.3125, "learning_rate": 2.0262974312392335e-06, "loss": 0.7726262211799622, "step": 15884 }, { "epoch": 2.8915991626467643, "grad_norm": 32.5, "learning_rate": 2.0261223065889155e-06, "loss": 0.5962456464767456, "step": 15886 }, { "epoch": 2.8919632292709565, "grad_norm": 5.78125, "learning_rate": 2.0259477650890442e-06, "loss": 0.9035093188285828, "step": 15888 }, { "epoch": 2.8923272958951487, "grad_norm": 31.375, "learning_rate": 2.025773806765233e-06, "loss": 1.264603853225708, "step": 15890 }, { "epoch": 2.892691362519341, "grad_norm": 43.25, "learning_rate": 2.02560043164301e-06, "loss": 1.2605006694793701, "step": 15892 }, { "epoch": 2.8930554291435335, "grad_norm": 23.125, "learning_rate": 2.025427639747816e-06, "loss": 1.0313756465911865, "step": 15894 }, { "epoch": 2.8934194957677253, "grad_norm": 14.9375, "learning_rate": 2.025255431105009e-06, "loss": 1.1444826126098633, "step": 15896 }, { "epoch": 2.893783562391918, "grad_norm": 14.875, "learning_rate": 2.0250838057398586e-06, "loss": 1.7389105558395386, "step": 15898 }, { "epoch": 2.89414762901611, "grad_norm": 6.0, "learning_rate": 2.024912763677551e-06, "loss": 1.1460657119750977, "step": 15900 }, { "epoch": 2.8945116956403023, "grad_norm": 9.875, "learning_rate": 2.0247423049431864e-06, "loss": 1.4714908599853516, "step": 15902 }, { "epoch": 2.8948757622644945, "grad_norm": 11.1875, "learning_rate": 2.0245724295617776e-06, "loss": 1.3630484342575073, "step": 15904 }, { "epoch": 2.8952398288886867, "grad_norm": 16.875, "learning_rate": 2.024403137558254e-06, "loss": 1.1351715326309204, "step": 15906 }, { "epoch": 2.895603895512879, "grad_norm": 24.875, "learning_rate": 2.024234428957458e-06, "loss": 1.93813157081604, "step": 15908 }, { "epoch": 2.895967962137071, "grad_norm": 21.625, "learning_rate": 2.024066303784147e-06, "loss": 1.401777982711792, "step": 15910 }, { "epoch": 2.8963320287612633, "grad_norm": 16.625, "learning_rate": 2.0238987620629936e-06, "loss": 1.0589489936828613, "step": 15912 }, { "epoch": 2.8966960953854555, "grad_norm": 11.5625, "learning_rate": 2.0237318038185824e-06, "loss": 1.4927003383636475, "step": 15914 }, { "epoch": 2.8970601620096477, "grad_norm": 10.875, "learning_rate": 2.023565429075415e-06, "loss": 1.311495065689087, "step": 15916 }, { "epoch": 2.89742422863384, "grad_norm": 36.75, "learning_rate": 2.0233996378579057e-06, "loss": 1.671440601348877, "step": 15918 }, { "epoch": 2.897788295258032, "grad_norm": 15.9375, "learning_rate": 2.023234430190384e-06, "loss": 1.2805222272872925, "step": 15920 }, { "epoch": 2.8981523618822242, "grad_norm": 8.25, "learning_rate": 2.0230698060970934e-06, "loss": 1.5018774271011353, "step": 15922 }, { "epoch": 2.898516428506417, "grad_norm": 9.625, "learning_rate": 2.022905765602191e-06, "loss": 1.1422563791275024, "step": 15924 }, { "epoch": 2.8988804951306086, "grad_norm": 10.125, "learning_rate": 2.02274230872975e-06, "loss": 1.209306001663208, "step": 15926 }, { "epoch": 2.8992445617548013, "grad_norm": 41.0, "learning_rate": 2.022579435503757e-06, "loss": 1.550145149230957, "step": 15928 }, { "epoch": 2.8996086283789935, "grad_norm": 18.75, "learning_rate": 2.0224171459481125e-06, "loss": 1.749100685119629, "step": 15930 }, { "epoch": 2.8999726950031857, "grad_norm": 6.03125, "learning_rate": 2.0222554400866327e-06, "loss": 1.1123734712600708, "step": 15932 }, { "epoch": 2.900336761627378, "grad_norm": 7.15625, "learning_rate": 2.0220943179430455e-06, "loss": 1.2370554208755493, "step": 15934 }, { "epoch": 2.90070082825157, "grad_norm": 13.375, "learning_rate": 2.0219337795409973e-06, "loss": 1.2984586954116821, "step": 15936 }, { "epoch": 2.9010648948757622, "grad_norm": 9.5625, "learning_rate": 2.021773824904045e-06, "loss": 1.2779642343521118, "step": 15938 }, { "epoch": 2.9014289614999544, "grad_norm": 20.125, "learning_rate": 2.021614454055661e-06, "loss": 1.8613547086715698, "step": 15940 }, { "epoch": 2.9017930281241466, "grad_norm": 10.4375, "learning_rate": 2.0214556670192334e-06, "loss": 0.9604834318161011, "step": 15942 }, { "epoch": 2.902157094748339, "grad_norm": 6.71875, "learning_rate": 2.0212974638180626e-06, "loss": 1.0142650604248047, "step": 15944 }, { "epoch": 2.902521161372531, "grad_norm": 8.5, "learning_rate": 2.021139844475365e-06, "loss": 1.3438489437103271, "step": 15946 }, { "epoch": 2.902885227996723, "grad_norm": 11.625, "learning_rate": 2.0209828090142704e-06, "loss": 1.478317379951477, "step": 15948 }, { "epoch": 2.903249294620916, "grad_norm": 9.125, "learning_rate": 2.0208263574578226e-06, "loss": 1.1138684749603271, "step": 15950 }, { "epoch": 2.9036133612451076, "grad_norm": 19.125, "learning_rate": 2.020670489828981e-06, "loss": 1.3077504634857178, "step": 15952 }, { "epoch": 2.9039774278693002, "grad_norm": 7.90625, "learning_rate": 2.0205152061506184e-06, "loss": 1.2632169723510742, "step": 15954 }, { "epoch": 2.9043414944934924, "grad_norm": 32.75, "learning_rate": 2.0203605064455214e-06, "loss": 0.9591492414474487, "step": 15956 }, { "epoch": 2.9047055611176846, "grad_norm": 12.125, "learning_rate": 2.020206390736392e-06, "loss": 1.1280864477157593, "step": 15958 }, { "epoch": 2.905069627741877, "grad_norm": 14.6875, "learning_rate": 2.0200528590458466e-06, "loss": 1.4261302947998047, "step": 15960 }, { "epoch": 2.905433694366069, "grad_norm": 41.25, "learning_rate": 2.0198999113964145e-06, "loss": 1.5224285125732422, "step": 15962 }, { "epoch": 2.905797760990261, "grad_norm": 10.1875, "learning_rate": 2.0197475478105403e-06, "loss": 1.3850963115692139, "step": 15964 }, { "epoch": 2.9061618276144534, "grad_norm": 8.1875, "learning_rate": 2.0195957683105833e-06, "loss": 1.3561177253723145, "step": 15966 }, { "epoch": 2.9065258942386456, "grad_norm": 36.5, "learning_rate": 2.019444572918816e-06, "loss": 1.3137898445129395, "step": 15968 }, { "epoch": 2.906889960862838, "grad_norm": 13.0, "learning_rate": 2.0192939616574258e-06, "loss": 1.251298189163208, "step": 15970 }, { "epoch": 2.90725402748703, "grad_norm": 61.5, "learning_rate": 2.019143934548514e-06, "loss": 1.9831271171569824, "step": 15972 }, { "epoch": 2.907618094111222, "grad_norm": 6.46875, "learning_rate": 2.018994491614097e-06, "loss": 1.1440458297729492, "step": 15974 }, { "epoch": 2.907982160735415, "grad_norm": 24.625, "learning_rate": 2.0188456328761052e-06, "loss": 1.7433085441589355, "step": 15976 }, { "epoch": 2.9083462273596066, "grad_norm": 21.875, "learning_rate": 2.018697358356382e-06, "loss": 1.3896424770355225, "step": 15978 }, { "epoch": 2.908710293983799, "grad_norm": 26.125, "learning_rate": 2.018549668076687e-06, "loss": 1.91237211227417, "step": 15980 }, { "epoch": 2.9090743606079914, "grad_norm": 10.6875, "learning_rate": 2.018402562058693e-06, "loss": 1.3994228839874268, "step": 15982 }, { "epoch": 2.9094384272321836, "grad_norm": 26.5, "learning_rate": 2.0182560403239863e-06, "loss": 1.3572702407836914, "step": 15984 }, { "epoch": 2.909802493856376, "grad_norm": 10.75, "learning_rate": 2.0181101028940698e-06, "loss": 0.895589292049408, "step": 15986 }, { "epoch": 2.910166560480568, "grad_norm": 13.4375, "learning_rate": 2.0179647497903583e-06, "loss": 0.9564281105995178, "step": 15988 }, { "epoch": 2.91053062710476, "grad_norm": 10.75, "learning_rate": 2.0178199810341815e-06, "loss": 0.4834787845611572, "step": 15990 }, { "epoch": 2.9108946937289524, "grad_norm": 8.1875, "learning_rate": 2.0176757966467842e-06, "loss": 1.3822435140609741, "step": 15992 }, { "epoch": 2.9112587603531446, "grad_norm": 17.625, "learning_rate": 2.0175321966493254e-06, "loss": 1.8450682163238525, "step": 15994 }, { "epoch": 2.9116228269773368, "grad_norm": 7.53125, "learning_rate": 2.017389181062877e-06, "loss": 1.1677730083465576, "step": 15996 }, { "epoch": 2.911986893601529, "grad_norm": 10.5, "learning_rate": 2.0172467499084263e-06, "loss": 1.4569188356399536, "step": 15998 }, { "epoch": 2.912350960225721, "grad_norm": 7.96875, "learning_rate": 2.0171049032068736e-06, "loss": 1.0316945314407349, "step": 16000 }, { "epoch": 2.912715026849914, "grad_norm": 10.5, "learning_rate": 2.016963640979036e-06, "loss": 1.4173057079315186, "step": 16002 }, { "epoch": 2.9130790934741055, "grad_norm": 7.03125, "learning_rate": 2.0168229632456415e-06, "loss": 1.1555143594741821, "step": 16004 }, { "epoch": 2.913443160098298, "grad_norm": 27.0, "learning_rate": 2.0166828700273355e-06, "loss": 1.3070731163024902, "step": 16006 }, { "epoch": 2.9138072267224904, "grad_norm": 10.75, "learning_rate": 2.016543361344675e-06, "loss": 1.3394155502319336, "step": 16008 }, { "epoch": 2.9141712933466826, "grad_norm": 11.625, "learning_rate": 2.0164044372181328e-06, "loss": 1.4120908975601196, "step": 16010 }, { "epoch": 2.9145353599708748, "grad_norm": 9.25, "learning_rate": 2.016266097668095e-06, "loss": 1.2500807046890259, "step": 16012 }, { "epoch": 2.914899426595067, "grad_norm": 6.28125, "learning_rate": 2.0161283427148625e-06, "loss": 1.174961805343628, "step": 16014 }, { "epoch": 2.915263493219259, "grad_norm": 24.875, "learning_rate": 2.0159911723786513e-06, "loss": 1.4608769416809082, "step": 16016 }, { "epoch": 2.9156275598434513, "grad_norm": 17.25, "learning_rate": 2.0158545866795896e-06, "loss": 1.5372098684310913, "step": 16018 }, { "epoch": 2.9159916264676435, "grad_norm": 12.9375, "learning_rate": 2.0157185856377205e-06, "loss": 1.503348469734192, "step": 16020 }, { "epoch": 2.9163556930918357, "grad_norm": 14.0625, "learning_rate": 2.0155831692730026e-06, "loss": 1.502626895904541, "step": 16022 }, { "epoch": 2.916719759716028, "grad_norm": 7.25, "learning_rate": 2.015448337605307e-06, "loss": 1.3237050771713257, "step": 16024 }, { "epoch": 2.91708382634022, "grad_norm": 15.0625, "learning_rate": 2.0153140906544194e-06, "loss": 1.4869897365570068, "step": 16026 }, { "epoch": 2.9174478929644128, "grad_norm": 13.0, "learning_rate": 2.015180428440041e-06, "loss": 1.3573061227798462, "step": 16028 }, { "epoch": 2.9178119595886045, "grad_norm": 10.3125, "learning_rate": 2.015047350981785e-06, "loss": 1.2338680028915405, "step": 16030 }, { "epoch": 2.918176026212797, "grad_norm": 7.375, "learning_rate": 2.0149148582991816e-06, "loss": 0.936947226524353, "step": 16032 }, { "epoch": 2.918540092836989, "grad_norm": 18.375, "learning_rate": 2.0147829504116724e-06, "loss": 1.853521704673767, "step": 16034 }, { "epoch": 2.9189041594611815, "grad_norm": 8.25, "learning_rate": 2.0146516273386145e-06, "loss": 1.1638528108596802, "step": 16036 }, { "epoch": 2.9192682260853737, "grad_norm": 4.6875, "learning_rate": 2.0145208890992784e-06, "loss": 0.8490620255470276, "step": 16038 }, { "epoch": 2.919632292709566, "grad_norm": 11.875, "learning_rate": 2.0143907357128507e-06, "loss": 1.5628931522369385, "step": 16040 }, { "epoch": 2.919996359333758, "grad_norm": 9.375, "learning_rate": 2.0142611671984304e-06, "loss": 0.942184567451477, "step": 16042 }, { "epoch": 2.9203604259579503, "grad_norm": 11.0, "learning_rate": 2.0141321835750306e-06, "loss": 1.249287486076355, "step": 16044 }, { "epoch": 2.9207244925821425, "grad_norm": 5.9375, "learning_rate": 2.0140037848615798e-06, "loss": 1.2209815979003906, "step": 16046 }, { "epoch": 2.9210885592063347, "grad_norm": 6.8125, "learning_rate": 2.0138759710769196e-06, "loss": 1.1884042024612427, "step": 16048 }, { "epoch": 2.921452625830527, "grad_norm": 4.15625, "learning_rate": 2.0137487422398063e-06, "loss": 1.2047877311706543, "step": 16050 }, { "epoch": 2.921816692454719, "grad_norm": 8.6875, "learning_rate": 2.0136220983689104e-06, "loss": 1.1593250036239624, "step": 16052 }, { "epoch": 2.9221807590789113, "grad_norm": 15.0, "learning_rate": 2.0134960394828164e-06, "loss": 1.3413877487182617, "step": 16054 }, { "epoch": 2.9225448257031035, "grad_norm": 25.625, "learning_rate": 2.0133705656000224e-06, "loss": 1.4110627174377441, "step": 16056 }, { "epoch": 2.922908892327296, "grad_norm": 10.625, "learning_rate": 2.0132456767389415e-06, "loss": 1.5338093042373657, "step": 16058 }, { "epoch": 2.923272958951488, "grad_norm": 26.0, "learning_rate": 2.0131213729179002e-06, "loss": 0.45551732182502747, "step": 16060 }, { "epoch": 2.9236370255756805, "grad_norm": 12.5, "learning_rate": 2.012997654155141e-06, "loss": 1.3342375755310059, "step": 16062 }, { "epoch": 2.9240010921998727, "grad_norm": 6.75, "learning_rate": 2.012874520468817e-06, "loss": 1.0010552406311035, "step": 16064 }, { "epoch": 2.924365158824065, "grad_norm": 5.25, "learning_rate": 2.0127519718769997e-06, "loss": 0.9340542554855347, "step": 16066 }, { "epoch": 2.924729225448257, "grad_norm": 12.25, "learning_rate": 2.0126300083976714e-06, "loss": 1.2615128755569458, "step": 16068 }, { "epoch": 2.9250932920724493, "grad_norm": 11.0625, "learning_rate": 2.0125086300487293e-06, "loss": 1.278813123703003, "step": 16070 }, { "epoch": 2.9254573586966415, "grad_norm": 12.8125, "learning_rate": 2.0123878368479866e-06, "loss": 1.0369194746017456, "step": 16072 }, { "epoch": 2.9258214253208337, "grad_norm": 29.0, "learning_rate": 2.0122676288131687e-06, "loss": 1.0971245765686035, "step": 16074 }, { "epoch": 2.926185491945026, "grad_norm": 11.3125, "learning_rate": 2.012148005961915e-06, "loss": 1.3509385585784912, "step": 16076 }, { "epoch": 2.926549558569218, "grad_norm": 12.875, "learning_rate": 2.01202896831178e-06, "loss": 1.2240263223648071, "step": 16078 }, { "epoch": 2.9269136251934103, "grad_norm": 27.25, "learning_rate": 2.0119105158802314e-06, "loss": 1.729132890701294, "step": 16080 }, { "epoch": 2.9272776918176024, "grad_norm": 14.5, "learning_rate": 2.0117926486846533e-06, "loss": 1.9291480779647827, "step": 16082 }, { "epoch": 2.927641758441795, "grad_norm": 6.0, "learning_rate": 2.0116753667423405e-06, "loss": 1.3116145133972168, "step": 16084 }, { "epoch": 2.928005825065987, "grad_norm": 15.875, "learning_rate": 2.011558670070505e-06, "loss": 1.166590690612793, "step": 16086 }, { "epoch": 2.9283698916901795, "grad_norm": 11.3125, "learning_rate": 2.011442558686271e-06, "loss": 1.081920862197876, "step": 16088 }, { "epoch": 2.9287339583143717, "grad_norm": 23.75, "learning_rate": 2.011327032606677e-06, "loss": 1.2143311500549316, "step": 16090 }, { "epoch": 2.929098024938564, "grad_norm": 31.25, "learning_rate": 2.011212091848676e-06, "loss": 1.2882063388824463, "step": 16092 }, { "epoch": 2.929462091562756, "grad_norm": 10.1875, "learning_rate": 2.0110977364291356e-06, "loss": 1.613126516342163, "step": 16094 }, { "epoch": 2.9298261581869482, "grad_norm": 5.90625, "learning_rate": 2.0109839663648365e-06, "loss": 1.3092408180236816, "step": 16096 }, { "epoch": 2.9301902248111404, "grad_norm": 7.1875, "learning_rate": 2.010870781672475e-06, "loss": 1.3179875612258911, "step": 16098 }, { "epoch": 2.9305542914353326, "grad_norm": 14.75, "learning_rate": 2.0107581823686592e-06, "loss": 1.1656293869018555, "step": 16100 }, { "epoch": 2.930918358059525, "grad_norm": 13.1875, "learning_rate": 2.010646168469913e-06, "loss": 0.8745218515396118, "step": 16102 }, { "epoch": 2.931282424683717, "grad_norm": 45.25, "learning_rate": 2.0105347399926747e-06, "loss": 1.3327651023864746, "step": 16104 }, { "epoch": 2.931646491307909, "grad_norm": 13.125, "learning_rate": 2.010423896953295e-06, "loss": 1.9630465507507324, "step": 16106 }, { "epoch": 2.9320105579321014, "grad_norm": 9.125, "learning_rate": 2.0103136393680406e-06, "loss": 1.1468077898025513, "step": 16108 }, { "epoch": 2.932374624556294, "grad_norm": 12.875, "learning_rate": 2.0102039672530904e-06, "loss": 1.1899211406707764, "step": 16110 }, { "epoch": 2.932738691180486, "grad_norm": 32.0, "learning_rate": 2.010094880624539e-06, "loss": 1.5794847011566162, "step": 16112 }, { "epoch": 2.9331027578046784, "grad_norm": 27.75, "learning_rate": 2.009986379498394e-06, "loss": 1.1136564016342163, "step": 16114 }, { "epoch": 2.9334668244288706, "grad_norm": 3.203125, "learning_rate": 2.0098784638905776e-06, "loss": 1.2769078016281128, "step": 16116 }, { "epoch": 2.933830891053063, "grad_norm": 8.5, "learning_rate": 2.0097711338169264e-06, "loss": 0.9284353256225586, "step": 16118 }, { "epoch": 2.934194957677255, "grad_norm": 8.125, "learning_rate": 2.00966438929319e-06, "loss": 1.3715157508850098, "step": 16120 }, { "epoch": 2.934559024301447, "grad_norm": 12.5, "learning_rate": 2.0095582303350334e-06, "loss": 1.0132701396942139, "step": 16122 }, { "epoch": 2.9349230909256394, "grad_norm": 14.1875, "learning_rate": 2.0094526569580343e-06, "loss": 1.4751384258270264, "step": 16124 }, { "epoch": 2.9352871575498316, "grad_norm": 28.875, "learning_rate": 2.009347669177686e-06, "loss": 1.856689691543579, "step": 16126 }, { "epoch": 2.935651224174024, "grad_norm": 19.625, "learning_rate": 2.009243267009394e-06, "loss": 1.7609423398971558, "step": 16128 }, { "epoch": 2.936015290798216, "grad_norm": 14.5625, "learning_rate": 2.0091394504684792e-06, "loss": 1.3988747596740723, "step": 16130 }, { "epoch": 2.936379357422408, "grad_norm": 14.625, "learning_rate": 2.009036219570177e-06, "loss": 1.5770587921142578, "step": 16132 }, { "epoch": 2.9367434240466004, "grad_norm": 8.8125, "learning_rate": 2.008933574329636e-06, "loss": 1.36320960521698, "step": 16134 }, { "epoch": 2.937107490670793, "grad_norm": 3.53125, "learning_rate": 2.0088315147619187e-06, "loss": 0.9429577589035034, "step": 16136 }, { "epoch": 2.9374715572949848, "grad_norm": 7.9375, "learning_rate": 2.008730040882001e-06, "loss": 0.9821655750274658, "step": 16138 }, { "epoch": 2.9378356239191774, "grad_norm": 10.875, "learning_rate": 2.008629152704775e-06, "loss": 1.2745946645736694, "step": 16140 }, { "epoch": 2.9381996905433696, "grad_norm": 10.6875, "learning_rate": 2.008528850245045e-06, "loss": 1.2576141357421875, "step": 16142 }, { "epoch": 2.938563757167562, "grad_norm": 12.0625, "learning_rate": 2.00842913351753e-06, "loss": 1.7421000003814697, "step": 16144 }, { "epoch": 2.938927823791754, "grad_norm": 8.375, "learning_rate": 2.008330002536864e-06, "loss": 1.280937671661377, "step": 16146 }, { "epoch": 2.939291890415946, "grad_norm": 14.125, "learning_rate": 2.008231457317593e-06, "loss": 1.3187261819839478, "step": 16148 }, { "epoch": 2.9396559570401384, "grad_norm": 13.625, "learning_rate": 2.008133497874178e-06, "loss": 1.3234012126922607, "step": 16150 }, { "epoch": 2.9400200236643306, "grad_norm": 8.4375, "learning_rate": 2.0080361242209945e-06, "loss": 1.1583294868469238, "step": 16152 }, { "epoch": 2.9403840902885228, "grad_norm": 12.1875, "learning_rate": 2.0079393363723322e-06, "loss": 0.9946235418319702, "step": 16154 }, { "epoch": 2.940748156912715, "grad_norm": 18.875, "learning_rate": 2.0078431343423945e-06, "loss": 1.4703614711761475, "step": 16156 }, { "epoch": 2.941112223536907, "grad_norm": 30.375, "learning_rate": 2.0077475181452967e-06, "loss": 1.6725093126296997, "step": 16158 }, { "epoch": 2.9414762901610993, "grad_norm": 12.0625, "learning_rate": 2.007652487795072e-06, "loss": 1.404555082321167, "step": 16160 }, { "epoch": 2.9418403567852915, "grad_norm": 10.0625, "learning_rate": 2.0075580433056654e-06, "loss": 1.473804235458374, "step": 16162 }, { "epoch": 2.9422044234094837, "grad_norm": 5.25, "learning_rate": 2.007464184690936e-06, "loss": 1.1531662940979004, "step": 16164 }, { "epoch": 2.9425684900336764, "grad_norm": 9.125, "learning_rate": 2.0073709119646567e-06, "loss": 1.3482539653778076, "step": 16166 }, { "epoch": 2.942932556657868, "grad_norm": 14.5, "learning_rate": 2.0072782251405155e-06, "loss": 1.3525104522705078, "step": 16168 }, { "epoch": 2.9432966232820608, "grad_norm": 10.375, "learning_rate": 2.0071861242321142e-06, "loss": 1.3789052963256836, "step": 16170 }, { "epoch": 2.943660689906253, "grad_norm": 136.0, "learning_rate": 2.007094609252967e-06, "loss": 1.0541635751724243, "step": 16172 }, { "epoch": 2.944024756530445, "grad_norm": 12.4375, "learning_rate": 2.0070036802165044e-06, "loss": 0.8645976781845093, "step": 16174 }, { "epoch": 2.9443888231546373, "grad_norm": 19.5, "learning_rate": 2.0069133371360693e-06, "loss": 1.3241825103759766, "step": 16176 }, { "epoch": 2.9447528897788295, "grad_norm": 13.25, "learning_rate": 2.0068235800249197e-06, "loss": 1.2801040410995483, "step": 16178 }, { "epoch": 2.9451169564030217, "grad_norm": 6.9375, "learning_rate": 2.0067344088962266e-06, "loss": 1.1969417333602905, "step": 16180 }, { "epoch": 2.945481023027214, "grad_norm": 8.875, "learning_rate": 2.0066458237630758e-06, "loss": 1.4311037063598633, "step": 16182 }, { "epoch": 2.945845089651406, "grad_norm": 12.125, "learning_rate": 2.006557824638467e-06, "loss": 1.3590706586837769, "step": 16184 }, { "epoch": 2.9462091562755983, "grad_norm": 14.25, "learning_rate": 2.0064704115353135e-06, "loss": 1.559118628501892, "step": 16186 }, { "epoch": 2.9465732228997905, "grad_norm": 18.25, "learning_rate": 2.006383584466442e-06, "loss": 1.9387481212615967, "step": 16188 }, { "epoch": 2.9469372895239827, "grad_norm": 9.5625, "learning_rate": 2.0062973434445953e-06, "loss": 1.1815623044967651, "step": 16190 }, { "epoch": 2.9473013561481753, "grad_norm": 13.8125, "learning_rate": 2.006211688482428e-06, "loss": 1.3023184537887573, "step": 16192 }, { "epoch": 2.947665422772367, "grad_norm": 10.5625, "learning_rate": 2.0061266195925104e-06, "loss": 1.7914209365844727, "step": 16194 }, { "epoch": 2.9480294893965597, "grad_norm": 5.53125, "learning_rate": 2.0060421367873255e-06, "loss": 1.307399868965149, "step": 16196 }, { "epoch": 2.948393556020752, "grad_norm": 4.1875, "learning_rate": 2.005958240079271e-06, "loss": 0.950873851776123, "step": 16198 }, { "epoch": 2.948757622644944, "grad_norm": 53.75, "learning_rate": 2.005874929480658e-06, "loss": 1.5999395847320557, "step": 16200 }, { "epoch": 2.9491216892691363, "grad_norm": 8.6875, "learning_rate": 2.005792205003713e-06, "loss": 1.4964826107025146, "step": 16202 }, { "epoch": 2.9494857558933285, "grad_norm": 17.375, "learning_rate": 2.0057100666605743e-06, "loss": 1.369545340538025, "step": 16204 }, { "epoch": 2.9498498225175207, "grad_norm": 16.875, "learning_rate": 2.005628514463296e-06, "loss": 1.1087909936904907, "step": 16206 }, { "epoch": 2.950213889141713, "grad_norm": 17.5, "learning_rate": 2.0055475484238453e-06, "loss": 0.8225274085998535, "step": 16208 }, { "epoch": 2.950577955765905, "grad_norm": 9.0625, "learning_rate": 2.005467168554104e-06, "loss": 1.014095425605774, "step": 16210 }, { "epoch": 2.9509420223900973, "grad_norm": 3.078125, "learning_rate": 2.005387374865867e-06, "loss": 0.9132812023162842, "step": 16212 }, { "epoch": 2.9513060890142895, "grad_norm": 7.75, "learning_rate": 2.005308167370844e-06, "loss": 1.1453416347503662, "step": 16214 }, { "epoch": 2.9516701556384817, "grad_norm": 9.6875, "learning_rate": 2.005229546080659e-06, "loss": 1.3302788734436035, "step": 16216 }, { "epoch": 2.9520342222626743, "grad_norm": 12.125, "learning_rate": 2.0051515110068477e-06, "loss": 1.365898609161377, "step": 16218 }, { "epoch": 2.952398288886866, "grad_norm": 8.625, "learning_rate": 2.0050740621608632e-06, "loss": 1.263388991355896, "step": 16220 }, { "epoch": 2.9527623555110587, "grad_norm": 23.75, "learning_rate": 2.00499719955407e-06, "loss": 1.4513204097747803, "step": 16222 }, { "epoch": 2.953126422135251, "grad_norm": 51.5, "learning_rate": 2.004920923197747e-06, "loss": 2.0801005363464355, "step": 16224 }, { "epoch": 2.953490488759443, "grad_norm": 26.125, "learning_rate": 2.004845233103088e-06, "loss": 1.3742839097976685, "step": 16226 }, { "epoch": 2.9538545553836353, "grad_norm": 9.875, "learning_rate": 2.0047701292812003e-06, "loss": 1.4993829727172852, "step": 16228 }, { "epoch": 2.9542186220078275, "grad_norm": 11.3125, "learning_rate": 2.004695611743105e-06, "loss": 1.3701081275939941, "step": 16230 }, { "epoch": 2.9545826886320197, "grad_norm": 19.125, "learning_rate": 2.004621680499737e-06, "loss": 1.457590103149414, "step": 16232 }, { "epoch": 2.954946755256212, "grad_norm": 9.0, "learning_rate": 2.0045483355619455e-06, "loss": 1.4919146299362183, "step": 16234 }, { "epoch": 2.955310821880404, "grad_norm": 8.875, "learning_rate": 2.0044755769404937e-06, "loss": 1.237777590751648, "step": 16236 }, { "epoch": 2.9556748885045963, "grad_norm": 9.0625, "learning_rate": 2.004403404646058e-06, "loss": 0.9515402913093567, "step": 16238 }, { "epoch": 2.9560389551287884, "grad_norm": 29.75, "learning_rate": 2.0043318186892303e-06, "loss": 1.6690784692764282, "step": 16240 }, { "epoch": 2.9564030217529806, "grad_norm": 14.4375, "learning_rate": 2.004260819080516e-06, "loss": 1.6799991130828857, "step": 16242 }, { "epoch": 2.9567670883771733, "grad_norm": 16.625, "learning_rate": 2.004190405830332e-06, "loss": 1.5002727508544922, "step": 16244 }, { "epoch": 2.957131155001365, "grad_norm": 16.375, "learning_rate": 2.0041205789490127e-06, "loss": 1.704007625579834, "step": 16246 }, { "epoch": 2.9574952216255577, "grad_norm": 17.25, "learning_rate": 2.0040513384468047e-06, "loss": 1.4280048608779907, "step": 16248 }, { "epoch": 2.95785928824975, "grad_norm": 8.25, "learning_rate": 2.0039826843338687e-06, "loss": 1.0051014423370361, "step": 16250 }, { "epoch": 2.958223354873942, "grad_norm": 8.875, "learning_rate": 2.0039146166202793e-06, "loss": 1.5191724300384521, "step": 16252 }, { "epoch": 2.9585874214981343, "grad_norm": 11.375, "learning_rate": 2.0038471353160248e-06, "loss": 1.4216265678405762, "step": 16254 }, { "epoch": 2.9589514881223264, "grad_norm": 10.125, "learning_rate": 2.0037802404310086e-06, "loss": 1.3860918283462524, "step": 16256 }, { "epoch": 2.9593155547465186, "grad_norm": 3.234375, "learning_rate": 2.0037139319750465e-06, "loss": 1.0458552837371826, "step": 16258 }, { "epoch": 2.959679621370711, "grad_norm": 10.5625, "learning_rate": 2.00364820995787e-06, "loss": 1.0160319805145264, "step": 16260 }, { "epoch": 2.960043687994903, "grad_norm": 15.1875, "learning_rate": 2.0035830743891223e-06, "loss": 1.4707136154174805, "step": 16262 }, { "epoch": 2.960407754619095, "grad_norm": 13.4375, "learning_rate": 2.0035185252783627e-06, "loss": 1.3480578660964966, "step": 16264 }, { "epoch": 2.9607718212432874, "grad_norm": 18.875, "learning_rate": 2.003454562635063e-06, "loss": 1.3034281730651855, "step": 16266 }, { "epoch": 2.9611358878674796, "grad_norm": 21.625, "learning_rate": 2.0033911864686097e-06, "loss": 1.4252829551696777, "step": 16268 }, { "epoch": 2.9614999544916722, "grad_norm": 9.9375, "learning_rate": 2.0033283967883027e-06, "loss": 1.4396251440048218, "step": 16270 }, { "epoch": 2.961864021115864, "grad_norm": 10.0, "learning_rate": 2.003266193603357e-06, "loss": 1.0804598331451416, "step": 16272 }, { "epoch": 2.9622280877400566, "grad_norm": 21.875, "learning_rate": 2.003204576922899e-06, "loss": 1.15805983543396, "step": 16274 }, { "epoch": 2.9625921543642484, "grad_norm": 42.25, "learning_rate": 2.003143546755973e-06, "loss": 1.342362403869629, "step": 16276 }, { "epoch": 2.962956220988441, "grad_norm": 12.5, "learning_rate": 2.0030831031115332e-06, "loss": 0.8277813792228699, "step": 16278 }, { "epoch": 2.963320287612633, "grad_norm": 9.5, "learning_rate": 2.00302324599845e-06, "loss": 0.9522460699081421, "step": 16280 }, { "epoch": 2.9636843542368254, "grad_norm": 17.125, "learning_rate": 2.002963975425506e-06, "loss": 1.5351015329360962, "step": 16282 }, { "epoch": 2.9640484208610176, "grad_norm": 18.125, "learning_rate": 2.0029052914014014e-06, "loss": 1.8087066411972046, "step": 16284 }, { "epoch": 2.96441248748521, "grad_norm": 8.9375, "learning_rate": 2.002847193934746e-06, "loss": 0.9702337980270386, "step": 16286 }, { "epoch": 2.964776554109402, "grad_norm": 11.6875, "learning_rate": 2.002789683034066e-06, "loss": 0.9694652557373047, "step": 16288 }, { "epoch": 2.965140620733594, "grad_norm": 10.6875, "learning_rate": 2.0027327587078006e-06, "loss": 1.4253320693969727, "step": 16290 }, { "epoch": 2.9655046873577864, "grad_norm": 6.09375, "learning_rate": 2.0026764209643033e-06, "loss": 1.1654529571533203, "step": 16292 }, { "epoch": 2.9658687539819786, "grad_norm": 53.25, "learning_rate": 2.0026206698118417e-06, "loss": 1.2843589782714844, "step": 16294 }, { "epoch": 2.9662328206061708, "grad_norm": 26.375, "learning_rate": 2.002565505258597e-06, "loss": 1.5341386795043945, "step": 16296 }, { "epoch": 2.966596887230363, "grad_norm": 11.25, "learning_rate": 2.0025109273126634e-06, "loss": 1.6817675828933716, "step": 16298 }, { "epoch": 2.9669609538545556, "grad_norm": 20.0, "learning_rate": 2.0024569359820513e-06, "loss": 1.088269591331482, "step": 16300 }, { "epoch": 2.9673250204787474, "grad_norm": 8.4375, "learning_rate": 2.0024035312746833e-06, "loss": 1.3678847551345825, "step": 16302 }, { "epoch": 2.96768908710294, "grad_norm": 20.5, "learning_rate": 2.0023507131983966e-06, "loss": 1.2941250801086426, "step": 16304 }, { "epoch": 2.968053153727132, "grad_norm": 24.625, "learning_rate": 2.0022984817609407e-06, "loss": 1.5039629936218262, "step": 16306 }, { "epoch": 2.9684172203513244, "grad_norm": 7.875, "learning_rate": 2.0022468369699825e-06, "loss": 1.3794997930526733, "step": 16308 }, { "epoch": 2.9687812869755166, "grad_norm": 10.8125, "learning_rate": 2.0021957788330986e-06, "loss": 1.1620562076568604, "step": 16310 }, { "epoch": 2.9691453535997088, "grad_norm": 18.625, "learning_rate": 2.0021453073577825e-06, "loss": 1.695955514907837, "step": 16312 }, { "epoch": 2.969509420223901, "grad_norm": 14.25, "learning_rate": 2.0020954225514413e-06, "loss": 1.35657799243927, "step": 16314 }, { "epoch": 2.969873486848093, "grad_norm": 8.6875, "learning_rate": 2.0020461244213943e-06, "loss": 1.2799112796783447, "step": 16316 }, { "epoch": 2.9702375534722854, "grad_norm": 10.125, "learning_rate": 2.0019974129748765e-06, "loss": 1.2740790843963623, "step": 16318 }, { "epoch": 2.9706016200964775, "grad_norm": 13.25, "learning_rate": 2.001949288219036e-06, "loss": 1.4777576923370361, "step": 16320 }, { "epoch": 2.9709656867206697, "grad_norm": 22.875, "learning_rate": 2.001901750160934e-06, "loss": 1.8469033241271973, "step": 16322 }, { "epoch": 2.971329753344862, "grad_norm": 18.375, "learning_rate": 2.0018547988075476e-06, "loss": 1.25077223777771, "step": 16324 }, { "epoch": 2.9716938199690546, "grad_norm": 30.25, "learning_rate": 2.001808434165767e-06, "loss": 1.4238383769989014, "step": 16326 }, { "epoch": 2.9720578865932463, "grad_norm": 25.875, "learning_rate": 2.0017626562423947e-06, "loss": 1.7187809944152832, "step": 16328 }, { "epoch": 2.972421953217439, "grad_norm": 14.0625, "learning_rate": 2.0017174650441494e-06, "loss": 1.59052574634552, "step": 16330 }, { "epoch": 2.972786019841631, "grad_norm": 25.125, "learning_rate": 2.001672860577663e-06, "loss": 0.9362858533859253, "step": 16332 }, { "epoch": 2.9731500864658233, "grad_norm": 22.875, "learning_rate": 2.0016288428494803e-06, "loss": 1.1181987524032593, "step": 16334 }, { "epoch": 2.9735141530900155, "grad_norm": 20.125, "learning_rate": 2.00158541186606e-06, "loss": 1.4670336246490479, "step": 16336 }, { "epoch": 2.9738782197142077, "grad_norm": 19.25, "learning_rate": 2.0015425676337773e-06, "loss": 1.3774993419647217, "step": 16338 }, { "epoch": 2.9742422863384, "grad_norm": 5.59375, "learning_rate": 2.001500310158918e-06, "loss": 1.333992838859558, "step": 16340 }, { "epoch": 2.974606352962592, "grad_norm": 2.75, "learning_rate": 2.001458639447684e-06, "loss": 1.207629680633545, "step": 16342 }, { "epoch": 2.9749704195867843, "grad_norm": 5.03125, "learning_rate": 2.0014175555061897e-06, "loss": 1.0353045463562012, "step": 16344 }, { "epoch": 2.9753344862109765, "grad_norm": 4.875, "learning_rate": 2.001377058340465e-06, "loss": 1.170973777770996, "step": 16346 }, { "epoch": 2.9756985528351687, "grad_norm": 24.25, "learning_rate": 2.0013371479564514e-06, "loss": 1.2009278535842896, "step": 16348 }, { "epoch": 2.976062619459361, "grad_norm": 10.0, "learning_rate": 2.001297824360006e-06, "loss": 1.0104331970214844, "step": 16350 }, { "epoch": 2.9764266860835535, "grad_norm": 17.25, "learning_rate": 2.0012590875568997e-06, "loss": 1.3624790906906128, "step": 16352 }, { "epoch": 2.9767907527077453, "grad_norm": 12.5, "learning_rate": 2.001220937552817e-06, "loss": 1.492138147354126, "step": 16354 }, { "epoch": 2.977154819331938, "grad_norm": 38.0, "learning_rate": 2.001183374353356e-06, "loss": 1.5225915908813477, "step": 16356 }, { "epoch": 2.97751888595613, "grad_norm": 6.4375, "learning_rate": 2.001146397964029e-06, "loss": 1.3869601488113403, "step": 16358 }, { "epoch": 2.9778829525803223, "grad_norm": 28.25, "learning_rate": 2.0011100083902625e-06, "loss": 1.1126316785812378, "step": 16360 }, { "epoch": 2.9782470192045145, "grad_norm": 26.625, "learning_rate": 2.0010742056373954e-06, "loss": 2.039464235305786, "step": 16362 }, { "epoch": 2.9786110858287067, "grad_norm": 15.8125, "learning_rate": 2.001038989710683e-06, "loss": 1.8592197895050049, "step": 16364 }, { "epoch": 2.978975152452899, "grad_norm": 16.25, "learning_rate": 2.0010043606152925e-06, "loss": 1.1575515270233154, "step": 16366 }, { "epoch": 2.979339219077091, "grad_norm": 19.0, "learning_rate": 2.0009703183563054e-06, "loss": 0.9458638429641724, "step": 16368 }, { "epoch": 2.9797032857012833, "grad_norm": 16.0, "learning_rate": 2.0009368629387174e-06, "loss": 1.3846774101257324, "step": 16370 }, { "epoch": 2.9800673523254755, "grad_norm": 11.1875, "learning_rate": 2.000903994367438e-06, "loss": 1.3616442680358887, "step": 16372 }, { "epoch": 2.9804314189496677, "grad_norm": 11.25, "learning_rate": 2.0008717126472904e-06, "loss": 1.2130813598632812, "step": 16374 }, { "epoch": 2.98079548557386, "grad_norm": 13.5625, "learning_rate": 2.0008400177830123e-06, "loss": 0.5574257373809814, "step": 16376 }, { "epoch": 2.9811595521980525, "grad_norm": 10.375, "learning_rate": 2.000808909779254e-06, "loss": 0.8714935183525085, "step": 16378 }, { "epoch": 2.9815236188222443, "grad_norm": 4.8125, "learning_rate": 2.0007783886405813e-06, "loss": 1.4558019638061523, "step": 16380 }, { "epoch": 2.981887685446437, "grad_norm": 3.703125, "learning_rate": 2.0007484543714718e-06, "loss": 0.9858094453811646, "step": 16382 }, { "epoch": 2.9822517520706286, "grad_norm": 8.875, "learning_rate": 2.00071910697632e-06, "loss": 1.1196403503417969, "step": 16384 }, { "epoch": 2.9826158186948213, "grad_norm": 9.875, "learning_rate": 2.000690346459431e-06, "loss": 1.3953125476837158, "step": 16386 }, { "epoch": 2.9829798853190135, "grad_norm": 6.8125, "learning_rate": 2.0006621728250264e-06, "loss": 1.5328149795532227, "step": 16388 }, { "epoch": 2.9833439519432057, "grad_norm": 6.28125, "learning_rate": 2.0006345860772395e-06, "loss": 1.0095698833465576, "step": 16390 }, { "epoch": 2.983708018567398, "grad_norm": 4.84375, "learning_rate": 2.0006075862201195e-06, "loss": 1.47171151638031, "step": 16392 }, { "epoch": 2.98407208519159, "grad_norm": 8.8125, "learning_rate": 2.000581173257628e-06, "loss": 1.4703402519226074, "step": 16394 }, { "epoch": 2.9844361518157823, "grad_norm": 16.5, "learning_rate": 2.0005553471936413e-06, "loss": 1.6649112701416016, "step": 16396 }, { "epoch": 2.9848002184399745, "grad_norm": 13.875, "learning_rate": 2.0005301080319485e-06, "loss": 1.2723469734191895, "step": 16398 }, { "epoch": 2.9851642850641666, "grad_norm": 9.5, "learning_rate": 2.000505455776254e-06, "loss": 1.138935923576355, "step": 16400 }, { "epoch": 2.985528351688359, "grad_norm": 10.9375, "learning_rate": 2.0004813904301756e-06, "loss": 1.3505103588104248, "step": 16402 }, { "epoch": 2.985892418312551, "grad_norm": 19.25, "learning_rate": 2.0004579119972446e-06, "loss": 1.2249739170074463, "step": 16404 }, { "epoch": 2.9862564849367432, "grad_norm": 11.875, "learning_rate": 2.0004350204809063e-06, "loss": 0.6565002202987671, "step": 16406 }, { "epoch": 2.986620551560936, "grad_norm": 16.75, "learning_rate": 2.00041271588452e-06, "loss": 0.2887282967567444, "step": 16408 }, { "epoch": 2.9869846181851276, "grad_norm": 12.875, "learning_rate": 2.000390998211358e-06, "loss": 1.6883145570755005, "step": 16410 }, { "epoch": 2.9873486848093203, "grad_norm": 22.25, "learning_rate": 2.000369867464609e-06, "loss": 1.104772686958313, "step": 16412 }, { "epoch": 2.9877127514335124, "grad_norm": 9.0, "learning_rate": 2.0003493236473725e-06, "loss": 1.3531534671783447, "step": 16414 }, { "epoch": 2.9880768180577046, "grad_norm": 7.46875, "learning_rate": 2.000329366762663e-06, "loss": 1.304721474647522, "step": 16416 }, { "epoch": 2.988440884681897, "grad_norm": 23.875, "learning_rate": 2.0003099968134104e-06, "loss": 1.1761491298675537, "step": 16418 }, { "epoch": 2.988804951306089, "grad_norm": 24.125, "learning_rate": 2.0002912138024565e-06, "loss": 1.5270304679870605, "step": 16420 }, { "epoch": 2.9891690179302812, "grad_norm": 13.0625, "learning_rate": 2.000273017732557e-06, "loss": 1.0654093027114868, "step": 16422 }, { "epoch": 2.9895330845544734, "grad_norm": 14.375, "learning_rate": 2.000255408606383e-06, "loss": 1.4028539657592773, "step": 16424 }, { "epoch": 2.9898971511786656, "grad_norm": 16.375, "learning_rate": 2.000238386426518e-06, "loss": 1.2568185329437256, "step": 16426 }, { "epoch": 2.990261217802858, "grad_norm": 14.0, "learning_rate": 2.0002219511954605e-06, "loss": 1.2188596725463867, "step": 16428 }, { "epoch": 2.99062528442705, "grad_norm": 21.0, "learning_rate": 2.000206102915622e-06, "loss": 0.8418086767196655, "step": 16430 }, { "epoch": 2.990989351051242, "grad_norm": 23.25, "learning_rate": 2.000190841589328e-06, "loss": 1.3837864398956299, "step": 16432 }, { "epoch": 2.991353417675435, "grad_norm": 7.8125, "learning_rate": 2.0001761672188182e-06, "loss": 1.3582119941711426, "step": 16434 }, { "epoch": 2.9917174842996266, "grad_norm": 5.0, "learning_rate": 2.000162079806246e-06, "loss": 1.3401474952697754, "step": 16436 }, { "epoch": 2.992081550923819, "grad_norm": 15.375, "learning_rate": 2.0001485793536785e-06, "loss": 1.3087061643600464, "step": 16438 }, { "epoch": 2.9924456175480114, "grad_norm": 11.5, "learning_rate": 2.000135665863097e-06, "loss": 1.4459304809570312, "step": 16440 }, { "epoch": 2.9928096841722036, "grad_norm": 14.5625, "learning_rate": 2.0001233393363968e-06, "loss": 1.300643801689148, "step": 16442 }, { "epoch": 2.993173750796396, "grad_norm": 11.9375, "learning_rate": 2.0001115997753866e-06, "loss": 1.3814854621887207, "step": 16444 }, { "epoch": 2.993537817420588, "grad_norm": 25.125, "learning_rate": 2.0001004471817887e-06, "loss": 1.370299220085144, "step": 16446 }, { "epoch": 2.99390188404478, "grad_norm": 24.5, "learning_rate": 2.00008988155724e-06, "loss": 1.9163622856140137, "step": 16448 }, { "epoch": 2.9942659506689724, "grad_norm": 47.5, "learning_rate": 2.0000799029032906e-06, "loss": 1.4259893894195557, "step": 16450 }, { "epoch": 2.9946300172931646, "grad_norm": 12.25, "learning_rate": 2.0000705112214055e-06, "loss": 1.2691893577575684, "step": 16452 }, { "epoch": 2.9949940839173568, "grad_norm": 6.1875, "learning_rate": 2.0000617065129626e-06, "loss": 1.184010624885559, "step": 16454 }, { "epoch": 2.995358150541549, "grad_norm": 6.4375, "learning_rate": 2.000053488779254e-06, "loss": 1.2077614068984985, "step": 16456 }, { "epoch": 2.995722217165741, "grad_norm": 10.25, "learning_rate": 2.000045858021486e-06, "loss": 1.4405052661895752, "step": 16458 }, { "epoch": 2.996086283789934, "grad_norm": 79.5, "learning_rate": 2.0000388142407775e-06, "loss": 1.3099960088729858, "step": 16460 }, { "epoch": 2.9964503504141256, "grad_norm": 12.125, "learning_rate": 2.0000323574381624e-06, "loss": 0.7813707590103149, "step": 16462 }, { "epoch": 2.996814417038318, "grad_norm": 5.8125, "learning_rate": 2.0000264876145884e-06, "loss": 1.3825950622558594, "step": 16464 }, { "epoch": 2.9971784836625104, "grad_norm": 5.34375, "learning_rate": 2.000021204770917e-06, "loss": 1.2435548305511475, "step": 16466 }, { "epoch": 2.9975425502867026, "grad_norm": 8.25, "learning_rate": 2.0000165089079237e-06, "loss": 1.1585209369659424, "step": 16468 }, { "epoch": 2.9979066169108948, "grad_norm": 8.375, "learning_rate": 2.0000124000262966e-06, "loss": 1.3255114555358887, "step": 16470 }, { "epoch": 2.998270683535087, "grad_norm": 12.875, "learning_rate": 2.0000088781266396e-06, "loss": 1.2500178813934326, "step": 16472 }, { "epoch": 2.998634750159279, "grad_norm": 12.125, "learning_rate": 2.000005943209469e-06, "loss": 1.351958155632019, "step": 16474 }, { "epoch": 2.9989988167834714, "grad_norm": 7.15625, "learning_rate": 2.000003595275216e-06, "loss": 1.1764051914215088, "step": 16476 }, { "epoch": 2.9993628834076635, "grad_norm": 14.375, "learning_rate": 2.0000018343242243e-06, "loss": 1.8574588298797607, "step": 16478 }, { "epoch": 2.9997269500318557, "grad_norm": 11.75, "learning_rate": 2.000000660356753e-06, "loss": 1.5543640851974487, "step": 16480 }, { "epoch": 3.0, "grad_norm": 15.9375, "learning_rate": 2.0000000733729745e-06, "loss": 1.4083051681518555, "step": 16482 }, { "epoch": 3.0, "step": 16482, "total_flos": 3.229073396012679e+18, "train_loss": 1.3400128969104237, "train_runtime": 21051.3439, "train_samples_per_second": 1.566, "train_steps_per_second": 0.783 } ], "logging_steps": 2, "max_steps": 16482, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 9999999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.229073396012679e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }