{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 16482, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00036406662419222717, "grad_norm": 2.203125, "learning_rate": 6.250000000000001e-08, "loss": 1.4439427852630615, "step": 2 }, { "epoch": 0.0007281332483844543, "grad_norm": 31.375, "learning_rate": 1.875e-07, "loss": 1.8952510356903076, "step": 4 }, { "epoch": 0.0010921998725766816, "grad_norm": 17.0, "learning_rate": 3.125e-07, "loss": 1.9810845851898193, "step": 6 }, { "epoch": 0.0014562664967689087, "grad_norm": 4.46875, "learning_rate": 4.375e-07, "loss": 1.1081929206848145, "step": 8 }, { "epoch": 0.0018203331209611358, "grad_norm": 58.25, "learning_rate": 5.625e-07, "loss": 1.6922560930252075, "step": 10 }, { "epoch": 0.002184399745153363, "grad_norm": 67.5, "learning_rate": 6.875000000000001e-07, "loss": 2.8736701011657715, "step": 12 }, { "epoch": 0.0025484663693455902, "grad_norm": 13.6875, "learning_rate": 8.125000000000001e-07, "loss": 1.8159987926483154, "step": 14 }, { "epoch": 0.0029125329935378174, "grad_norm": 5.0, "learning_rate": 9.375000000000001e-07, "loss": 1.4211392402648926, "step": 16 }, { "epoch": 0.0032765996177300445, "grad_norm": 19.25, "learning_rate": 1.0625e-06, "loss": 1.7765966653823853, "step": 18 }, { "epoch": 0.0036406662419222716, "grad_norm": 36.5, "learning_rate": 1.1875e-06, "loss": 2.275758981704712, "step": 20 }, { "epoch": 0.004004732866114499, "grad_norm": 179.0, "learning_rate": 1.3125000000000001e-06, "loss": 2.7087841033935547, "step": 22 }, { "epoch": 0.004368799490306726, "grad_norm": 30.25, "learning_rate": 1.4375e-06, "loss": 1.7012183666229248, "step": 24 }, { "epoch": 0.004732866114498953, "grad_norm": 16.625, "learning_rate": 1.5625e-06, "loss": 1.5296523571014404, "step": 26 }, { "epoch": 0.0050969327386911805, "grad_norm": 24.75, "learning_rate": 1.6875000000000001e-06, "loss": 1.8775393962860107, "step": 28 }, { "epoch": 0.005460999362883408, "grad_norm": 10.0, "learning_rate": 1.8125e-06, "loss": 1.9424123764038086, "step": 30 }, { "epoch": 0.005825065987075635, "grad_norm": 17.25, "learning_rate": 1.9375e-06, "loss": 1.7390620708465576, "step": 32 }, { "epoch": 0.006189132611267862, "grad_norm": 28.125, "learning_rate": 2.0625e-06, "loss": 2.4498658180236816, "step": 34 }, { "epoch": 0.006553199235460089, "grad_norm": 10.375, "learning_rate": 2.1875000000000002e-06, "loss": 1.2069549560546875, "step": 36 }, { "epoch": 0.006917265859652316, "grad_norm": 61.5, "learning_rate": 2.3125000000000003e-06, "loss": 1.9799649715423584, "step": 38 }, { "epoch": 0.007281332483844543, "grad_norm": 9.5625, "learning_rate": 2.4375e-06, "loss": 1.8582046031951904, "step": 40 }, { "epoch": 0.007645399108036771, "grad_norm": 15.0, "learning_rate": 2.5625e-06, "loss": 1.9181056022644043, "step": 42 }, { "epoch": 0.008009465732228998, "grad_norm": 10.5, "learning_rate": 2.6875e-06, "loss": 1.8542757034301758, "step": 44 }, { "epoch": 0.008373532356421225, "grad_norm": 68.0, "learning_rate": 2.8125e-06, "loss": 2.026920795440674, "step": 46 }, { "epoch": 0.008737598980613452, "grad_norm": 12.125, "learning_rate": 2.9375000000000003e-06, "loss": 1.7167916297912598, "step": 48 }, { "epoch": 0.00910166560480568, "grad_norm": 27.875, "learning_rate": 3.0625000000000003e-06, "loss": 2.0626585483551025, "step": 50 }, { "epoch": 0.009465732228997907, "grad_norm": 9.75, "learning_rate": 3.1875e-06, "loss": 1.354461908340454, "step": 52 }, { "epoch": 0.009829798853190134, "grad_norm": 2.09375, "learning_rate": 3.3125e-06, "loss": 1.2345013618469238, "step": 54 }, { "epoch": 0.010193865477382361, "grad_norm": 5.65625, "learning_rate": 3.4375e-06, "loss": 1.0628052949905396, "step": 56 }, { "epoch": 0.010557932101574588, "grad_norm": 13.9375, "learning_rate": 3.5625e-06, "loss": 1.9976742267608643, "step": 58 }, { "epoch": 0.010921998725766815, "grad_norm": 10.1875, "learning_rate": 3.6875000000000007e-06, "loss": 1.4608995914459229, "step": 60 }, { "epoch": 0.011286065349959042, "grad_norm": 17.75, "learning_rate": 3.8125e-06, "loss": 1.997931957244873, "step": 62 }, { "epoch": 0.01165013197415127, "grad_norm": 5.875, "learning_rate": 3.9375e-06, "loss": 1.4563177824020386, "step": 64 }, { "epoch": 0.012014198598343497, "grad_norm": 11.5625, "learning_rate": 4.0625000000000005e-06, "loss": 1.8709797859191895, "step": 66 }, { "epoch": 0.012378265222535724, "grad_norm": 42.5, "learning_rate": 4.1875e-06, "loss": 1.2400493621826172, "step": 68 }, { "epoch": 0.01274233184672795, "grad_norm": 7.28125, "learning_rate": 4.312500000000001e-06, "loss": 0.9505149126052856, "step": 70 }, { "epoch": 0.013106398470920178, "grad_norm": 24.875, "learning_rate": 4.4375e-06, "loss": 1.850921392440796, "step": 72 }, { "epoch": 0.013470465095112405, "grad_norm": 15.3125, "learning_rate": 4.5625e-06, "loss": 1.7793394327163696, "step": 74 }, { "epoch": 0.013834531719304632, "grad_norm": 18.375, "learning_rate": 4.6875000000000004e-06, "loss": 1.660430908203125, "step": 76 }, { "epoch": 0.01419859834349686, "grad_norm": 18.125, "learning_rate": 4.8125e-06, "loss": 1.788752794265747, "step": 78 }, { "epoch": 0.014562664967689086, "grad_norm": 26.0, "learning_rate": 4.937500000000001e-06, "loss": 1.2316336631774902, "step": 80 }, { "epoch": 0.014926731591881313, "grad_norm": 17.5, "learning_rate": 4.9999999633135135e-06, "loss": 1.8627386093139648, "step": 82 }, { "epoch": 0.015290798216073542, "grad_norm": 3.71875, "learning_rate": 4.9999996698216244e-06, "loss": 1.1692750453948975, "step": 84 }, { "epoch": 0.015654864840265768, "grad_norm": 9.4375, "learning_rate": 4.999999082837889e-06, "loss": 1.704923152923584, "step": 86 }, { "epoch": 0.016018931464457997, "grad_norm": 11.875, "learning_rate": 4.9999982023623925e-06, "loss": 1.838752269744873, "step": 88 }, { "epoch": 0.016382998088650222, "grad_norm": 9.25, "learning_rate": 4.999997028395266e-06, "loss": 1.5959210395812988, "step": 90 }, { "epoch": 0.01674706471284245, "grad_norm": 16.75, "learning_rate": 4.999995560936682e-06, "loss": 1.7869584560394287, "step": 92 }, { "epoch": 0.017111131337034676, "grad_norm": 17.0, "learning_rate": 4.999993799986852e-06, "loss": 1.9351712465286255, "step": 94 }, { "epoch": 0.017475197961226905, "grad_norm": 3.9375, "learning_rate": 4.9999917455460385e-06, "loss": 1.0348005294799805, "step": 96 }, { "epoch": 0.01783926458541913, "grad_norm": 54.25, "learning_rate": 4.999989397614542e-06, "loss": 2.414271593093872, "step": 98 }, { "epoch": 0.01820333120961136, "grad_norm": 12.6875, "learning_rate": 4.999986756192706e-06, "loss": 1.5969310998916626, "step": 100 }, { "epoch": 0.018567397833803585, "grad_norm": 13.25, "learning_rate": 4.999983821280919e-06, "loss": 1.6561098098754883, "step": 102 }, { "epoch": 0.018931464457995813, "grad_norm": 15.6875, "learning_rate": 4.999980592879612e-06, "loss": 1.6747602224349976, "step": 104 }, { "epoch": 0.01929553108218804, "grad_norm": 38.0, "learning_rate": 4.999977070989258e-06, "loss": 1.5859401226043701, "step": 106 }, { "epoch": 0.019659597706380268, "grad_norm": 16.5, "learning_rate": 4.999973255610374e-06, "loss": 1.6103729009628296, "step": 108 }, { "epoch": 0.020023664330572493, "grad_norm": 9.6875, "learning_rate": 4.99996914674352e-06, "loss": 1.655151605606079, "step": 110 }, { "epoch": 0.020387730954764722, "grad_norm": 9.5, "learning_rate": 4.999964744389298e-06, "loss": 1.4251558780670166, "step": 112 }, { "epoch": 0.02075179757895695, "grad_norm": 21.625, "learning_rate": 4.999960048548356e-06, "loss": 1.7941898107528687, "step": 114 }, { "epoch": 0.021115864203149176, "grad_norm": 28.375, "learning_rate": 4.999955059221381e-06, "loss": 1.775604009628296, "step": 116 }, { "epoch": 0.021479930827341405, "grad_norm": 37.5, "learning_rate": 4.999949776409106e-06, "loss": 1.9844095706939697, "step": 118 }, { "epoch": 0.02184399745153363, "grad_norm": 12.375, "learning_rate": 4.999944200112308e-06, "loss": 1.089508295059204, "step": 120 }, { "epoch": 0.02220806407572586, "grad_norm": 4.875, "learning_rate": 4.999938330331802e-06, "loss": 1.1590930223464966, "step": 122 }, { "epoch": 0.022572130699918085, "grad_norm": 11.9375, "learning_rate": 4.999932167068452e-06, "loss": 1.5268014669418335, "step": 124 }, { "epoch": 0.022936197324110313, "grad_norm": 8.5, "learning_rate": 4.999925710323161e-06, "loss": 1.1743862628936768, "step": 126 }, { "epoch": 0.02330026394830254, "grad_norm": 13.125, "learning_rate": 4.999918960096878e-06, "loss": 1.8321239948272705, "step": 128 }, { "epoch": 0.023664330572494768, "grad_norm": 15.4375, "learning_rate": 4.999911916390592e-06, "loss": 1.7687184810638428, "step": 130 }, { "epoch": 0.024028397196686993, "grad_norm": 14.3125, "learning_rate": 4.999904579205337e-06, "loss": 1.763999342918396, "step": 132 }, { "epoch": 0.024392463820879222, "grad_norm": 16.25, "learning_rate": 4.99989694854219e-06, "loss": 1.6820811033248901, "step": 134 }, { "epoch": 0.024756530445071447, "grad_norm": 8.5, "learning_rate": 4.9998890244022705e-06, "loss": 1.6157740354537964, "step": 136 }, { "epoch": 0.025120597069263676, "grad_norm": 40.0, "learning_rate": 4.999880806786742e-06, "loss": 1.7108687162399292, "step": 138 }, { "epoch": 0.0254846636934559, "grad_norm": 12.8125, "learning_rate": 4.999872295696809e-06, "loss": 1.628999948501587, "step": 140 }, { "epoch": 0.02584873031764813, "grad_norm": 9.1875, "learning_rate": 4.999863491133722e-06, "loss": 0.9917442202568054, "step": 142 }, { "epoch": 0.026212796941840356, "grad_norm": 10.1875, "learning_rate": 4.999854393098773e-06, "loss": 1.8497142791748047, "step": 144 }, { "epoch": 0.026576863566032585, "grad_norm": 31.5, "learning_rate": 4.999845001593295e-06, "loss": 1.157009482383728, "step": 146 }, { "epoch": 0.02694093019022481, "grad_norm": 5.15625, "learning_rate": 4.999835316618668e-06, "loss": 0.9564247131347656, "step": 148 }, { "epoch": 0.02730499681441704, "grad_norm": 3.453125, "learning_rate": 4.999825338176315e-06, "loss": 0.9530770778656006, "step": 150 }, { "epoch": 0.027669063438609264, "grad_norm": 24.875, "learning_rate": 4.999815066267696e-06, "loss": 2.0179812908172607, "step": 152 }, { "epoch": 0.028033130062801493, "grad_norm": 9.625, "learning_rate": 4.999804500894322e-06, "loss": 1.5720603466033936, "step": 154 }, { "epoch": 0.02839719668699372, "grad_norm": 11.75, "learning_rate": 4.999793642057741e-06, "loss": 1.8015835285186768, "step": 156 }, { "epoch": 0.028761263311185947, "grad_norm": 12.5, "learning_rate": 4.999782489759548e-06, "loss": 1.7068777084350586, "step": 158 }, { "epoch": 0.029125329935378173, "grad_norm": 8.125, "learning_rate": 4.999771044001378e-06, "loss": 1.7578529119491577, "step": 160 }, { "epoch": 0.0294893965595704, "grad_norm": 10.3125, "learning_rate": 4.999759304784912e-06, "loss": 1.7013229131698608, "step": 162 }, { "epoch": 0.029853463183762627, "grad_norm": 5.75, "learning_rate": 4.999747272111874e-06, "loss": 1.1762781143188477, "step": 164 }, { "epoch": 0.030217529807954856, "grad_norm": 13.1875, "learning_rate": 4.999734945984026e-06, "loss": 1.6700516939163208, "step": 166 }, { "epoch": 0.030581596432147085, "grad_norm": 16.25, "learning_rate": 4.9997223264031805e-06, "loss": 1.02428138256073, "step": 168 }, { "epoch": 0.03094566305633931, "grad_norm": 9.6875, "learning_rate": 4.999709413371187e-06, "loss": 1.6712379455566406, "step": 170 }, { "epoch": 0.031309729680531535, "grad_norm": 34.5, "learning_rate": 4.999696206889942e-06, "loss": 1.5965200662612915, "step": 172 }, { "epoch": 0.031673796304723764, "grad_norm": 63.25, "learning_rate": 4.999682706961381e-06, "loss": 1.1542786359786987, "step": 174 }, { "epoch": 0.03203786292891599, "grad_norm": 14.0625, "learning_rate": 4.999668913587488e-06, "loss": 1.7522170543670654, "step": 176 }, { "epoch": 0.03240192955310822, "grad_norm": 22.25, "learning_rate": 4.999654826770285e-06, "loss": 1.4951016902923584, "step": 178 }, { "epoch": 0.032765996177300444, "grad_norm": 5.875, "learning_rate": 4.999640446511841e-06, "loss": 1.3878135681152344, "step": 180 }, { "epoch": 0.03313006280149267, "grad_norm": 42.0, "learning_rate": 4.999625772814265e-06, "loss": 2.111569404602051, "step": 182 }, { "epoch": 0.0334941294256849, "grad_norm": 4.9375, "learning_rate": 4.99961080567971e-06, "loss": 1.096387267112732, "step": 184 }, { "epoch": 0.03385819604987713, "grad_norm": 8.75, "learning_rate": 4.999595545110374e-06, "loss": 1.646989107131958, "step": 186 }, { "epoch": 0.03422226267406935, "grad_norm": 2.53125, "learning_rate": 4.999579991108495e-06, "loss": 1.101284384727478, "step": 188 }, { "epoch": 0.03458632929826158, "grad_norm": 13.3125, "learning_rate": 4.999564143676355e-06, "loss": 1.894997000694275, "step": 190 }, { "epoch": 0.03495039592245381, "grad_norm": 9.6875, "learning_rate": 4.999548002816283e-06, "loss": 1.7325177192687988, "step": 192 }, { "epoch": 0.03531446254664604, "grad_norm": 39.5, "learning_rate": 4.999531568530642e-06, "loss": 1.519869089126587, "step": 194 }, { "epoch": 0.03567852917083826, "grad_norm": 14.0, "learning_rate": 4.999514840821847e-06, "loss": 1.671815276145935, "step": 196 }, { "epoch": 0.03604259579503049, "grad_norm": 6.34375, "learning_rate": 4.9994978196923545e-06, "loss": 1.409099817276001, "step": 198 }, { "epoch": 0.03640666241922272, "grad_norm": 23.25, "learning_rate": 4.99948050514466e-06, "loss": 1.731999158859253, "step": 200 }, { "epoch": 0.03677072904341495, "grad_norm": 5.6875, "learning_rate": 4.999462897181303e-06, "loss": 0.8340705633163452, "step": 202 }, { "epoch": 0.03713479566760717, "grad_norm": 38.75, "learning_rate": 4.99944499580487e-06, "loss": 0.9876308441162109, "step": 204 }, { "epoch": 0.0374988622917994, "grad_norm": 8.875, "learning_rate": 4.999426801017987e-06, "loss": 1.5689787864685059, "step": 206 }, { "epoch": 0.03786292891599163, "grad_norm": 4.3125, "learning_rate": 4.999408312823323e-06, "loss": 1.3400174379348755, "step": 208 }, { "epoch": 0.038226995540183856, "grad_norm": 3.703125, "learning_rate": 4.9993895312235915e-06, "loss": 1.0843534469604492, "step": 210 }, { "epoch": 0.03859106216437608, "grad_norm": 22.25, "learning_rate": 4.99937045622155e-06, "loss": 1.7182073593139648, "step": 212 }, { "epoch": 0.038955128788568306, "grad_norm": 8.75, "learning_rate": 4.999351087819998e-06, "loss": 0.9828606247901917, "step": 214 }, { "epoch": 0.039319195412760535, "grad_norm": 12.0, "learning_rate": 4.9993314260217755e-06, "loss": 1.2823659181594849, "step": 216 }, { "epoch": 0.039683262036952764, "grad_norm": 89.0, "learning_rate": 4.999311470829769e-06, "loss": 1.8203669786453247, "step": 218 }, { "epoch": 0.040047328661144986, "grad_norm": 13.0625, "learning_rate": 4.999291222246906e-06, "loss": 1.7533475160598755, "step": 220 }, { "epoch": 0.040411395285337215, "grad_norm": 3.4375, "learning_rate": 4.999270680276159e-06, "loss": 1.1587835550308228, "step": 222 }, { "epoch": 0.040775461909529444, "grad_norm": 14.125, "learning_rate": 4.999249844920542e-06, "loss": 1.6403859853744507, "step": 224 }, { "epoch": 0.04113952853372167, "grad_norm": 19.375, "learning_rate": 4.999228716183112e-06, "loss": 1.605838418006897, "step": 226 }, { "epoch": 0.0415035951579139, "grad_norm": 22.0, "learning_rate": 4.999207294066971e-06, "loss": 1.7368394136428833, "step": 228 }, { "epoch": 0.04186766178210612, "grad_norm": 8.3125, "learning_rate": 4.999185578575261e-06, "loss": 1.2459367513656616, "step": 230 }, { "epoch": 0.04223172840629835, "grad_norm": 7.65625, "learning_rate": 4.9991635697111695e-06, "loss": 1.6125494241714478, "step": 232 }, { "epoch": 0.04259579503049058, "grad_norm": 11.375, "learning_rate": 4.999141267477926e-06, "loss": 1.3983747959136963, "step": 234 }, { "epoch": 0.04295986165468281, "grad_norm": 32.0, "learning_rate": 4.999118671878803e-06, "loss": 2.105119466781616, "step": 236 }, { "epoch": 0.04332392827887503, "grad_norm": 4.65625, "learning_rate": 4.999095782917118e-06, "loss": 0.8702592849731445, "step": 238 }, { "epoch": 0.04368799490306726, "grad_norm": 12.125, "learning_rate": 4.999072600596226e-06, "loss": 1.6933553218841553, "step": 240 }, { "epoch": 0.04405206152725949, "grad_norm": 4.53125, "learning_rate": 4.999049124919534e-06, "loss": 0.9142586588859558, "step": 242 }, { "epoch": 0.04441612815145172, "grad_norm": 17.875, "learning_rate": 4.999025355890482e-06, "loss": 1.405271291732788, "step": 244 }, { "epoch": 0.04478019477564394, "grad_norm": 25.75, "learning_rate": 4.999001293512562e-06, "loss": 1.8398109674453735, "step": 246 }, { "epoch": 0.04514426139983617, "grad_norm": 14.0, "learning_rate": 4.998976937789304e-06, "loss": 1.8075313568115234, "step": 248 }, { "epoch": 0.0455083280240284, "grad_norm": 5.8125, "learning_rate": 4.9989522887242806e-06, "loss": 1.103116750717163, "step": 250 }, { "epoch": 0.04587239464822063, "grad_norm": 6.96875, "learning_rate": 4.99892734632111e-06, "loss": 1.1777992248535156, "step": 252 }, { "epoch": 0.04623646127241285, "grad_norm": 13.5625, "learning_rate": 4.9989021105834515e-06, "loss": 2.266021251678467, "step": 254 }, { "epoch": 0.04660052789660508, "grad_norm": 16.625, "learning_rate": 4.99887658151501e-06, "loss": 1.7739362716674805, "step": 256 }, { "epoch": 0.046964594520797306, "grad_norm": 25.75, "learning_rate": 4.99885075911953e-06, "loss": 0.9575203061103821, "step": 258 }, { "epoch": 0.047328661144989535, "grad_norm": 10.5, "learning_rate": 4.9988246434008025e-06, "loss": 1.6021323204040527, "step": 260 }, { "epoch": 0.04769272776918176, "grad_norm": 20.125, "learning_rate": 4.998798234362659e-06, "loss": 2.106436252593994, "step": 262 }, { "epoch": 0.048056794393373986, "grad_norm": 8.5625, "learning_rate": 4.998771532008974e-06, "loss": 1.1828550100326538, "step": 264 }, { "epoch": 0.048420861017566215, "grad_norm": 52.25, "learning_rate": 4.998744536343669e-06, "loss": 1.099178671836853, "step": 266 }, { "epoch": 0.048784927641758444, "grad_norm": 9.75, "learning_rate": 4.998717247370703e-06, "loss": 1.6490399837493896, "step": 268 }, { "epoch": 0.049148994265950666, "grad_norm": 18.75, "learning_rate": 4.998689665094079e-06, "loss": 0.9570120573043823, "step": 270 }, { "epoch": 0.049513060890142895, "grad_norm": 5.375, "learning_rate": 4.998661789517849e-06, "loss": 1.5607397556304932, "step": 272 }, { "epoch": 0.04987712751433512, "grad_norm": 20.375, "learning_rate": 4.998633620646101e-06, "loss": 0.7462946772575378, "step": 274 }, { "epoch": 0.05024119413852735, "grad_norm": 2.75, "learning_rate": 4.998605158482967e-06, "loss": 0.7914036512374878, "step": 276 }, { "epoch": 0.05060526076271958, "grad_norm": 7.9375, "learning_rate": 4.998576403032628e-06, "loss": 1.245314121246338, "step": 278 }, { "epoch": 0.0509693273869118, "grad_norm": 36.0, "learning_rate": 4.9985473542993e-06, "loss": 0.9020047187805176, "step": 280 }, { "epoch": 0.05133339401110403, "grad_norm": 34.0, "learning_rate": 4.998518012287248e-06, "loss": 1.66724693775177, "step": 282 }, { "epoch": 0.05169746063529626, "grad_norm": 17.875, "learning_rate": 4.998488377000776e-06, "loss": 1.7929542064666748, "step": 284 }, { "epoch": 0.05206152725948849, "grad_norm": 19.125, "learning_rate": 4.998458448444235e-06, "loss": 1.9806444644927979, "step": 286 }, { "epoch": 0.05242559388368071, "grad_norm": 4.09375, "learning_rate": 4.998428226622014e-06, "loss": 1.0485785007476807, "step": 288 }, { "epoch": 0.05278966050787294, "grad_norm": 26.0, "learning_rate": 4.9983977115385505e-06, "loss": 2.3810715675354004, "step": 290 }, { "epoch": 0.05315372713206517, "grad_norm": 60.0, "learning_rate": 4.998366903198323e-06, "loss": 2.1565098762512207, "step": 292 }, { "epoch": 0.0535177937562574, "grad_norm": 32.75, "learning_rate": 4.9983358016058494e-06, "loss": 1.9635474681854248, "step": 294 }, { "epoch": 0.05388186038044962, "grad_norm": 6.5, "learning_rate": 4.998304406765696e-06, "loss": 1.1579265594482422, "step": 296 }, { "epoch": 0.05424592700464185, "grad_norm": 10.5625, "learning_rate": 4.99827271868247e-06, "loss": 1.6695032119750977, "step": 298 }, { "epoch": 0.05460999362883408, "grad_norm": 12.5625, "learning_rate": 4.998240737360819e-06, "loss": 1.888932228088379, "step": 300 }, { "epoch": 0.054974060253026306, "grad_norm": 3.40625, "learning_rate": 4.99820846280544e-06, "loss": 1.097129225730896, "step": 302 }, { "epoch": 0.05533812687721853, "grad_norm": 9.0, "learning_rate": 4.998175895021066e-06, "loss": 1.0700342655181885, "step": 304 }, { "epoch": 0.05570219350141076, "grad_norm": 9.8125, "learning_rate": 4.998143034012478e-06, "loss": 1.1494324207305908, "step": 306 }, { "epoch": 0.056066260125602986, "grad_norm": 12.5625, "learning_rate": 4.998109879784496e-06, "loss": 1.8598127365112305, "step": 308 }, { "epoch": 0.056430326749795215, "grad_norm": 19.625, "learning_rate": 4.998076432341988e-06, "loss": 1.6065984964370728, "step": 310 }, { "epoch": 0.05679439337398744, "grad_norm": 45.5, "learning_rate": 4.998042691689862e-06, "loss": 0.9418438673019409, "step": 312 }, { "epoch": 0.057158459998179666, "grad_norm": 10.9375, "learning_rate": 4.998008657833067e-06, "loss": 1.579975962638855, "step": 314 }, { "epoch": 0.057522526622371895, "grad_norm": 11.9375, "learning_rate": 4.997974330776598e-06, "loss": 1.1987617015838623, "step": 316 }, { "epoch": 0.05788659324656412, "grad_norm": 18.0, "learning_rate": 4.9979397105254945e-06, "loss": 2.357426404953003, "step": 318 }, { "epoch": 0.058250659870756345, "grad_norm": 13.9375, "learning_rate": 4.997904797084835e-06, "loss": 1.6827460527420044, "step": 320 }, { "epoch": 0.058614726494948574, "grad_norm": 44.5, "learning_rate": 4.997869590459743e-06, "loss": 0.7791367173194885, "step": 322 }, { "epoch": 0.0589787931191408, "grad_norm": 25.875, "learning_rate": 4.997834090655385e-06, "loss": 1.9080381393432617, "step": 324 }, { "epoch": 0.05934285974333303, "grad_norm": 10.75, "learning_rate": 4.9977982976769715e-06, "loss": 1.536203145980835, "step": 326 }, { "epoch": 0.059706926367525254, "grad_norm": 34.0, "learning_rate": 4.997762211529754e-06, "loss": 1.9274436235427856, "step": 328 }, { "epoch": 0.06007099299171748, "grad_norm": 23.375, "learning_rate": 4.9977258322190285e-06, "loss": 2.073014736175537, "step": 330 }, { "epoch": 0.06043505961590971, "grad_norm": 11.0625, "learning_rate": 4.997689159750132e-06, "loss": 1.7975918054580688, "step": 332 }, { "epoch": 0.06079912624010194, "grad_norm": 10.0625, "learning_rate": 4.997652194128449e-06, "loss": 1.551814317703247, "step": 334 }, { "epoch": 0.06116319286429417, "grad_norm": 15.375, "learning_rate": 4.9976149353594e-06, "loss": 1.4602867364883423, "step": 336 }, { "epoch": 0.06152725948848639, "grad_norm": 20.125, "learning_rate": 4.9975773834484565e-06, "loss": 1.6254475116729736, "step": 338 }, { "epoch": 0.06189132611267862, "grad_norm": 9.6875, "learning_rate": 4.997539538401127e-06, "loss": 1.5822265148162842, "step": 340 }, { "epoch": 0.06225539273687085, "grad_norm": 7.3125, "learning_rate": 4.997501400222966e-06, "loss": 1.3375024795532227, "step": 342 }, { "epoch": 0.06261945936106307, "grad_norm": 12.75, "learning_rate": 4.99746296891957e-06, "loss": 1.5867472887039185, "step": 344 }, { "epoch": 0.0629835259852553, "grad_norm": 10.3125, "learning_rate": 4.997424244496577e-06, "loss": 1.6741087436676025, "step": 346 }, { "epoch": 0.06334759260944753, "grad_norm": 27.875, "learning_rate": 4.997385226959672e-06, "loss": 2.0546278953552246, "step": 348 }, { "epoch": 0.06371165923363975, "grad_norm": 13.5625, "learning_rate": 4.997345916314578e-06, "loss": 1.687022089958191, "step": 350 }, { "epoch": 0.06407572585783199, "grad_norm": 12.875, "learning_rate": 4.997306312567067e-06, "loss": 1.7766467332839966, "step": 352 }, { "epoch": 0.06443979248202421, "grad_norm": 4.34375, "learning_rate": 4.997266415722949e-06, "loss": 1.1123629808425903, "step": 354 }, { "epoch": 0.06480385910621644, "grad_norm": 11.125, "learning_rate": 4.997226225788078e-06, "loss": 1.645906925201416, "step": 356 }, { "epoch": 0.06516792573040867, "grad_norm": 14.875, "learning_rate": 4.997185742768352e-06, "loss": 1.8206772804260254, "step": 358 }, { "epoch": 0.06553199235460089, "grad_norm": 11.0625, "learning_rate": 4.997144966669713e-06, "loss": 1.4990421533584595, "step": 360 }, { "epoch": 0.06589605897879312, "grad_norm": 9.625, "learning_rate": 4.997103897498144e-06, "loss": 1.7342629432678223, "step": 362 }, { "epoch": 0.06626012560298535, "grad_norm": 4.46875, "learning_rate": 4.997062535259672e-06, "loss": 1.2138992547988892, "step": 364 }, { "epoch": 0.06662419222717757, "grad_norm": 3.765625, "learning_rate": 4.997020879960365e-06, "loss": 1.220954418182373, "step": 366 }, { "epoch": 0.0669882588513698, "grad_norm": 4.125, "learning_rate": 4.996978931606338e-06, "loss": 1.4167884588241577, "step": 368 }, { "epoch": 0.06735232547556202, "grad_norm": 4.9375, "learning_rate": 4.996936690203746e-06, "loss": 0.8898102641105652, "step": 370 }, { "epoch": 0.06771639209975426, "grad_norm": 10.1875, "learning_rate": 4.996894155758787e-06, "loss": 1.6967730522155762, "step": 372 }, { "epoch": 0.06808045872394648, "grad_norm": 13.0625, "learning_rate": 4.996851328277703e-06, "loss": 1.5613888502120972, "step": 374 }, { "epoch": 0.0684445253481387, "grad_norm": 20.75, "learning_rate": 4.99680820776678e-06, "loss": 1.6336811780929565, "step": 376 }, { "epoch": 0.06880859197233094, "grad_norm": 25.375, "learning_rate": 4.996764794232344e-06, "loss": 1.7506146430969238, "step": 378 }, { "epoch": 0.06917265859652316, "grad_norm": 11.3125, "learning_rate": 4.996721087680767e-06, "loss": 1.9016811847686768, "step": 380 }, { "epoch": 0.06953672522071538, "grad_norm": 18.625, "learning_rate": 4.9966770881184625e-06, "loss": 1.6010924577713013, "step": 382 }, { "epoch": 0.06990079184490762, "grad_norm": 35.0, "learning_rate": 4.996632795551887e-06, "loss": 1.7810370922088623, "step": 384 }, { "epoch": 0.07026485846909984, "grad_norm": 7.375, "learning_rate": 4.996588209987541e-06, "loss": 1.1643171310424805, "step": 386 }, { "epoch": 0.07062892509329208, "grad_norm": 26.375, "learning_rate": 4.996543331431966e-06, "loss": 0.9697562456130981, "step": 388 }, { "epoch": 0.0709929917174843, "grad_norm": 15.5, "learning_rate": 4.996498159891748e-06, "loss": 1.4735223054885864, "step": 390 }, { "epoch": 0.07135705834167652, "grad_norm": 17.875, "learning_rate": 4.996452695373517e-06, "loss": 1.5924171209335327, "step": 392 }, { "epoch": 0.07172112496586876, "grad_norm": 21.5, "learning_rate": 4.996406937883944e-06, "loss": 1.2968168258666992, "step": 394 }, { "epoch": 0.07208519159006098, "grad_norm": 11.0, "learning_rate": 4.996360887429743e-06, "loss": 1.7380396127700806, "step": 396 }, { "epoch": 0.07244925821425321, "grad_norm": 10.6875, "learning_rate": 4.996314544017672e-06, "loss": 1.675560474395752, "step": 398 }, { "epoch": 0.07281332483844544, "grad_norm": 4.5625, "learning_rate": 4.9962679076545325e-06, "loss": 0.8685811161994934, "step": 400 }, { "epoch": 0.07317739146263766, "grad_norm": 86.0, "learning_rate": 4.9962209783471685e-06, "loss": 2.0414555072784424, "step": 402 }, { "epoch": 0.0735414580868299, "grad_norm": 21.75, "learning_rate": 4.9961737561024645e-06, "loss": 1.722549557685852, "step": 404 }, { "epoch": 0.07390552471102212, "grad_norm": 7.0, "learning_rate": 4.996126240927353e-06, "loss": 1.5973738431930542, "step": 406 }, { "epoch": 0.07426959133521434, "grad_norm": 8.0, "learning_rate": 4.996078432828804e-06, "loss": 1.7757899761199951, "step": 408 }, { "epoch": 0.07463365795940657, "grad_norm": 15.375, "learning_rate": 4.9960303318138345e-06, "loss": 1.5636049509048462, "step": 410 }, { "epoch": 0.0749977245835988, "grad_norm": 21.375, "learning_rate": 4.995981937889503e-06, "loss": 1.7934061288833618, "step": 412 }, { "epoch": 0.07536179120779103, "grad_norm": 7.15625, "learning_rate": 4.995933251062911e-06, "loss": 1.1408581733703613, "step": 414 }, { "epoch": 0.07572585783198325, "grad_norm": 60.5, "learning_rate": 4.9958842713412045e-06, "loss": 1.3937311172485352, "step": 416 }, { "epoch": 0.07608992445617548, "grad_norm": 24.25, "learning_rate": 4.9958349987315694e-06, "loss": 2.245112895965576, "step": 418 }, { "epoch": 0.07645399108036771, "grad_norm": 14.4375, "learning_rate": 4.9957854332412355e-06, "loss": 1.5475777387619019, "step": 420 }, { "epoch": 0.07681805770455993, "grad_norm": 26.125, "learning_rate": 4.995735574877479e-06, "loss": 1.450985074043274, "step": 422 }, { "epoch": 0.07718212432875216, "grad_norm": 10.5, "learning_rate": 4.995685423647614e-06, "loss": 1.5756415128707886, "step": 424 }, { "epoch": 0.07754619095294439, "grad_norm": 3.6875, "learning_rate": 4.995634979559001e-06, "loss": 1.1469438076019287, "step": 426 }, { "epoch": 0.07791025757713661, "grad_norm": 13.75, "learning_rate": 4.995584242619042e-06, "loss": 1.561311960220337, "step": 428 }, { "epoch": 0.07827432420132885, "grad_norm": 24.625, "learning_rate": 4.995533212835183e-06, "loss": 1.953911542892456, "step": 430 }, { "epoch": 0.07863839082552107, "grad_norm": 17.75, "learning_rate": 4.995481890214912e-06, "loss": 1.5231664180755615, "step": 432 }, { "epoch": 0.07900245744971329, "grad_norm": 10.0625, "learning_rate": 4.995430274765761e-06, "loss": 1.6239662170410156, "step": 434 }, { "epoch": 0.07936652407390553, "grad_norm": 9.3125, "learning_rate": 4.9953783664953035e-06, "loss": 1.6825274229049683, "step": 436 }, { "epoch": 0.07973059069809775, "grad_norm": 11.875, "learning_rate": 4.995326165411158e-06, "loss": 1.6108781099319458, "step": 438 }, { "epoch": 0.08009465732228997, "grad_norm": 16.375, "learning_rate": 4.995273671520984e-06, "loss": 1.752515196800232, "step": 440 }, { "epoch": 0.08045872394648221, "grad_norm": 2.5625, "learning_rate": 4.995220884832484e-06, "loss": 1.2974646091461182, "step": 442 }, { "epoch": 0.08082279057067443, "grad_norm": 23.375, "learning_rate": 4.995167805353406e-06, "loss": 2.2482070922851562, "step": 444 }, { "epoch": 0.08118685719486667, "grad_norm": 8.0625, "learning_rate": 4.995114433091538e-06, "loss": 1.6542468070983887, "step": 446 }, { "epoch": 0.08155092381905889, "grad_norm": 6.65625, "learning_rate": 4.995060768054711e-06, "loss": 0.9779666066169739, "step": 448 }, { "epoch": 0.08191499044325111, "grad_norm": 14.625, "learning_rate": 4.995006810250804e-06, "loss": 1.1051747798919678, "step": 450 }, { "epoch": 0.08227905706744335, "grad_norm": 8.5, "learning_rate": 4.9949525596877315e-06, "loss": 1.5198476314544678, "step": 452 }, { "epoch": 0.08264312369163557, "grad_norm": 32.75, "learning_rate": 4.994898016373455e-06, "loss": 2.0264604091644287, "step": 454 }, { "epoch": 0.0830071903158278, "grad_norm": 12.875, "learning_rate": 4.99484318031598e-06, "loss": 1.7488818168640137, "step": 456 }, { "epoch": 0.08337125694002002, "grad_norm": 19.75, "learning_rate": 4.994788051523353e-06, "loss": 0.8572578430175781, "step": 458 }, { "epoch": 0.08373532356421225, "grad_norm": 10.5, "learning_rate": 4.994732630003663e-06, "loss": 1.4364445209503174, "step": 460 }, { "epoch": 0.08409939018840448, "grad_norm": 73.5, "learning_rate": 4.994676915765044e-06, "loss": 2.2151713371276855, "step": 462 }, { "epoch": 0.0844634568125967, "grad_norm": 11.0625, "learning_rate": 4.994620908815672e-06, "loss": 1.579387903213501, "step": 464 }, { "epoch": 0.08482752343678893, "grad_norm": 22.0, "learning_rate": 4.994564609163763e-06, "loss": 2.0482544898986816, "step": 466 }, { "epoch": 0.08519159006098116, "grad_norm": 20.25, "learning_rate": 4.994508016817582e-06, "loss": 1.6465792655944824, "step": 468 }, { "epoch": 0.08555565668517338, "grad_norm": 28.75, "learning_rate": 4.9944511317854325e-06, "loss": 1.4567583799362183, "step": 470 }, { "epoch": 0.08591972330936562, "grad_norm": 20.375, "learning_rate": 4.994393954075663e-06, "loss": 1.0225732326507568, "step": 472 }, { "epoch": 0.08628378993355784, "grad_norm": 12.5625, "learning_rate": 4.994336483696663e-06, "loss": 1.2597174644470215, "step": 474 }, { "epoch": 0.08664785655775006, "grad_norm": 10.625, "learning_rate": 4.994278720656865e-06, "loss": 0.9398125410079956, "step": 476 }, { "epoch": 0.0870119231819423, "grad_norm": 4.3125, "learning_rate": 4.9942206649647485e-06, "loss": 1.1096361875534058, "step": 478 }, { "epoch": 0.08737598980613452, "grad_norm": 11.3125, "learning_rate": 4.99416231662883e-06, "loss": 1.516744613647461, "step": 480 }, { "epoch": 0.08774005643032674, "grad_norm": 8.375, "learning_rate": 4.9941036756576746e-06, "loss": 1.6033967733383179, "step": 482 }, { "epoch": 0.08810412305451898, "grad_norm": 48.5, "learning_rate": 4.994044742059885e-06, "loss": 1.4961059093475342, "step": 484 }, { "epoch": 0.0884681896787112, "grad_norm": 30.625, "learning_rate": 4.993985515844111e-06, "loss": 1.3461837768554688, "step": 486 }, { "epoch": 0.08883225630290344, "grad_norm": 25.625, "learning_rate": 4.993925997019044e-06, "loss": 1.5667678117752075, "step": 488 }, { "epoch": 0.08919632292709566, "grad_norm": 8.875, "learning_rate": 4.993866185593417e-06, "loss": 1.6020535230636597, "step": 490 }, { "epoch": 0.08956038955128788, "grad_norm": 21.75, "learning_rate": 4.993806081576007e-06, "loss": 2.1408867835998535, "step": 492 }, { "epoch": 0.08992445617548012, "grad_norm": 20.125, "learning_rate": 4.993745684975636e-06, "loss": 1.9854199886322021, "step": 494 }, { "epoch": 0.09028852279967234, "grad_norm": 24.875, "learning_rate": 4.9936849958011645e-06, "loss": 2.068175792694092, "step": 496 }, { "epoch": 0.09065258942386457, "grad_norm": 14.5, "learning_rate": 4.993624014061501e-06, "loss": 0.9099866151809692, "step": 498 }, { "epoch": 0.0910166560480568, "grad_norm": 7.4375, "learning_rate": 4.993562739765593e-06, "loss": 1.1208518743515015, "step": 500 }, { "epoch": 0.09138072267224902, "grad_norm": 20.75, "learning_rate": 4.99350117292243e-06, "loss": 1.3898968696594238, "step": 502 }, { "epoch": 0.09174478929644125, "grad_norm": 6.0, "learning_rate": 4.993439313541051e-06, "loss": 0.9804521799087524, "step": 504 }, { "epoch": 0.09210885592063348, "grad_norm": 7.65625, "learning_rate": 4.9933771616305304e-06, "loss": 1.688679575920105, "step": 506 }, { "epoch": 0.0924729225448257, "grad_norm": 20.25, "learning_rate": 4.99331471719999e-06, "loss": 0.8258498311042786, "step": 508 }, { "epoch": 0.09283698916901793, "grad_norm": 18.125, "learning_rate": 4.993251980258592e-06, "loss": 1.9254039525985718, "step": 510 }, { "epoch": 0.09320105579321016, "grad_norm": 8.0, "learning_rate": 4.993188950815545e-06, "loss": 1.6451748609542847, "step": 512 }, { "epoch": 0.09356512241740239, "grad_norm": 12.375, "learning_rate": 4.993125628880098e-06, "loss": 1.6198679208755493, "step": 514 }, { "epoch": 0.09392918904159461, "grad_norm": 15.6875, "learning_rate": 4.993062014461542e-06, "loss": 1.0850889682769775, "step": 516 }, { "epoch": 0.09429325566578683, "grad_norm": 8.9375, "learning_rate": 4.9929981075692115e-06, "loss": 1.705244779586792, "step": 518 }, { "epoch": 0.09465732228997907, "grad_norm": 9.9375, "learning_rate": 4.992933908212485e-06, "loss": 1.6756541728973389, "step": 520 }, { "epoch": 0.09502138891417129, "grad_norm": 15.5, "learning_rate": 4.992869416400785e-06, "loss": 1.7981197834014893, "step": 522 }, { "epoch": 0.09538545553836351, "grad_norm": 37.0, "learning_rate": 4.992804632143575e-06, "loss": 2.236433982849121, "step": 524 }, { "epoch": 0.09574952216255575, "grad_norm": 14.8125, "learning_rate": 4.992739555450361e-06, "loss": 1.0820854902267456, "step": 526 }, { "epoch": 0.09611358878674797, "grad_norm": 8.3125, "learning_rate": 4.992674186330694e-06, "loss": 1.7297701835632324, "step": 528 }, { "epoch": 0.09647765541094021, "grad_norm": 14.3125, "learning_rate": 4.992608524794165e-06, "loss": 1.5572032928466797, "step": 530 }, { "epoch": 0.09684172203513243, "grad_norm": 5.84375, "learning_rate": 4.99254257085041e-06, "loss": 1.0914921760559082, "step": 532 }, { "epoch": 0.09720578865932465, "grad_norm": 5.90625, "learning_rate": 4.992476324509108e-06, "loss": 1.2033464908599854, "step": 534 }, { "epoch": 0.09756985528351689, "grad_norm": 115.5, "learning_rate": 4.99240978577998e-06, "loss": 1.4244115352630615, "step": 536 }, { "epoch": 0.09793392190770911, "grad_norm": 9.125, "learning_rate": 4.992342954672791e-06, "loss": 1.5869569778442383, "step": 538 }, { "epoch": 0.09829798853190133, "grad_norm": 7.21875, "learning_rate": 4.992275831197347e-06, "loss": 1.1813812255859375, "step": 540 }, { "epoch": 0.09866205515609357, "grad_norm": 9.1875, "learning_rate": 4.9922084153635e-06, "loss": 1.3222036361694336, "step": 542 }, { "epoch": 0.09902612178028579, "grad_norm": 7.8125, "learning_rate": 4.99214070718114e-06, "loss": 1.5701090097427368, "step": 544 }, { "epoch": 0.09939018840447802, "grad_norm": 11.4375, "learning_rate": 4.992072706660206e-06, "loss": 1.4453173875808716, "step": 546 }, { "epoch": 0.09975425502867025, "grad_norm": 4.78125, "learning_rate": 4.9920044138106745e-06, "loss": 1.21589195728302, "step": 548 }, { "epoch": 0.10011832165286247, "grad_norm": 17.25, "learning_rate": 4.991935828642569e-06, "loss": 1.4480749368667603, "step": 550 }, { "epoch": 0.1004823882770547, "grad_norm": 59.75, "learning_rate": 4.991866951165954e-06, "loss": 0.8703964948654175, "step": 552 }, { "epoch": 0.10084645490124693, "grad_norm": 17.375, "learning_rate": 4.991797781390935e-06, "loss": 1.4682190418243408, "step": 554 }, { "epoch": 0.10121052152543916, "grad_norm": 10.75, "learning_rate": 4.991728319327664e-06, "loss": 1.9051607847213745, "step": 556 }, { "epoch": 0.10157458814963138, "grad_norm": 15.25, "learning_rate": 4.9916585649863335e-06, "loss": 1.9129139184951782, "step": 558 }, { "epoch": 0.1019386547738236, "grad_norm": 12.625, "learning_rate": 4.99158851837718e-06, "loss": 1.4913318157196045, "step": 560 }, { "epoch": 0.10230272139801584, "grad_norm": 28.75, "learning_rate": 4.991518179510483e-06, "loss": 1.8028734922409058, "step": 562 }, { "epoch": 0.10266678802220806, "grad_norm": 19.0, "learning_rate": 4.991447548396564e-06, "loss": 1.1229168176651, "step": 564 }, { "epoch": 0.10303085464640029, "grad_norm": 22.875, "learning_rate": 4.9913766250457885e-06, "loss": 1.4349894523620605, "step": 566 }, { "epoch": 0.10339492127059252, "grad_norm": 9.5, "learning_rate": 4.991305409468562e-06, "loss": 1.4226398468017578, "step": 568 }, { "epoch": 0.10375898789478474, "grad_norm": 7.1875, "learning_rate": 4.9912339016753375e-06, "loss": 0.8516451120376587, "step": 570 }, { "epoch": 0.10412305451897698, "grad_norm": 51.0, "learning_rate": 4.9911621016766085e-06, "loss": 1.5673589706420898, "step": 572 }, { "epoch": 0.1044871211431692, "grad_norm": 9.75, "learning_rate": 4.99109000948291e-06, "loss": 1.6054155826568604, "step": 574 }, { "epoch": 0.10485118776736142, "grad_norm": 18.375, "learning_rate": 4.991017625104821e-06, "loss": 1.466247797012329, "step": 576 }, { "epoch": 0.10521525439155366, "grad_norm": 7.625, "learning_rate": 4.990944948552966e-06, "loss": 1.7692545652389526, "step": 578 }, { "epoch": 0.10557932101574588, "grad_norm": 28.875, "learning_rate": 4.990871979838008e-06, "loss": 2.2233469486236572, "step": 580 }, { "epoch": 0.1059433876399381, "grad_norm": 11.0, "learning_rate": 4.990798718970654e-06, "loss": 1.4929414987564087, "step": 582 }, { "epoch": 0.10630745426413034, "grad_norm": 10.0, "learning_rate": 4.990725165961658e-06, "loss": 1.7463886737823486, "step": 584 }, { "epoch": 0.10667152088832256, "grad_norm": 15.5, "learning_rate": 4.99065132082181e-06, "loss": 1.547139286994934, "step": 586 }, { "epoch": 0.1070355875125148, "grad_norm": 14.75, "learning_rate": 4.990577183561949e-06, "loss": 1.3375365734100342, "step": 588 }, { "epoch": 0.10739965413670702, "grad_norm": 6.59375, "learning_rate": 4.990502754192952e-06, "loss": 1.1098231077194214, "step": 590 }, { "epoch": 0.10776372076089924, "grad_norm": 25.375, "learning_rate": 4.9904280327257435e-06, "loss": 1.6693047285079956, "step": 592 }, { "epoch": 0.10812778738509148, "grad_norm": 2.796875, "learning_rate": 4.9903530191712875e-06, "loss": 1.3150269985198975, "step": 594 }, { "epoch": 0.1084918540092837, "grad_norm": 97.0, "learning_rate": 4.990277713540594e-06, "loss": 1.7677597999572754, "step": 596 }, { "epoch": 0.10885592063347592, "grad_norm": 21.75, "learning_rate": 4.990202115844709e-06, "loss": 1.8721543550491333, "step": 598 }, { "epoch": 0.10921998725766816, "grad_norm": 7.59375, "learning_rate": 4.99012622609473e-06, "loss": 1.2447410821914673, "step": 600 }, { "epoch": 0.10958405388186038, "grad_norm": 13.25, "learning_rate": 4.990050044301794e-06, "loss": 1.3232799768447876, "step": 602 }, { "epoch": 0.10994812050605261, "grad_norm": 9.125, "learning_rate": 4.989973570477078e-06, "loss": 1.4806007146835327, "step": 604 }, { "epoch": 0.11031218713024483, "grad_norm": 9.125, "learning_rate": 4.9898968046318045e-06, "loss": 1.5398962497711182, "step": 606 }, { "epoch": 0.11067625375443706, "grad_norm": 15.0625, "learning_rate": 4.98981974677724e-06, "loss": 1.6886669397354126, "step": 608 }, { "epoch": 0.11104032037862929, "grad_norm": 9.25, "learning_rate": 4.989742396924691e-06, "loss": 1.5922999382019043, "step": 610 }, { "epoch": 0.11140438700282151, "grad_norm": 11.5625, "learning_rate": 4.9896647550855105e-06, "loss": 1.2320183515548706, "step": 612 }, { "epoch": 0.11176845362701375, "grad_norm": 6.96875, "learning_rate": 4.9895868212710895e-06, "loss": 1.351425051689148, "step": 614 }, { "epoch": 0.11213252025120597, "grad_norm": 10.6875, "learning_rate": 4.989508595492866e-06, "loss": 1.5891644954681396, "step": 616 }, { "epoch": 0.1124965868753982, "grad_norm": 3.8125, "learning_rate": 4.989430077762318e-06, "loss": 1.2235267162322998, "step": 618 }, { "epoch": 0.11286065349959043, "grad_norm": 10.875, "learning_rate": 4.9893512680909695e-06, "loss": 1.5127015113830566, "step": 620 }, { "epoch": 0.11322472012378265, "grad_norm": 12.4375, "learning_rate": 4.9892721664903845e-06, "loss": 1.6188157796859741, "step": 622 }, { "epoch": 0.11358878674797487, "grad_norm": 13.0, "learning_rate": 4.98919277297217e-06, "loss": 1.4947715997695923, "step": 624 }, { "epoch": 0.11395285337216711, "grad_norm": 24.375, "learning_rate": 4.989113087547979e-06, "loss": 1.9253239631652832, "step": 626 }, { "epoch": 0.11431691999635933, "grad_norm": 24.125, "learning_rate": 4.989033110229502e-06, "loss": 1.6219408512115479, "step": 628 }, { "epoch": 0.11468098662055157, "grad_norm": 6.6875, "learning_rate": 4.9889528410284785e-06, "loss": 1.6991894245147705, "step": 630 }, { "epoch": 0.11504505324474379, "grad_norm": 21.625, "learning_rate": 4.9888722799566845e-06, "loss": 1.488581895828247, "step": 632 }, { "epoch": 0.11540911986893601, "grad_norm": 32.75, "learning_rate": 4.988791427025944e-06, "loss": 1.9470293521881104, "step": 634 }, { "epoch": 0.11577318649312825, "grad_norm": 8.1875, "learning_rate": 4.988710282248122e-06, "loss": 1.5553927421569824, "step": 636 }, { "epoch": 0.11613725311732047, "grad_norm": 9.8125, "learning_rate": 4.988628845635125e-06, "loss": 1.617289423942566, "step": 638 }, { "epoch": 0.11650131974151269, "grad_norm": 13.0625, "learning_rate": 4.988547117198906e-06, "loss": 1.654571294784546, "step": 640 }, { "epoch": 0.11686538636570493, "grad_norm": 22.75, "learning_rate": 4.9884650969514545e-06, "loss": 1.599797010421753, "step": 642 }, { "epoch": 0.11722945298989715, "grad_norm": 9.875, "learning_rate": 4.98838278490481e-06, "loss": 1.4296228885650635, "step": 644 }, { "epoch": 0.11759351961408938, "grad_norm": 10.5625, "learning_rate": 4.988300181071047e-06, "loss": 1.5124624967575073, "step": 646 }, { "epoch": 0.1179575862382816, "grad_norm": 12.25, "learning_rate": 4.9882172854622935e-06, "loss": 1.621030569076538, "step": 648 }, { "epoch": 0.11832165286247383, "grad_norm": 26.25, "learning_rate": 4.988134098090709e-06, "loss": 0.9525178670883179, "step": 650 }, { "epoch": 0.11868571948666606, "grad_norm": 24.875, "learning_rate": 4.988050618968504e-06, "loss": 1.5937832593917847, "step": 652 }, { "epoch": 0.11904978611085829, "grad_norm": 22.5, "learning_rate": 4.987966848107927e-06, "loss": 2.128664016723633, "step": 654 }, { "epoch": 0.11941385273505051, "grad_norm": 7.1875, "learning_rate": 4.9878827855212715e-06, "loss": 1.0092219114303589, "step": 656 }, { "epoch": 0.11977791935924274, "grad_norm": 28.375, "learning_rate": 4.987798431220874e-06, "loss": 1.005147099494934, "step": 658 }, { "epoch": 0.12014198598343497, "grad_norm": 9.75, "learning_rate": 4.987713785219111e-06, "loss": 0.8914632797241211, "step": 660 }, { "epoch": 0.1205060526076272, "grad_norm": 3.8125, "learning_rate": 4.9876288475284076e-06, "loss": 0.9911003112792969, "step": 662 }, { "epoch": 0.12087011923181942, "grad_norm": 11.1875, "learning_rate": 4.987543618161225e-06, "loss": 1.546281337738037, "step": 664 }, { "epoch": 0.12123418585601164, "grad_norm": 10.0, "learning_rate": 4.987458097130071e-06, "loss": 1.531376838684082, "step": 666 }, { "epoch": 0.12159825248020388, "grad_norm": 18.875, "learning_rate": 4.987372284447496e-06, "loss": 1.6790237426757812, "step": 668 }, { "epoch": 0.1219623191043961, "grad_norm": 9.0625, "learning_rate": 4.987286180126093e-06, "loss": 1.620091199874878, "step": 670 }, { "epoch": 0.12232638572858834, "grad_norm": 19.125, "learning_rate": 4.987199784178496e-06, "loss": 2.202967405319214, "step": 672 }, { "epoch": 0.12269045235278056, "grad_norm": 6.9375, "learning_rate": 4.987113096617384e-06, "loss": 1.178342580795288, "step": 674 }, { "epoch": 0.12305451897697278, "grad_norm": 16.375, "learning_rate": 4.987026117455479e-06, "loss": 1.2810454368591309, "step": 676 }, { "epoch": 0.12341858560116502, "grad_norm": 16.25, "learning_rate": 4.986938846705544e-06, "loss": 1.3532731533050537, "step": 678 }, { "epoch": 0.12378265222535724, "grad_norm": 9.75, "learning_rate": 4.986851284380384e-06, "loss": 1.5397684574127197, "step": 680 }, { "epoch": 0.12414671884954946, "grad_norm": 8.8125, "learning_rate": 4.986763430492851e-06, "loss": 1.404807209968567, "step": 682 }, { "epoch": 0.1245107854737417, "grad_norm": 13.625, "learning_rate": 4.9866752850558365e-06, "loss": 0.9748321771621704, "step": 684 }, { "epoch": 0.12487485209793392, "grad_norm": 8.5, "learning_rate": 4.986586848082274e-06, "loss": 1.6022968292236328, "step": 686 }, { "epoch": 0.12523891872212614, "grad_norm": 10.375, "learning_rate": 4.986498119585145e-06, "loss": 1.5184335708618164, "step": 688 }, { "epoch": 0.12560298534631836, "grad_norm": 8.1875, "learning_rate": 4.986409099577465e-06, "loss": 1.596593976020813, "step": 690 }, { "epoch": 0.1259670519705106, "grad_norm": 8.4375, "learning_rate": 4.986319788072301e-06, "loss": 1.2138594388961792, "step": 692 }, { "epoch": 0.12633111859470283, "grad_norm": 4.9375, "learning_rate": 4.986230185082758e-06, "loss": 1.2089152336120605, "step": 694 }, { "epoch": 0.12669518521889506, "grad_norm": 19.625, "learning_rate": 4.986140290621985e-06, "loss": 1.5043275356292725, "step": 696 }, { "epoch": 0.12705925184308728, "grad_norm": 7.78125, "learning_rate": 4.986050104703173e-06, "loss": 1.502524971961975, "step": 698 }, { "epoch": 0.1274233184672795, "grad_norm": 14.875, "learning_rate": 4.985959627339556e-06, "loss": 2.0593042373657227, "step": 700 }, { "epoch": 0.12778738509147175, "grad_norm": 15.875, "learning_rate": 4.985868858544413e-06, "loss": 1.4950681924819946, "step": 702 }, { "epoch": 0.12815145171566397, "grad_norm": 7.8125, "learning_rate": 4.985777798331063e-06, "loss": 1.127295732498169, "step": 704 }, { "epoch": 0.1285155183398562, "grad_norm": 8.4375, "learning_rate": 4.9856864467128694e-06, "loss": 1.2744174003601074, "step": 706 }, { "epoch": 0.12887958496404842, "grad_norm": 29.875, "learning_rate": 4.9855948037032365e-06, "loss": 1.8374284505844116, "step": 708 }, { "epoch": 0.12924365158824064, "grad_norm": 7.46875, "learning_rate": 4.985502869315613e-06, "loss": 0.994347095489502, "step": 710 }, { "epoch": 0.1296077182124329, "grad_norm": 6.75, "learning_rate": 4.98541064356349e-06, "loss": 1.5544590950012207, "step": 712 }, { "epoch": 0.1299717848366251, "grad_norm": 18.875, "learning_rate": 4.985318126460401e-06, "loss": 0.7793824672698975, "step": 714 }, { "epoch": 0.13033585146081733, "grad_norm": 7.625, "learning_rate": 4.985225318019923e-06, "loss": 1.461126446723938, "step": 716 }, { "epoch": 0.13069991808500955, "grad_norm": 23.25, "learning_rate": 4.985132218255675e-06, "loss": 1.0607283115386963, "step": 718 }, { "epoch": 0.13106398470920178, "grad_norm": 15.0, "learning_rate": 4.9850388271813185e-06, "loss": 1.913686990737915, "step": 720 }, { "epoch": 0.13142805133339402, "grad_norm": 11.625, "learning_rate": 4.984945144810559e-06, "loss": 1.60567307472229, "step": 722 }, { "epoch": 0.13179211795758625, "grad_norm": 9.125, "learning_rate": 4.9848511711571444e-06, "loss": 1.5132880210876465, "step": 724 }, { "epoch": 0.13215618458177847, "grad_norm": 10.8125, "learning_rate": 4.984756906234863e-06, "loss": 1.8271973133087158, "step": 726 }, { "epoch": 0.1325202512059707, "grad_norm": 15.625, "learning_rate": 4.984662350057551e-06, "loss": 1.8523712158203125, "step": 728 }, { "epoch": 0.1328843178301629, "grad_norm": 12.875, "learning_rate": 4.984567502639082e-06, "loss": 0.896437406539917, "step": 730 }, { "epoch": 0.13324838445435513, "grad_norm": 4.28125, "learning_rate": 4.984472363993373e-06, "loss": 1.0899426937103271, "step": 732 }, { "epoch": 0.13361245107854738, "grad_norm": 11.625, "learning_rate": 4.984376934134388e-06, "loss": 1.4315885305404663, "step": 734 }, { "epoch": 0.1339765177027396, "grad_norm": 6.5625, "learning_rate": 4.98428121307613e-06, "loss": 1.5829685926437378, "step": 736 }, { "epoch": 0.13434058432693183, "grad_norm": 12.375, "learning_rate": 4.984185200832645e-06, "loss": 1.468256950378418, "step": 738 }, { "epoch": 0.13470465095112405, "grad_norm": 12.75, "learning_rate": 4.984088897418024e-06, "loss": 2.0691981315612793, "step": 740 }, { "epoch": 0.13506871757531627, "grad_norm": 8.125, "learning_rate": 4.983992302846398e-06, "loss": 1.5110063552856445, "step": 742 }, { "epoch": 0.13543278419950852, "grad_norm": 9.1875, "learning_rate": 4.983895417131941e-06, "loss": 1.5119638442993164, "step": 744 }, { "epoch": 0.13579685082370074, "grad_norm": 60.25, "learning_rate": 4.983798240288872e-06, "loss": 0.860778272151947, "step": 746 }, { "epoch": 0.13616091744789297, "grad_norm": 34.75, "learning_rate": 4.983700772331451e-06, "loss": 1.487378478050232, "step": 748 }, { "epoch": 0.1365249840720852, "grad_norm": 8.0625, "learning_rate": 4.983603013273981e-06, "loss": 1.2067204713821411, "step": 750 }, { "epoch": 0.1368890506962774, "grad_norm": 18.625, "learning_rate": 4.9835049631308074e-06, "loss": 1.641838788986206, "step": 752 }, { "epoch": 0.13725311732046966, "grad_norm": 14.5625, "learning_rate": 4.983406621916319e-06, "loss": 1.5795860290527344, "step": 754 }, { "epoch": 0.13761718394466188, "grad_norm": 9.3125, "learning_rate": 4.983307989644946e-06, "loss": 1.4934535026550293, "step": 756 }, { "epoch": 0.1379812505688541, "grad_norm": 7.25, "learning_rate": 4.983209066331165e-06, "loss": 1.2594249248504639, "step": 758 }, { "epoch": 0.13834531719304632, "grad_norm": 6.75, "learning_rate": 4.9831098519894895e-06, "loss": 1.5626554489135742, "step": 760 }, { "epoch": 0.13870938381723855, "grad_norm": 5.0625, "learning_rate": 4.983010346634481e-06, "loss": 1.1138370037078857, "step": 762 }, { "epoch": 0.13907345044143077, "grad_norm": 6.96875, "learning_rate": 4.9829105502807395e-06, "loss": 1.5185916423797607, "step": 764 }, { "epoch": 0.13943751706562302, "grad_norm": 7.65625, "learning_rate": 4.982810462942911e-06, "loss": 1.6079630851745605, "step": 766 }, { "epoch": 0.13980158368981524, "grad_norm": 19.375, "learning_rate": 4.982710084635683e-06, "loss": 1.459214210510254, "step": 768 }, { "epoch": 0.14016565031400746, "grad_norm": 7.875, "learning_rate": 4.982609415373785e-06, "loss": 1.5401808023452759, "step": 770 }, { "epoch": 0.14052971693819968, "grad_norm": 10.0, "learning_rate": 4.98250845517199e-06, "loss": 1.5686360597610474, "step": 772 }, { "epoch": 0.1408937835623919, "grad_norm": 12.75, "learning_rate": 4.982407204045114e-06, "loss": 1.478764295578003, "step": 774 }, { "epoch": 0.14125785018658416, "grad_norm": 11.0, "learning_rate": 4.982305662008015e-06, "loss": 1.5797213315963745, "step": 776 }, { "epoch": 0.14162191681077638, "grad_norm": 18.125, "learning_rate": 4.982203829075594e-06, "loss": 1.4691228866577148, "step": 778 }, { "epoch": 0.1419859834349686, "grad_norm": 15.6875, "learning_rate": 4.982101705262793e-06, "loss": 1.740663766860962, "step": 780 }, { "epoch": 0.14235005005916082, "grad_norm": 14.1875, "learning_rate": 4.981999290584601e-06, "loss": 2.018289566040039, "step": 782 }, { "epoch": 0.14271411668335304, "grad_norm": 40.25, "learning_rate": 4.981896585056044e-06, "loss": 1.7228575944900513, "step": 784 }, { "epoch": 0.1430781833075453, "grad_norm": 11.375, "learning_rate": 4.981793588692196e-06, "loss": 1.2557510137557983, "step": 786 }, { "epoch": 0.14344224993173751, "grad_norm": 5.625, "learning_rate": 4.981690301508169e-06, "loss": 1.0434479713439941, "step": 788 }, { "epoch": 0.14380631655592974, "grad_norm": 9.0, "learning_rate": 4.981586723519123e-06, "loss": 1.5941314697265625, "step": 790 }, { "epoch": 0.14417038318012196, "grad_norm": 24.25, "learning_rate": 4.981482854740255e-06, "loss": 1.5563713312149048, "step": 792 }, { "epoch": 0.14453444980431418, "grad_norm": 47.5, "learning_rate": 4.981378695186808e-06, "loss": 1.399549961090088, "step": 794 }, { "epoch": 0.14489851642850643, "grad_norm": 5.25, "learning_rate": 4.981274244874069e-06, "loss": 1.1914955377578735, "step": 796 }, { "epoch": 0.14526258305269865, "grad_norm": 30.25, "learning_rate": 4.981169503817362e-06, "loss": 2.3184032440185547, "step": 798 }, { "epoch": 0.14562664967689087, "grad_norm": 6.9375, "learning_rate": 4.981064472032061e-06, "loss": 1.563476324081421, "step": 800 }, { "epoch": 0.1459907163010831, "grad_norm": 7.21875, "learning_rate": 4.980959149533576e-06, "loss": 1.2146525382995605, "step": 802 }, { "epoch": 0.14635478292527532, "grad_norm": 2.734375, "learning_rate": 4.980853536337366e-06, "loss": 1.3861656188964844, "step": 804 }, { "epoch": 0.14671884954946754, "grad_norm": 59.25, "learning_rate": 4.9807476324589246e-06, "loss": 1.2269115447998047, "step": 806 }, { "epoch": 0.1470829161736598, "grad_norm": 37.0, "learning_rate": 4.980641437913797e-06, "loss": 2.0585579872131348, "step": 808 }, { "epoch": 0.147446982797852, "grad_norm": 11.0625, "learning_rate": 4.980534952717564e-06, "loss": 2.0534462928771973, "step": 810 }, { "epoch": 0.14781104942204423, "grad_norm": 50.5, "learning_rate": 4.9804281768858545e-06, "loss": 0.9488010406494141, "step": 812 }, { "epoch": 0.14817511604623645, "grad_norm": 13.875, "learning_rate": 4.980321110434335e-06, "loss": 1.693977952003479, "step": 814 }, { "epoch": 0.14853918267042868, "grad_norm": 16.0, "learning_rate": 4.980213753378719e-06, "loss": 1.4362962245941162, "step": 816 }, { "epoch": 0.14890324929462093, "grad_norm": 27.5, "learning_rate": 4.980106105734759e-06, "loss": 1.519448161125183, "step": 818 }, { "epoch": 0.14926731591881315, "grad_norm": 9.75, "learning_rate": 4.979998167518253e-06, "loss": 1.4360629320144653, "step": 820 }, { "epoch": 0.14963138254300537, "grad_norm": 7.6875, "learning_rate": 4.979889938745039e-06, "loss": 1.6215627193450928, "step": 822 }, { "epoch": 0.1499954491671976, "grad_norm": 16.125, "learning_rate": 4.9797814194310015e-06, "loss": 1.6443802118301392, "step": 824 }, { "epoch": 0.15035951579138981, "grad_norm": 16.0, "learning_rate": 4.979672609592064e-06, "loss": 1.2052421569824219, "step": 826 }, { "epoch": 0.15072358241558206, "grad_norm": 19.75, "learning_rate": 4.979563509244194e-06, "loss": 0.8855452537536621, "step": 828 }, { "epoch": 0.15108764903977429, "grad_norm": 13.9375, "learning_rate": 4.9794541184034004e-06, "loss": 1.6762653589248657, "step": 830 }, { "epoch": 0.1514517156639665, "grad_norm": 5.21875, "learning_rate": 4.979344437085738e-06, "loss": 1.1307954788208008, "step": 832 }, { "epoch": 0.15181578228815873, "grad_norm": 12.25, "learning_rate": 4.979234465307301e-06, "loss": 1.461775302886963, "step": 834 }, { "epoch": 0.15217984891235095, "grad_norm": 9.4375, "learning_rate": 4.979124203084228e-06, "loss": 0.9856496453285217, "step": 836 }, { "epoch": 0.1525439155365432, "grad_norm": 22.75, "learning_rate": 4.979013650432698e-06, "loss": 2.053518056869507, "step": 838 }, { "epoch": 0.15290798216073542, "grad_norm": 15.0, "learning_rate": 4.978902807368935e-06, "loss": 1.5742968320846558, "step": 840 }, { "epoch": 0.15327204878492764, "grad_norm": 12.875, "learning_rate": 4.978791673909205e-06, "loss": 1.4561097621917725, "step": 842 }, { "epoch": 0.15363611540911987, "grad_norm": 11.375, "learning_rate": 4.978680250069816e-06, "loss": 1.4235137701034546, "step": 844 }, { "epoch": 0.1540001820333121, "grad_norm": 8.375, "learning_rate": 4.9785685358671195e-06, "loss": 1.6382036209106445, "step": 846 }, { "epoch": 0.1543642486575043, "grad_norm": 9.3125, "learning_rate": 4.9784565313175084e-06, "loss": 1.7686500549316406, "step": 848 }, { "epoch": 0.15472831528169656, "grad_norm": 25.375, "learning_rate": 4.978344236437419e-06, "loss": 2.117997169494629, "step": 850 }, { "epoch": 0.15509238190588878, "grad_norm": 9.6875, "learning_rate": 4.978231651243331e-06, "loss": 1.5159403085708618, "step": 852 }, { "epoch": 0.155456448530081, "grad_norm": 8.625, "learning_rate": 4.978118775751765e-06, "loss": 1.4558329582214355, "step": 854 }, { "epoch": 0.15582051515427323, "grad_norm": 15.125, "learning_rate": 4.978005609979286e-06, "loss": 1.431607961654663, "step": 856 }, { "epoch": 0.15618458177846545, "grad_norm": 15.5, "learning_rate": 4.9778921539424995e-06, "loss": 1.4661400318145752, "step": 858 }, { "epoch": 0.1565486484026577, "grad_norm": 31.75, "learning_rate": 4.977778407658055e-06, "loss": 1.2503175735473633, "step": 860 }, { "epoch": 0.15691271502684992, "grad_norm": 14.75, "learning_rate": 4.977664371142644e-06, "loss": 1.3584790229797363, "step": 862 }, { "epoch": 0.15727678165104214, "grad_norm": 91.5, "learning_rate": 4.977550044413002e-06, "loss": 0.8588085174560547, "step": 864 }, { "epoch": 0.15764084827523436, "grad_norm": 8.3125, "learning_rate": 4.9774354274859045e-06, "loss": 1.5325477123260498, "step": 866 }, { "epoch": 0.15800491489942659, "grad_norm": 5.46875, "learning_rate": 4.977320520378173e-06, "loss": 1.419021725654602, "step": 868 }, { "epoch": 0.15836898152361883, "grad_norm": 10.125, "learning_rate": 4.977205323106667e-06, "loss": 1.4541630744934082, "step": 870 }, { "epoch": 0.15873304814781106, "grad_norm": 7.03125, "learning_rate": 4.9770898356882946e-06, "loss": 1.2505348920822144, "step": 872 }, { "epoch": 0.15909711477200328, "grad_norm": 7.25, "learning_rate": 4.976974058140001e-06, "loss": 1.640791893005371, "step": 874 }, { "epoch": 0.1594611813961955, "grad_norm": 9.625, "learning_rate": 4.976857990478775e-06, "loss": 1.4406989812850952, "step": 876 }, { "epoch": 0.15982524802038772, "grad_norm": 7.84375, "learning_rate": 4.976741632721651e-06, "loss": 1.5606796741485596, "step": 878 }, { "epoch": 0.16018931464457994, "grad_norm": 14.4375, "learning_rate": 4.976624984885704e-06, "loss": 1.668114185333252, "step": 880 }, { "epoch": 0.1605533812687722, "grad_norm": 13.5625, "learning_rate": 4.97650804698805e-06, "loss": 1.8590716123580933, "step": 882 }, { "epoch": 0.16091744789296442, "grad_norm": 15.0625, "learning_rate": 4.976390819045851e-06, "loss": 1.6209524869918823, "step": 884 }, { "epoch": 0.16128151451715664, "grad_norm": 16.875, "learning_rate": 4.976273301076309e-06, "loss": 2.1605849266052246, "step": 886 }, { "epoch": 0.16164558114134886, "grad_norm": 15.625, "learning_rate": 4.976155493096669e-06, "loss": 1.426637887954712, "step": 888 }, { "epoch": 0.16200964776554108, "grad_norm": 6.34375, "learning_rate": 4.976037395124218e-06, "loss": 1.105234980583191, "step": 890 }, { "epoch": 0.16237371438973333, "grad_norm": 6.96875, "learning_rate": 4.975919007176289e-06, "loss": 1.2164201736450195, "step": 892 }, { "epoch": 0.16273778101392555, "grad_norm": 3.703125, "learning_rate": 4.9758003292702515e-06, "loss": 0.9556933641433716, "step": 894 }, { "epoch": 0.16310184763811778, "grad_norm": 6.75, "learning_rate": 4.975681361423524e-06, "loss": 1.0577716827392578, "step": 896 }, { "epoch": 0.16346591426231, "grad_norm": 14.4375, "learning_rate": 4.9755621036535635e-06, "loss": 1.507400393486023, "step": 898 }, { "epoch": 0.16382998088650222, "grad_norm": 11.0, "learning_rate": 4.975442555977871e-06, "loss": 1.6189115047454834, "step": 900 }, { "epoch": 0.16419404751069447, "grad_norm": 20.5, "learning_rate": 4.975322718413988e-06, "loss": 1.636732816696167, "step": 902 }, { "epoch": 0.1645581141348867, "grad_norm": 15.8125, "learning_rate": 4.9752025909795035e-06, "loss": 1.5428428649902344, "step": 904 }, { "epoch": 0.1649221807590789, "grad_norm": 13.0625, "learning_rate": 4.975082173692042e-06, "loss": 1.5462164878845215, "step": 906 }, { "epoch": 0.16528624738327113, "grad_norm": 4.09375, "learning_rate": 4.974961466569276e-06, "loss": 1.1371560096740723, "step": 908 }, { "epoch": 0.16565031400746336, "grad_norm": 6.125, "learning_rate": 4.974840469628919e-06, "loss": 1.084640622138977, "step": 910 }, { "epoch": 0.1660143806316556, "grad_norm": 13.3125, "learning_rate": 4.974719182888725e-06, "loss": 1.086508870124817, "step": 912 }, { "epoch": 0.16637844725584783, "grad_norm": 9.75, "learning_rate": 4.974597606366495e-06, "loss": 1.5391892194747925, "step": 914 }, { "epoch": 0.16674251388004005, "grad_norm": 17.875, "learning_rate": 4.974475740080069e-06, "loss": 1.5544822216033936, "step": 916 }, { "epoch": 0.16710658050423227, "grad_norm": 16.0, "learning_rate": 4.974353584047329e-06, "loss": 1.3960521221160889, "step": 918 }, { "epoch": 0.1674706471284245, "grad_norm": 8.25, "learning_rate": 4.974231138286202e-06, "loss": 1.6951947212219238, "step": 920 }, { "epoch": 0.16783471375261672, "grad_norm": 70.0, "learning_rate": 4.974108402814657e-06, "loss": 1.5855159759521484, "step": 922 }, { "epoch": 0.16819878037680897, "grad_norm": 31.375, "learning_rate": 4.973985377650704e-06, "loss": 1.2763593196868896, "step": 924 }, { "epoch": 0.1685628470010012, "grad_norm": 14.0, "learning_rate": 4.973862062812397e-06, "loss": 1.5414307117462158, "step": 926 }, { "epoch": 0.1689269136251934, "grad_norm": 10.875, "learning_rate": 4.97373845831783e-06, "loss": 1.6462106704711914, "step": 928 }, { "epoch": 0.16929098024938563, "grad_norm": 18.125, "learning_rate": 4.9736145641851445e-06, "loss": 1.759765863418579, "step": 930 }, { "epoch": 0.16965504687357785, "grad_norm": 17.25, "learning_rate": 4.97349038043252e-06, "loss": 1.9544644355773926, "step": 932 }, { "epoch": 0.1700191134977701, "grad_norm": 17.25, "learning_rate": 4.973365907078179e-06, "loss": 1.5560743808746338, "step": 934 }, { "epoch": 0.17038318012196232, "grad_norm": 4.6875, "learning_rate": 4.973241144140391e-06, "loss": 1.2322574853897095, "step": 936 }, { "epoch": 0.17074724674615455, "grad_norm": 12.8125, "learning_rate": 4.973116091637459e-06, "loss": 1.453364610671997, "step": 938 }, { "epoch": 0.17111131337034677, "grad_norm": 26.625, "learning_rate": 4.972990749587738e-06, "loss": 1.4589955806732178, "step": 940 }, { "epoch": 0.171475379994539, "grad_norm": 17.0, "learning_rate": 4.972865118009621e-06, "loss": 1.6683200597763062, "step": 942 }, { "epoch": 0.17183944661873124, "grad_norm": 18.875, "learning_rate": 4.972739196921543e-06, "loss": 1.6839137077331543, "step": 944 }, { "epoch": 0.17220351324292346, "grad_norm": 30.0, "learning_rate": 4.972612986341983e-06, "loss": 1.5545344352722168, "step": 946 }, { "epoch": 0.17256757986711568, "grad_norm": 5.75, "learning_rate": 4.9724864862894605e-06, "loss": 1.1030503511428833, "step": 948 }, { "epoch": 0.1729316464913079, "grad_norm": 6.84375, "learning_rate": 4.97235969678254e-06, "loss": 1.5186653137207031, "step": 950 }, { "epoch": 0.17329571311550013, "grad_norm": 10.75, "learning_rate": 4.9722326178398286e-06, "loss": 1.6165885925292969, "step": 952 }, { "epoch": 0.17365977973969238, "grad_norm": 20.875, "learning_rate": 4.972105249479971e-06, "loss": 1.4254658222198486, "step": 954 }, { "epoch": 0.1740238463638846, "grad_norm": 18.875, "learning_rate": 4.9719775917216625e-06, "loss": 1.646559476852417, "step": 956 }, { "epoch": 0.17438791298807682, "grad_norm": 28.0, "learning_rate": 4.9718496445836325e-06, "loss": 1.7532553672790527, "step": 958 }, { "epoch": 0.17475197961226904, "grad_norm": 33.0, "learning_rate": 4.97172140808466e-06, "loss": 1.961106538772583, "step": 960 }, { "epoch": 0.17511604623646126, "grad_norm": 14.5625, "learning_rate": 4.971592882243561e-06, "loss": 1.4358066320419312, "step": 962 }, { "epoch": 0.1754801128606535, "grad_norm": 11.1875, "learning_rate": 4.971464067079196e-06, "loss": 1.5383143424987793, "step": 964 }, { "epoch": 0.17584417948484574, "grad_norm": 30.5, "learning_rate": 4.971334962610469e-06, "loss": 1.6120471954345703, "step": 966 }, { "epoch": 0.17620824610903796, "grad_norm": 24.625, "learning_rate": 4.9712055688563256e-06, "loss": 2.341151237487793, "step": 968 }, { "epoch": 0.17657231273323018, "grad_norm": 9.75, "learning_rate": 4.971075885835753e-06, "loss": 1.38916015625, "step": 970 }, { "epoch": 0.1769363793574224, "grad_norm": 4.09375, "learning_rate": 4.970945913567784e-06, "loss": 1.2353990077972412, "step": 972 }, { "epoch": 0.17730044598161462, "grad_norm": 20.125, "learning_rate": 4.970815652071488e-06, "loss": 1.47007417678833, "step": 974 }, { "epoch": 0.17766451260580687, "grad_norm": 10.875, "learning_rate": 4.970685101365983e-06, "loss": 1.7017881870269775, "step": 976 }, { "epoch": 0.1780285792299991, "grad_norm": 16.875, "learning_rate": 4.970554261470425e-06, "loss": 1.7518398761749268, "step": 978 }, { "epoch": 0.17839264585419132, "grad_norm": 9.5625, "learning_rate": 4.970423132404016e-06, "loss": 2.097485065460205, "step": 980 }, { "epoch": 0.17875671247838354, "grad_norm": 8.0, "learning_rate": 4.9702917141859965e-06, "loss": 1.3711477518081665, "step": 982 }, { "epoch": 0.17912077910257576, "grad_norm": 10.5, "learning_rate": 4.970160006835655e-06, "loss": 1.3914536237716675, "step": 984 }, { "epoch": 0.179484845726768, "grad_norm": 8.375, "learning_rate": 4.970028010372314e-06, "loss": 1.2097982168197632, "step": 986 }, { "epoch": 0.17984891235096023, "grad_norm": 37.25, "learning_rate": 4.969895724815348e-06, "loss": 1.254248857498169, "step": 988 }, { "epoch": 0.18021297897515245, "grad_norm": 11.8125, "learning_rate": 4.9697631501841685e-06, "loss": 1.3299647569656372, "step": 990 }, { "epoch": 0.18057704559934468, "grad_norm": 9.25, "learning_rate": 4.969630286498228e-06, "loss": 1.5818966627120972, "step": 992 }, { "epoch": 0.1809411122235369, "grad_norm": 7.96875, "learning_rate": 4.969497133777025e-06, "loss": 1.5132246017456055, "step": 994 }, { "epoch": 0.18130517884772915, "grad_norm": 14.5, "learning_rate": 4.9693636920401005e-06, "loss": 1.7159264087677002, "step": 996 }, { "epoch": 0.18166924547192137, "grad_norm": 15.9375, "learning_rate": 4.9692299613070346e-06, "loss": 2.1041908264160156, "step": 998 }, { "epoch": 0.1820333120961136, "grad_norm": 14.75, "learning_rate": 4.969095941597453e-06, "loss": 1.2940460443496704, "step": 1000 }, { "epoch": 0.18239737872030581, "grad_norm": 23.25, "learning_rate": 4.9689616329310204e-06, "loss": 1.2223471403121948, "step": 1002 }, { "epoch": 0.18276144534449804, "grad_norm": 11.75, "learning_rate": 4.968827035327449e-06, "loss": 0.988057017326355, "step": 1004 }, { "epoch": 0.18312551196869026, "grad_norm": 8.25, "learning_rate": 4.96869214880649e-06, "loss": 1.1488367319107056, "step": 1006 }, { "epoch": 0.1834895785928825, "grad_norm": 56.0, "learning_rate": 4.968556973387935e-06, "loss": 0.41850680112838745, "step": 1008 }, { "epoch": 0.18385364521707473, "grad_norm": 10.75, "learning_rate": 4.9684215090916224e-06, "loss": 0.6935019493103027, "step": 1010 }, { "epoch": 0.18421771184126695, "grad_norm": 12.0625, "learning_rate": 4.968285755937431e-06, "loss": 1.6634321212768555, "step": 1012 }, { "epoch": 0.18458177846545917, "grad_norm": 39.5, "learning_rate": 4.968149713945281e-06, "loss": 1.827513337135315, "step": 1014 }, { "epoch": 0.1849458450896514, "grad_norm": 26.75, "learning_rate": 4.968013383135137e-06, "loss": 1.7480419874191284, "step": 1016 }, { "epoch": 0.18530991171384364, "grad_norm": 21.375, "learning_rate": 4.967876763527005e-06, "loss": 2.1570892333984375, "step": 1018 }, { "epoch": 0.18567397833803587, "grad_norm": 9.625, "learning_rate": 4.967739855140934e-06, "loss": 1.7794275283813477, "step": 1020 }, { "epoch": 0.1860380449622281, "grad_norm": 15.6875, "learning_rate": 4.967602657997012e-06, "loss": 1.172874093055725, "step": 1022 }, { "epoch": 0.1864021115864203, "grad_norm": 16.625, "learning_rate": 4.967465172115374e-06, "loss": 1.9025425910949707, "step": 1024 }, { "epoch": 0.18676617821061253, "grad_norm": 5.75, "learning_rate": 4.967327397516197e-06, "loss": 1.5241506099700928, "step": 1026 }, { "epoch": 0.18713024483480478, "grad_norm": 19.625, "learning_rate": 4.967189334219697e-06, "loss": 1.2078227996826172, "step": 1028 }, { "epoch": 0.187494311458997, "grad_norm": 14.25, "learning_rate": 4.967050982246133e-06, "loss": 1.719788908958435, "step": 1030 }, { "epoch": 0.18785837808318923, "grad_norm": 25.25, "learning_rate": 4.96691234161581e-06, "loss": 1.5995606184005737, "step": 1032 }, { "epoch": 0.18822244470738145, "grad_norm": 11.1875, "learning_rate": 4.966773412349073e-06, "loss": 1.6250306367874146, "step": 1034 }, { "epoch": 0.18858651133157367, "grad_norm": 14.1875, "learning_rate": 4.966634194466306e-06, "loss": 1.491241693496704, "step": 1036 }, { "epoch": 0.1889505779557659, "grad_norm": 10.0, "learning_rate": 4.966494687987944e-06, "loss": 1.6580965518951416, "step": 1038 }, { "epoch": 0.18931464457995814, "grad_norm": 11.25, "learning_rate": 4.966354892934454e-06, "loss": 1.560678482055664, "step": 1040 }, { "epoch": 0.18967871120415036, "grad_norm": 7.6875, "learning_rate": 4.966214809326353e-06, "loss": 1.491178035736084, "step": 1042 }, { "epoch": 0.19004277782834259, "grad_norm": 7.0625, "learning_rate": 4.966074437184198e-06, "loss": 1.290732502937317, "step": 1044 }, { "epoch": 0.1904068444525348, "grad_norm": 6.71875, "learning_rate": 4.965933776528586e-06, "loss": 1.249894142150879, "step": 1046 }, { "epoch": 0.19077091107672703, "grad_norm": 18.625, "learning_rate": 4.965792827380159e-06, "loss": 1.5564920902252197, "step": 1048 }, { "epoch": 0.19113497770091928, "grad_norm": 22.5, "learning_rate": 4.965651589759602e-06, "loss": 1.5470733642578125, "step": 1050 }, { "epoch": 0.1914990443251115, "grad_norm": 9.0, "learning_rate": 4.965510063687641e-06, "loss": 1.4763240814208984, "step": 1052 }, { "epoch": 0.19186311094930372, "grad_norm": 11.125, "learning_rate": 4.965368249185043e-06, "loss": 1.5355173349380493, "step": 1054 }, { "epoch": 0.19222717757349594, "grad_norm": 8.375, "learning_rate": 4.965226146272619e-06, "loss": 1.5831536054611206, "step": 1056 }, { "epoch": 0.19259124419768817, "grad_norm": 17.625, "learning_rate": 4.965083754971223e-06, "loss": 1.7306116819381714, "step": 1058 }, { "epoch": 0.19295531082188042, "grad_norm": 11.625, "learning_rate": 4.964941075301749e-06, "loss": 1.905921220779419, "step": 1060 }, { "epoch": 0.19331937744607264, "grad_norm": 9.0, "learning_rate": 4.964798107285136e-06, "loss": 1.330867052078247, "step": 1062 }, { "epoch": 0.19368344407026486, "grad_norm": 3.046875, "learning_rate": 4.964654850942363e-06, "loss": 0.8626788854598999, "step": 1064 }, { "epoch": 0.19404751069445708, "grad_norm": 7.03125, "learning_rate": 4.964511306294454e-06, "loss": 1.229498028755188, "step": 1066 }, { "epoch": 0.1944115773186493, "grad_norm": 9.6875, "learning_rate": 4.96436747336247e-06, "loss": 1.5555689334869385, "step": 1068 }, { "epoch": 0.19477564394284155, "grad_norm": 20.25, "learning_rate": 4.964223352167522e-06, "loss": 1.5085597038269043, "step": 1070 }, { "epoch": 0.19513971056703378, "grad_norm": 6.5, "learning_rate": 4.964078942730757e-06, "loss": 1.1167781352996826, "step": 1072 }, { "epoch": 0.195503777191226, "grad_norm": 9.3125, "learning_rate": 4.963934245073366e-06, "loss": 1.2785255908966064, "step": 1074 }, { "epoch": 0.19586784381541822, "grad_norm": 5.9375, "learning_rate": 4.963789259216584e-06, "loss": 1.1449613571166992, "step": 1076 }, { "epoch": 0.19623191043961044, "grad_norm": 9.125, "learning_rate": 4.963643985181688e-06, "loss": 1.6972298622131348, "step": 1078 }, { "epoch": 0.19659597706380266, "grad_norm": 12.4375, "learning_rate": 4.963498422989993e-06, "loss": 1.5406492948532104, "step": 1080 }, { "epoch": 0.1969600436879949, "grad_norm": 8.1875, "learning_rate": 4.963352572662864e-06, "loss": 1.4850382804870605, "step": 1082 }, { "epoch": 0.19732411031218713, "grad_norm": 6.53125, "learning_rate": 4.9632064342217e-06, "loss": 1.420413851737976, "step": 1084 }, { "epoch": 0.19768817693637936, "grad_norm": 11.0, "learning_rate": 4.9630600076879486e-06, "loss": 1.1185743808746338, "step": 1086 }, { "epoch": 0.19805224356057158, "grad_norm": 4.96875, "learning_rate": 4.962913293083097e-06, "loss": 1.5190268754959106, "step": 1088 }, { "epoch": 0.1984163101847638, "grad_norm": 4.59375, "learning_rate": 4.9627662904286745e-06, "loss": 0.8890689611434937, "step": 1090 }, { "epoch": 0.19878037680895605, "grad_norm": 7.84375, "learning_rate": 4.962618999746253e-06, "loss": 1.3202004432678223, "step": 1092 }, { "epoch": 0.19914444343314827, "grad_norm": 17.625, "learning_rate": 4.962471421057447e-06, "loss": 1.6558799743652344, "step": 1094 }, { "epoch": 0.1995085100573405, "grad_norm": 14.75, "learning_rate": 4.962323554383913e-06, "loss": 1.6826921701431274, "step": 1096 }, { "epoch": 0.19987257668153272, "grad_norm": 20.5, "learning_rate": 4.962175399747351e-06, "loss": 1.0449330806732178, "step": 1098 }, { "epoch": 0.20023664330572494, "grad_norm": 9.9375, "learning_rate": 4.9620269571695e-06, "loss": 1.3753266334533691, "step": 1100 }, { "epoch": 0.2006007099299172, "grad_norm": 25.0, "learning_rate": 4.9618782266721455e-06, "loss": 1.4963014125823975, "step": 1102 }, { "epoch": 0.2009647765541094, "grad_norm": 10.1875, "learning_rate": 4.9617292082771106e-06, "loss": 1.8828684091567993, "step": 1104 }, { "epoch": 0.20132884317830163, "grad_norm": 7.59375, "learning_rate": 4.961579902006266e-06, "loss": 1.737760066986084, "step": 1106 }, { "epoch": 0.20169290980249385, "grad_norm": 8.6875, "learning_rate": 4.9614303078815195e-06, "loss": 1.6666333675384521, "step": 1108 }, { "epoch": 0.20205697642668607, "grad_norm": 16.875, "learning_rate": 4.961280425924825e-06, "loss": 1.4696074724197388, "step": 1110 }, { "epoch": 0.20242104305087832, "grad_norm": 6.8125, "learning_rate": 4.961130256158176e-06, "loss": 1.4471994638442993, "step": 1112 }, { "epoch": 0.20278510967507055, "grad_norm": 5.8125, "learning_rate": 4.96097979860361e-06, "loss": 1.267797827720642, "step": 1114 }, { "epoch": 0.20314917629926277, "grad_norm": 11.25, "learning_rate": 4.960829053283205e-06, "loss": 1.2800853252410889, "step": 1116 }, { "epoch": 0.203513242923455, "grad_norm": 17.375, "learning_rate": 4.960678020219083e-06, "loss": 1.7602601051330566, "step": 1118 }, { "epoch": 0.2038773095476472, "grad_norm": 22.625, "learning_rate": 4.960526699433408e-06, "loss": 1.5081733465194702, "step": 1120 }, { "epoch": 0.20424137617183943, "grad_norm": 18.0, "learning_rate": 4.960375090948385e-06, "loss": 1.9400660991668701, "step": 1122 }, { "epoch": 0.20460544279603168, "grad_norm": 28.125, "learning_rate": 4.960223194786261e-06, "loss": 1.49504816532135, "step": 1124 }, { "epoch": 0.2049695094202239, "grad_norm": 9.625, "learning_rate": 4.96007101096933e-06, "loss": 1.4075953960418701, "step": 1126 }, { "epoch": 0.20533357604441613, "grad_norm": 11.1875, "learning_rate": 4.959918539519919e-06, "loss": 0.9709517955780029, "step": 1128 }, { "epoch": 0.20569764266860835, "grad_norm": 3.90625, "learning_rate": 4.959765780460406e-06, "loss": 1.1713272333145142, "step": 1130 }, { "epoch": 0.20606170929280057, "grad_norm": 5.6875, "learning_rate": 4.959612733813207e-06, "loss": 0.9374972581863403, "step": 1132 }, { "epoch": 0.20642577591699282, "grad_norm": 12.3125, "learning_rate": 4.959459399600781e-06, "loss": 1.1903777122497559, "step": 1134 }, { "epoch": 0.20678984254118504, "grad_norm": 33.25, "learning_rate": 4.959305777845629e-06, "loss": 1.498299479484558, "step": 1136 }, { "epoch": 0.20715390916537726, "grad_norm": 12.9375, "learning_rate": 4.959151868570295e-06, "loss": 1.6460087299346924, "step": 1138 }, { "epoch": 0.2075179757895695, "grad_norm": 14.25, "learning_rate": 4.958997671797363e-06, "loss": 1.524951696395874, "step": 1140 }, { "epoch": 0.2078820424137617, "grad_norm": 13.1875, "learning_rate": 4.9588431875494626e-06, "loss": 1.4647579193115234, "step": 1142 }, { "epoch": 0.20824610903795396, "grad_norm": 10.0625, "learning_rate": 4.958688415849263e-06, "loss": 1.4470139741897583, "step": 1144 }, { "epoch": 0.20861017566214618, "grad_norm": 9.8125, "learning_rate": 4.958533356719476e-06, "loss": 1.4590744972229004, "step": 1146 }, { "epoch": 0.2089742422863384, "grad_norm": 5.625, "learning_rate": 4.958378010182856e-06, "loss": 1.2866430282592773, "step": 1148 }, { "epoch": 0.20933830891053062, "grad_norm": 7.15625, "learning_rate": 4.958222376262199e-06, "loss": 0.8470004796981812, "step": 1150 }, { "epoch": 0.20970237553472285, "grad_norm": 24.375, "learning_rate": 4.958066454980345e-06, "loss": 1.0619269609451294, "step": 1152 }, { "epoch": 0.21006644215891507, "grad_norm": 10.0, "learning_rate": 4.957910246360175e-06, "loss": 0.6137018203735352, "step": 1154 }, { "epoch": 0.21043050878310732, "grad_norm": 10.1875, "learning_rate": 4.9577537504246095e-06, "loss": 1.5130021572113037, "step": 1156 }, { "epoch": 0.21079457540729954, "grad_norm": 4.21875, "learning_rate": 4.957596967196616e-06, "loss": 0.9950746297836304, "step": 1158 }, { "epoch": 0.21115864203149176, "grad_norm": 8.8125, "learning_rate": 4.957439896699201e-06, "loss": 1.4392368793487549, "step": 1160 }, { "epoch": 0.21152270865568398, "grad_norm": 11.25, "learning_rate": 4.957282538955413e-06, "loss": 1.4692758321762085, "step": 1162 }, { "epoch": 0.2118867752798762, "grad_norm": 136.0, "learning_rate": 4.957124893988347e-06, "loss": 1.6144437789916992, "step": 1164 }, { "epoch": 0.21225084190406845, "grad_norm": 48.5, "learning_rate": 4.9569669618211316e-06, "loss": 0.7389823198318481, "step": 1166 }, { "epoch": 0.21261490852826068, "grad_norm": 5.75, "learning_rate": 4.956808742476948e-06, "loss": 1.1057299375534058, "step": 1168 }, { "epoch": 0.2129789751524529, "grad_norm": 30.0, "learning_rate": 4.9566502359790095e-06, "loss": 1.6763724088668823, "step": 1170 }, { "epoch": 0.21334304177664512, "grad_norm": 9.8125, "learning_rate": 4.9564914423505784e-06, "loss": 1.589447259902954, "step": 1172 }, { "epoch": 0.21370710840083734, "grad_norm": 8.125, "learning_rate": 4.956332361614958e-06, "loss": 1.524857521057129, "step": 1174 }, { "epoch": 0.2140711750250296, "grad_norm": 4.875, "learning_rate": 4.9561729937954925e-06, "loss": 1.4731521606445312, "step": 1176 }, { "epoch": 0.2144352416492218, "grad_norm": 17.875, "learning_rate": 4.956013338915568e-06, "loss": 1.0429184436798096, "step": 1178 }, { "epoch": 0.21479930827341404, "grad_norm": 7.03125, "learning_rate": 4.955853396998611e-06, "loss": 1.3547178506851196, "step": 1180 }, { "epoch": 0.21516337489760626, "grad_norm": 35.5, "learning_rate": 4.955693168068095e-06, "loss": 1.6174616813659668, "step": 1182 }, { "epoch": 0.21552744152179848, "grad_norm": 40.0, "learning_rate": 4.955532652147533e-06, "loss": 2.0516693592071533, "step": 1184 }, { "epoch": 0.21589150814599073, "grad_norm": 13.0, "learning_rate": 4.9553718492604794e-06, "loss": 1.570616602897644, "step": 1186 }, { "epoch": 0.21625557477018295, "grad_norm": 11.0625, "learning_rate": 4.955210759430531e-06, "loss": 1.4672772884368896, "step": 1188 }, { "epoch": 0.21661964139437517, "grad_norm": 14.1875, "learning_rate": 4.9550493826813285e-06, "loss": 1.432410478591919, "step": 1190 }, { "epoch": 0.2169837080185674, "grad_norm": 11.125, "learning_rate": 4.954887719036551e-06, "loss": 1.6189162731170654, "step": 1192 }, { "epoch": 0.21734777464275962, "grad_norm": 16.75, "learning_rate": 4.954725768519924e-06, "loss": 1.359072208404541, "step": 1194 }, { "epoch": 0.21771184126695184, "grad_norm": 8.8125, "learning_rate": 4.954563531155211e-06, "loss": 1.412648320198059, "step": 1196 }, { "epoch": 0.2180759078911441, "grad_norm": 13.5, "learning_rate": 4.9544010069662215e-06, "loss": 1.4863377809524536, "step": 1198 }, { "epoch": 0.2184399745153363, "grad_norm": 8.625, "learning_rate": 4.954238195976805e-06, "loss": 1.2107902765274048, "step": 1200 }, { "epoch": 0.21880404113952853, "grad_norm": 13.1875, "learning_rate": 4.954075098210853e-06, "loss": 0.8278253078460693, "step": 1202 }, { "epoch": 0.21916810776372075, "grad_norm": 11.375, "learning_rate": 4.9539117136923e-06, "loss": 0.6940520405769348, "step": 1204 }, { "epoch": 0.21953217438791298, "grad_norm": 8.6875, "learning_rate": 4.953748042445121e-06, "loss": 1.216526985168457, "step": 1206 }, { "epoch": 0.21989624101210523, "grad_norm": 12.5, "learning_rate": 4.953584084493335e-06, "loss": 1.0924643278121948, "step": 1208 }, { "epoch": 0.22026030763629745, "grad_norm": 17.75, "learning_rate": 4.953419839861001e-06, "loss": 1.6490846872329712, "step": 1210 }, { "epoch": 0.22062437426048967, "grad_norm": 37.25, "learning_rate": 4.953255308572224e-06, "loss": 1.464174747467041, "step": 1212 }, { "epoch": 0.2209884408846819, "grad_norm": 12.8125, "learning_rate": 4.953090490651143e-06, "loss": 1.4101375341415405, "step": 1214 }, { "epoch": 0.2213525075088741, "grad_norm": 15.875, "learning_rate": 4.952925386121951e-06, "loss": 1.4389835596084595, "step": 1216 }, { "epoch": 0.22171657413306636, "grad_norm": 21.25, "learning_rate": 4.952759995008871e-06, "loss": 1.1927437782287598, "step": 1218 }, { "epoch": 0.22208064075725858, "grad_norm": 4.75, "learning_rate": 4.952594317336176e-06, "loss": 1.148517370223999, "step": 1220 }, { "epoch": 0.2224447073814508, "grad_norm": 9.0625, "learning_rate": 4.952428353128178e-06, "loss": 1.1994445323944092, "step": 1222 }, { "epoch": 0.22280877400564303, "grad_norm": 147.0, "learning_rate": 4.952262102409232e-06, "loss": 0.9708088040351868, "step": 1224 }, { "epoch": 0.22317284062983525, "grad_norm": 13.75, "learning_rate": 4.952095565203735e-06, "loss": 1.394553780555725, "step": 1226 }, { "epoch": 0.2235369072540275, "grad_norm": 9.0, "learning_rate": 4.9519287415361235e-06, "loss": 1.3822214603424072, "step": 1228 }, { "epoch": 0.22390097387821972, "grad_norm": 10.3125, "learning_rate": 4.9517616314308814e-06, "loss": 1.3066985607147217, "step": 1230 }, { "epoch": 0.22426504050241194, "grad_norm": 17.0, "learning_rate": 4.951594234912528e-06, "loss": 1.6092782020568848, "step": 1232 }, { "epoch": 0.22462910712660417, "grad_norm": 10.0, "learning_rate": 4.9514265520056306e-06, "loss": 1.3286519050598145, "step": 1234 }, { "epoch": 0.2249931737507964, "grad_norm": 8.4375, "learning_rate": 4.9512585827347945e-06, "loss": 1.2676196098327637, "step": 1236 }, { "epoch": 0.2253572403749886, "grad_norm": 16.625, "learning_rate": 4.95109032712467e-06, "loss": 0.9929633140563965, "step": 1238 }, { "epoch": 0.22572130699918086, "grad_norm": 17.125, "learning_rate": 4.950921785199947e-06, "loss": 1.0534217357635498, "step": 1240 }, { "epoch": 0.22608537362337308, "grad_norm": 12.6875, "learning_rate": 4.950752956985358e-06, "loss": 1.6984401941299438, "step": 1242 }, { "epoch": 0.2264494402475653, "grad_norm": 15.125, "learning_rate": 4.950583842505679e-06, "loss": 1.2302775382995605, "step": 1244 }, { "epoch": 0.22681350687175753, "grad_norm": 12.3125, "learning_rate": 4.950414441785725e-06, "loss": 1.1167516708374023, "step": 1246 }, { "epoch": 0.22717757349594975, "grad_norm": 8.4375, "learning_rate": 4.950244754850357e-06, "loss": 1.4290108680725098, "step": 1248 }, { "epoch": 0.227541640120142, "grad_norm": 6.40625, "learning_rate": 4.950074781724473e-06, "loss": 1.1943378448486328, "step": 1250 }, { "epoch": 0.22790570674433422, "grad_norm": 20.0, "learning_rate": 4.94990452243302e-06, "loss": 0.8593637943267822, "step": 1252 }, { "epoch": 0.22826977336852644, "grad_norm": 10.25, "learning_rate": 4.94973397700098e-06, "loss": 0.977672815322876, "step": 1254 }, { "epoch": 0.22863383999271866, "grad_norm": 12.875, "learning_rate": 4.94956314545338e-06, "loss": 1.5777567625045776, "step": 1256 }, { "epoch": 0.22899790661691088, "grad_norm": 13.375, "learning_rate": 4.949392027815288e-06, "loss": 1.8342951536178589, "step": 1258 }, { "epoch": 0.22936197324110313, "grad_norm": 13.0625, "learning_rate": 4.949220624111819e-06, "loss": 1.4170873165130615, "step": 1260 }, { "epoch": 0.22972603986529536, "grad_norm": 8.3125, "learning_rate": 4.949048934368122e-06, "loss": 1.3099360466003418, "step": 1262 }, { "epoch": 0.23009010648948758, "grad_norm": 11.3125, "learning_rate": 4.948876958609391e-06, "loss": 1.5055506229400635, "step": 1264 }, { "epoch": 0.2304541731136798, "grad_norm": 10.0625, "learning_rate": 4.948704696860866e-06, "loss": 1.63662588596344, "step": 1266 }, { "epoch": 0.23081823973787202, "grad_norm": 7.59375, "learning_rate": 4.948532149147823e-06, "loss": 1.3518517017364502, "step": 1268 }, { "epoch": 0.23118230636206427, "grad_norm": 46.25, "learning_rate": 4.948359315495585e-06, "loss": 1.1541385650634766, "step": 1270 }, { "epoch": 0.2315463729862565, "grad_norm": 12.1875, "learning_rate": 4.948186195929513e-06, "loss": 1.5124117136001587, "step": 1272 }, { "epoch": 0.23191043961044872, "grad_norm": 7.75, "learning_rate": 4.9480127904750134e-06, "loss": 0.8574434518814087, "step": 1274 }, { "epoch": 0.23227450623464094, "grad_norm": 18.75, "learning_rate": 4.947839099157529e-06, "loss": 1.3625457286834717, "step": 1276 }, { "epoch": 0.23263857285883316, "grad_norm": 12.875, "learning_rate": 4.9476651220025525e-06, "loss": 1.0178558826446533, "step": 1278 }, { "epoch": 0.23300263948302538, "grad_norm": 9.0625, "learning_rate": 4.947490859035612e-06, "loss": 1.6901695728302002, "step": 1280 }, { "epoch": 0.23336670610721763, "grad_norm": 7.25, "learning_rate": 4.94731631028228e-06, "loss": 1.5088622570037842, "step": 1282 }, { "epoch": 0.23373077273140985, "grad_norm": 10.75, "learning_rate": 4.947141475768171e-06, "loss": 1.4696831703186035, "step": 1284 }, { "epoch": 0.23409483935560207, "grad_norm": 17.75, "learning_rate": 4.946966355518943e-06, "loss": 1.5505940914154053, "step": 1286 }, { "epoch": 0.2344589059797943, "grad_norm": 33.0, "learning_rate": 4.946790949560291e-06, "loss": 1.0533626079559326, "step": 1288 }, { "epoch": 0.23482297260398652, "grad_norm": 6.40625, "learning_rate": 4.9466152579179575e-06, "loss": 0.5825610756874084, "step": 1290 }, { "epoch": 0.23518703922817877, "grad_norm": 6.625, "learning_rate": 4.946439280617724e-06, "loss": 1.3869107961654663, "step": 1292 }, { "epoch": 0.235551105852371, "grad_norm": 6.71875, "learning_rate": 4.946263017685414e-06, "loss": 1.0499608516693115, "step": 1294 }, { "epoch": 0.2359151724765632, "grad_norm": 44.0, "learning_rate": 4.946086469146895e-06, "loss": 1.1648304462432861, "step": 1296 }, { "epoch": 0.23627923910075543, "grad_norm": 19.875, "learning_rate": 4.945909635028071e-06, "loss": 0.7895115613937378, "step": 1298 }, { "epoch": 0.23664330572494766, "grad_norm": 13.875, "learning_rate": 4.945732515354896e-06, "loss": 1.5313125848770142, "step": 1300 }, { "epoch": 0.2370073723491399, "grad_norm": 7.71875, "learning_rate": 4.945555110153358e-06, "loss": 1.4455209970474243, "step": 1302 }, { "epoch": 0.23737143897333213, "grad_norm": 7.65625, "learning_rate": 4.945377419449494e-06, "loss": 1.4758248329162598, "step": 1304 }, { "epoch": 0.23773550559752435, "grad_norm": 24.125, "learning_rate": 4.945199443269377e-06, "loss": 1.713889718055725, "step": 1306 }, { "epoch": 0.23809957222171657, "grad_norm": 13.1875, "learning_rate": 4.945021181639126e-06, "loss": 1.8196473121643066, "step": 1308 }, { "epoch": 0.2384636388459088, "grad_norm": 2.703125, "learning_rate": 4.944842634584897e-06, "loss": 0.8688795566558838, "step": 1310 }, { "epoch": 0.23882770547010101, "grad_norm": 15.3125, "learning_rate": 4.9446638021328944e-06, "loss": 1.2760381698608398, "step": 1312 }, { "epoch": 0.23919177209429326, "grad_norm": 13.5625, "learning_rate": 4.94448468430936e-06, "loss": 1.4693682193756104, "step": 1314 }, { "epoch": 0.2395558387184855, "grad_norm": 16.375, "learning_rate": 4.944305281140578e-06, "loss": 1.5682768821716309, "step": 1316 }, { "epoch": 0.2399199053426777, "grad_norm": 4.90625, "learning_rate": 4.9441255926528755e-06, "loss": 1.3700590133666992, "step": 1318 }, { "epoch": 0.24028397196686993, "grad_norm": 11.25, "learning_rate": 4.943945618872621e-06, "loss": 0.9841552376747131, "step": 1320 }, { "epoch": 0.24064803859106215, "grad_norm": 92.0, "learning_rate": 4.943765359826226e-06, "loss": 1.9470609426498413, "step": 1322 }, { "epoch": 0.2410121052152544, "grad_norm": 27.375, "learning_rate": 4.943584815540141e-06, "loss": 1.7613348960876465, "step": 1324 }, { "epoch": 0.24137617183944662, "grad_norm": 12.1875, "learning_rate": 4.9434039860408615e-06, "loss": 1.5088882446289062, "step": 1326 }, { "epoch": 0.24174023846363885, "grad_norm": 8.125, "learning_rate": 4.943222871354922e-06, "loss": 1.4682915210723877, "step": 1328 }, { "epoch": 0.24210430508783107, "grad_norm": 9.0625, "learning_rate": 4.943041471508902e-06, "loss": 1.6899994611740112, "step": 1330 }, { "epoch": 0.2424683717120233, "grad_norm": 13.75, "learning_rate": 4.94285978652942e-06, "loss": 1.736012578010559, "step": 1332 }, { "epoch": 0.24283243833621554, "grad_norm": 28.375, "learning_rate": 4.942677816443139e-06, "loss": 1.5456054210662842, "step": 1334 }, { "epoch": 0.24319650496040776, "grad_norm": 10.125, "learning_rate": 4.942495561276761e-06, "loss": 1.5503977537155151, "step": 1336 }, { "epoch": 0.24356057158459998, "grad_norm": 12.875, "learning_rate": 4.942313021057031e-06, "loss": 1.6288011074066162, "step": 1338 }, { "epoch": 0.2439246382087922, "grad_norm": 15.6875, "learning_rate": 4.9421301958107385e-06, "loss": 1.4899564981460571, "step": 1340 }, { "epoch": 0.24428870483298443, "grad_norm": 19.5, "learning_rate": 4.941947085564709e-06, "loss": 1.503650188446045, "step": 1342 }, { "epoch": 0.24465277145717668, "grad_norm": 4.375, "learning_rate": 4.941763690345814e-06, "loss": 1.3934472799301147, "step": 1344 }, { "epoch": 0.2450168380813689, "grad_norm": 11.3125, "learning_rate": 4.941580010180969e-06, "loss": 1.5408638715744019, "step": 1346 }, { "epoch": 0.24538090470556112, "grad_norm": 15.125, "learning_rate": 4.941396045097124e-06, "loss": 1.8255137205123901, "step": 1348 }, { "epoch": 0.24574497132975334, "grad_norm": 13.0, "learning_rate": 4.941211795121278e-06, "loss": 1.5376935005187988, "step": 1350 }, { "epoch": 0.24610903795394556, "grad_norm": 16.125, "learning_rate": 4.941027260280468e-06, "loss": 1.6967862844467163, "step": 1352 }, { "epoch": 0.24647310457813779, "grad_norm": 8.9375, "learning_rate": 4.940842440601774e-06, "loss": 1.4979592561721802, "step": 1354 }, { "epoch": 0.24683717120233004, "grad_norm": 5.3125, "learning_rate": 4.940657336112317e-06, "loss": 1.3248778581619263, "step": 1356 }, { "epoch": 0.24720123782652226, "grad_norm": 14.4375, "learning_rate": 4.9404719468392615e-06, "loss": 1.4855530261993408, "step": 1358 }, { "epoch": 0.24756530445071448, "grad_norm": 24.625, "learning_rate": 4.940286272809811e-06, "loss": 1.5398468971252441, "step": 1360 }, { "epoch": 0.2479293710749067, "grad_norm": 5.5, "learning_rate": 4.940100314051214e-06, "loss": 0.8858805298805237, "step": 1362 }, { "epoch": 0.24829343769909892, "grad_norm": 10.5, "learning_rate": 4.9399140705907575e-06, "loss": 1.1802723407745361, "step": 1364 }, { "epoch": 0.24865750432329117, "grad_norm": 10.5, "learning_rate": 4.939727542455774e-06, "loss": 1.6216623783111572, "step": 1366 }, { "epoch": 0.2490215709474834, "grad_norm": 8.0, "learning_rate": 4.939540729673634e-06, "loss": 1.5625545978546143, "step": 1368 }, { "epoch": 0.24938563757167562, "grad_norm": 14.25, "learning_rate": 4.939353632271752e-06, "loss": 1.4306856393814087, "step": 1370 }, { "epoch": 0.24974970419586784, "grad_norm": 15.8125, "learning_rate": 4.939166250277584e-06, "loss": 1.5217489004135132, "step": 1372 }, { "epoch": 0.2501137708200601, "grad_norm": 9.0625, "learning_rate": 4.938978583718629e-06, "loss": 1.3186440467834473, "step": 1374 }, { "epoch": 0.2504778374442523, "grad_norm": 7.5625, "learning_rate": 4.9387906326224235e-06, "loss": 1.5230623483657837, "step": 1376 }, { "epoch": 0.25084190406844453, "grad_norm": 22.25, "learning_rate": 4.93860239701655e-06, "loss": 1.6378216743469238, "step": 1378 }, { "epoch": 0.2512059706926367, "grad_norm": 10.3125, "learning_rate": 4.93841387692863e-06, "loss": 1.406336784362793, "step": 1380 }, { "epoch": 0.251570037316829, "grad_norm": 6.65625, "learning_rate": 4.938225072386332e-06, "loss": 1.6698787212371826, "step": 1382 }, { "epoch": 0.2519341039410212, "grad_norm": 8.125, "learning_rate": 4.9380359834173575e-06, "loss": 1.4710549116134644, "step": 1384 }, { "epoch": 0.2522981705652134, "grad_norm": 18.25, "learning_rate": 4.937846610049457e-06, "loss": 1.6586592197418213, "step": 1386 }, { "epoch": 0.25266223718940567, "grad_norm": 8.0, "learning_rate": 4.93765695231042e-06, "loss": 1.4598597288131714, "step": 1388 }, { "epoch": 0.25302630381359786, "grad_norm": 12.3125, "learning_rate": 4.937467010228079e-06, "loss": 1.583903431892395, "step": 1390 }, { "epoch": 0.2533903704377901, "grad_norm": 12.75, "learning_rate": 4.9372767838303035e-06, "loss": 1.6516391038894653, "step": 1392 }, { "epoch": 0.25375443706198236, "grad_norm": 13.0625, "learning_rate": 4.937086273145014e-06, "loss": 2.0364017486572266, "step": 1394 }, { "epoch": 0.25411850368617456, "grad_norm": 14.125, "learning_rate": 4.936895478200162e-06, "loss": 1.8258267641067505, "step": 1396 }, { "epoch": 0.2544825703103668, "grad_norm": 12.5625, "learning_rate": 4.936704399023749e-06, "loss": 1.6076844930648804, "step": 1398 }, { "epoch": 0.254846636934559, "grad_norm": 8.3125, "learning_rate": 4.936513035643814e-06, "loss": 1.4629310369491577, "step": 1400 }, { "epoch": 0.25521070355875125, "grad_norm": 36.0, "learning_rate": 4.93632138808844e-06, "loss": 1.5793068408966064, "step": 1402 }, { "epoch": 0.2555747701829435, "grad_norm": 48.5, "learning_rate": 4.936129456385748e-06, "loss": 1.6921769380569458, "step": 1404 }, { "epoch": 0.2559388368071357, "grad_norm": 7.3125, "learning_rate": 4.935937240563906e-06, "loss": 1.2959438562393188, "step": 1406 }, { "epoch": 0.25630290343132794, "grad_norm": 11.5625, "learning_rate": 4.935744740651119e-06, "loss": 1.4843640327453613, "step": 1408 }, { "epoch": 0.25666697005552014, "grad_norm": 7.90625, "learning_rate": 4.935551956675636e-06, "loss": 1.3071551322937012, "step": 1410 }, { "epoch": 0.2570310366797124, "grad_norm": 8.0625, "learning_rate": 4.9353588886657486e-06, "loss": 1.5213901996612549, "step": 1412 }, { "epoch": 0.25739510330390464, "grad_norm": 10.4375, "learning_rate": 4.935165536649788e-06, "loss": 1.3940701484680176, "step": 1414 }, { "epoch": 0.25775916992809683, "grad_norm": 7.8125, "learning_rate": 4.934971900656125e-06, "loss": 1.3361718654632568, "step": 1416 }, { "epoch": 0.2581232365522891, "grad_norm": 21.25, "learning_rate": 4.934777980713178e-06, "loss": 1.164435625076294, "step": 1418 }, { "epoch": 0.2584873031764813, "grad_norm": 8.3125, "learning_rate": 4.934583776849404e-06, "loss": 1.395206332206726, "step": 1420 }, { "epoch": 0.2588513698006735, "grad_norm": 10.6875, "learning_rate": 4.934389289093301e-06, "loss": 1.7019124031066895, "step": 1422 }, { "epoch": 0.2592154364248658, "grad_norm": 18.625, "learning_rate": 4.93419451747341e-06, "loss": 2.0475809574127197, "step": 1424 }, { "epoch": 0.25957950304905797, "grad_norm": 12.5625, "learning_rate": 4.933999462018311e-06, "loss": 1.5055116415023804, "step": 1426 }, { "epoch": 0.2599435696732502, "grad_norm": 16.125, "learning_rate": 4.933804122756628e-06, "loss": 1.5948069095611572, "step": 1428 }, { "epoch": 0.2603076362974424, "grad_norm": 5.25, "learning_rate": 4.933608499717029e-06, "loss": 1.4785643815994263, "step": 1430 }, { "epoch": 0.26067170292163466, "grad_norm": 23.75, "learning_rate": 4.933412592928218e-06, "loss": 0.9416211247444153, "step": 1432 }, { "epoch": 0.2610357695458269, "grad_norm": 19.375, "learning_rate": 4.933216402418943e-06, "loss": 0.8640943765640259, "step": 1434 }, { "epoch": 0.2613998361700191, "grad_norm": 8.5, "learning_rate": 4.933019928217997e-06, "loss": 1.7342121601104736, "step": 1436 }, { "epoch": 0.26176390279421136, "grad_norm": 6.5, "learning_rate": 4.932823170354211e-06, "loss": 1.2391496896743774, "step": 1438 }, { "epoch": 0.26212796941840355, "grad_norm": 11.0, "learning_rate": 4.932626128856457e-06, "loss": 1.4514696598052979, "step": 1440 }, { "epoch": 0.2624920360425958, "grad_norm": 4.09375, "learning_rate": 4.932428803753651e-06, "loss": 1.6537638902664185, "step": 1442 }, { "epoch": 0.26285610266678805, "grad_norm": 5.65625, "learning_rate": 4.9322311950747495e-06, "loss": 1.1202993392944336, "step": 1444 }, { "epoch": 0.26322016929098024, "grad_norm": 12.125, "learning_rate": 4.93203330284875e-06, "loss": 1.2976114749908447, "step": 1446 }, { "epoch": 0.2635842359151725, "grad_norm": 10.75, "learning_rate": 4.931835127104694e-06, "loss": 1.507428526878357, "step": 1448 }, { "epoch": 0.2639483025393647, "grad_norm": 9.3125, "learning_rate": 4.931636667871662e-06, "loss": 1.4791808128356934, "step": 1450 }, { "epoch": 0.26431236916355694, "grad_norm": 13.8125, "learning_rate": 4.931437925178777e-06, "loss": 1.5374300479888916, "step": 1452 }, { "epoch": 0.26467643578774913, "grad_norm": 4.28125, "learning_rate": 4.931238899055204e-06, "loss": 1.3075952529907227, "step": 1454 }, { "epoch": 0.2650405024119414, "grad_norm": 15.1875, "learning_rate": 4.931039589530149e-06, "loss": 1.0934590101242065, "step": 1456 }, { "epoch": 0.26540456903613363, "grad_norm": 24.75, "learning_rate": 4.93083999663286e-06, "loss": 1.858839750289917, "step": 1458 }, { "epoch": 0.2657686356603258, "grad_norm": 64.5, "learning_rate": 4.930640120392628e-06, "loss": 1.4588422775268555, "step": 1460 }, { "epoch": 0.2661327022845181, "grad_norm": 19.625, "learning_rate": 4.930439960838781e-06, "loss": 1.5768492221832275, "step": 1462 }, { "epoch": 0.26649676890871027, "grad_norm": 5.96875, "learning_rate": 4.930239518000693e-06, "loss": 1.5265140533447266, "step": 1464 }, { "epoch": 0.2668608355329025, "grad_norm": 9.4375, "learning_rate": 4.93003879190778e-06, "loss": 1.426647424697876, "step": 1466 }, { "epoch": 0.26722490215709477, "grad_norm": 24.0, "learning_rate": 4.929837782589494e-06, "loss": 1.9441301822662354, "step": 1468 }, { "epoch": 0.26758896878128696, "grad_norm": 9.3125, "learning_rate": 4.9296364900753345e-06, "loss": 1.1998927593231201, "step": 1470 }, { "epoch": 0.2679530354054792, "grad_norm": 12.0625, "learning_rate": 4.929434914394842e-06, "loss": 1.5073827505111694, "step": 1472 }, { "epoch": 0.2683171020296714, "grad_norm": 9.5625, "learning_rate": 4.929233055577594e-06, "loss": 1.5349888801574707, "step": 1474 }, { "epoch": 0.26868116865386366, "grad_norm": 11.75, "learning_rate": 4.9290309136532136e-06, "loss": 1.5648607015609741, "step": 1476 }, { "epoch": 0.2690452352780559, "grad_norm": 10.8125, "learning_rate": 4.928828488651363e-06, "loss": 1.2843973636627197, "step": 1478 }, { "epoch": 0.2694093019022481, "grad_norm": 8.0, "learning_rate": 4.928625780601751e-06, "loss": 1.7909092903137207, "step": 1480 }, { "epoch": 0.26977336852644035, "grad_norm": 27.0, "learning_rate": 4.928422789534121e-06, "loss": 1.2462284564971924, "step": 1482 }, { "epoch": 0.27013743515063254, "grad_norm": 15.125, "learning_rate": 4.9282195154782605e-06, "loss": 1.5644384622573853, "step": 1484 }, { "epoch": 0.2705015017748248, "grad_norm": 18.25, "learning_rate": 4.928015958464002e-06, "loss": 1.4136170148849487, "step": 1486 }, { "epoch": 0.27086556839901704, "grad_norm": 10.375, "learning_rate": 4.927812118521215e-06, "loss": 1.8244026899337769, "step": 1488 }, { "epoch": 0.27122963502320924, "grad_norm": 14.625, "learning_rate": 4.927607995679812e-06, "loss": 1.8218878507614136, "step": 1490 }, { "epoch": 0.2715937016474015, "grad_norm": 15.75, "learning_rate": 4.927403589969747e-06, "loss": 1.861344575881958, "step": 1492 }, { "epoch": 0.2719577682715937, "grad_norm": 10.125, "learning_rate": 4.927198901421018e-06, "loss": 1.6647491455078125, "step": 1494 }, { "epoch": 0.27232183489578593, "grad_norm": 25.75, "learning_rate": 4.926993930063658e-06, "loss": 1.1363540887832642, "step": 1496 }, { "epoch": 0.2726859015199782, "grad_norm": 39.0, "learning_rate": 4.92678867592775e-06, "loss": 1.2200970649719238, "step": 1498 }, { "epoch": 0.2730499681441704, "grad_norm": 7.625, "learning_rate": 4.926583139043412e-06, "loss": 1.4953429698944092, "step": 1500 }, { "epoch": 0.2734140347683626, "grad_norm": 3.609375, "learning_rate": 4.926377319440806e-06, "loss": 0.9606925845146179, "step": 1502 }, { "epoch": 0.2737781013925548, "grad_norm": 16.125, "learning_rate": 4.926171217150135e-06, "loss": 1.4604928493499756, "step": 1504 }, { "epoch": 0.27414216801674707, "grad_norm": 34.25, "learning_rate": 4.925964832201644e-06, "loss": 1.8967225551605225, "step": 1506 }, { "epoch": 0.2745062346409393, "grad_norm": 15.25, "learning_rate": 4.925758164625619e-06, "loss": 1.5837507247924805, "step": 1508 }, { "epoch": 0.2748703012651315, "grad_norm": 9.875, "learning_rate": 4.925551214452389e-06, "loss": 1.5635753870010376, "step": 1510 }, { "epoch": 0.27523436788932376, "grad_norm": 9.3125, "learning_rate": 4.92534398171232e-06, "loss": 1.6564198732376099, "step": 1512 }, { "epoch": 0.27559843451351596, "grad_norm": 27.375, "learning_rate": 4.925136466435826e-06, "loss": 2.018310546875, "step": 1514 }, { "epoch": 0.2759625011377082, "grad_norm": 112.5, "learning_rate": 4.9249286686533575e-06, "loss": 1.6500873565673828, "step": 1516 }, { "epoch": 0.27632656776190045, "grad_norm": 12.0, "learning_rate": 4.924720588395406e-06, "loss": 1.3833949565887451, "step": 1518 }, { "epoch": 0.27669063438609265, "grad_norm": 5.28125, "learning_rate": 4.924512225692509e-06, "loss": 1.544885277748108, "step": 1520 }, { "epoch": 0.2770547010102849, "grad_norm": 15.0, "learning_rate": 4.924303580575244e-06, "loss": 1.1007860898971558, "step": 1522 }, { "epoch": 0.2774187676344771, "grad_norm": 33.5, "learning_rate": 4.924094653074226e-06, "loss": 1.2831106185913086, "step": 1524 }, { "epoch": 0.27778283425866934, "grad_norm": 11.625, "learning_rate": 4.9238854432201144e-06, "loss": 1.512636423110962, "step": 1526 }, { "epoch": 0.27814690088286154, "grad_norm": 9.9375, "learning_rate": 4.9236759510436125e-06, "loss": 1.6301493644714355, "step": 1528 }, { "epoch": 0.2785109675070538, "grad_norm": 64.5, "learning_rate": 4.92346617657546e-06, "loss": 1.8329832553863525, "step": 1530 }, { "epoch": 0.27887503413124604, "grad_norm": 33.5, "learning_rate": 4.923256119846441e-06, "loss": 1.811387300491333, "step": 1532 }, { "epoch": 0.27923910075543823, "grad_norm": 13.75, "learning_rate": 4.923045780887381e-06, "loss": 1.1624234914779663, "step": 1534 }, { "epoch": 0.2796031673796305, "grad_norm": 8.5625, "learning_rate": 4.922835159729145e-06, "loss": 1.4665863513946533, "step": 1536 }, { "epoch": 0.2799672340038227, "grad_norm": 12.5625, "learning_rate": 4.922624256402644e-06, "loss": 1.372377634048462, "step": 1538 }, { "epoch": 0.2803313006280149, "grad_norm": 8.25, "learning_rate": 4.922413070938823e-06, "loss": 0.9352389574050903, "step": 1540 }, { "epoch": 0.2806953672522072, "grad_norm": 12.5, "learning_rate": 4.922201603368676e-06, "loss": 1.4835054874420166, "step": 1542 }, { "epoch": 0.28105943387639937, "grad_norm": 9.9375, "learning_rate": 4.921989853723234e-06, "loss": 1.3877606391906738, "step": 1544 }, { "epoch": 0.2814235005005916, "grad_norm": 11.6875, "learning_rate": 4.921777822033569e-06, "loss": 0.9911926984786987, "step": 1546 }, { "epoch": 0.2817875671247838, "grad_norm": 11.0625, "learning_rate": 4.921565508330797e-06, "loss": 1.6913743019104004, "step": 1548 }, { "epoch": 0.28215163374897606, "grad_norm": 22.125, "learning_rate": 4.921352912646075e-06, "loss": 1.3811343908309937, "step": 1550 }, { "epoch": 0.2825157003731683, "grad_norm": 36.75, "learning_rate": 4.921140035010599e-06, "loss": 1.4110978841781616, "step": 1552 }, { "epoch": 0.2828797669973605, "grad_norm": 13.1875, "learning_rate": 4.920926875455608e-06, "loss": 1.5414402484893799, "step": 1554 }, { "epoch": 0.28324383362155275, "grad_norm": 23.25, "learning_rate": 4.920713434012384e-06, "loss": 1.4706974029541016, "step": 1556 }, { "epoch": 0.28360790024574495, "grad_norm": 9.9375, "learning_rate": 4.920499710712247e-06, "loss": 1.5283700227737427, "step": 1558 }, { "epoch": 0.2839719668699372, "grad_norm": 31.0, "learning_rate": 4.92028570558656e-06, "loss": 1.2847185134887695, "step": 1560 }, { "epoch": 0.28433603349412945, "grad_norm": 4.5625, "learning_rate": 4.9200714186667284e-06, "loss": 1.0840245485305786, "step": 1562 }, { "epoch": 0.28470010011832164, "grad_norm": 7.53125, "learning_rate": 4.919856849984198e-06, "loss": 1.274387240409851, "step": 1564 }, { "epoch": 0.2850641667425139, "grad_norm": 10.6875, "learning_rate": 4.9196419995704545e-06, "loss": 1.4173680543899536, "step": 1566 }, { "epoch": 0.2854282333667061, "grad_norm": 13.4375, "learning_rate": 4.919426867457028e-06, "loss": 1.5290443897247314, "step": 1568 }, { "epoch": 0.28579229999089834, "grad_norm": 7.71875, "learning_rate": 4.919211453675486e-06, "loss": 1.6210980415344238, "step": 1570 }, { "epoch": 0.2861563666150906, "grad_norm": 33.5, "learning_rate": 4.918995758257443e-06, "loss": 1.1563513278961182, "step": 1572 }, { "epoch": 0.2865204332392828, "grad_norm": 16.625, "learning_rate": 4.918779781234548e-06, "loss": 1.3691879510879517, "step": 1574 }, { "epoch": 0.28688449986347503, "grad_norm": 9.75, "learning_rate": 4.918563522638498e-06, "loss": 1.3036433458328247, "step": 1576 }, { "epoch": 0.2872485664876672, "grad_norm": 9.25, "learning_rate": 4.918346982501025e-06, "loss": 1.6119672060012817, "step": 1578 }, { "epoch": 0.2876126331118595, "grad_norm": 42.25, "learning_rate": 4.918130160853906e-06, "loss": 1.3039236068725586, "step": 1580 }, { "epoch": 0.2879766997360517, "grad_norm": 28.75, "learning_rate": 4.917913057728961e-06, "loss": 1.6524569988250732, "step": 1582 }, { "epoch": 0.2883407663602439, "grad_norm": 8.375, "learning_rate": 4.917695673158046e-06, "loss": 1.3654242753982544, "step": 1584 }, { "epoch": 0.28870483298443617, "grad_norm": 4.40625, "learning_rate": 4.9174780071730635e-06, "loss": 1.0268429517745972, "step": 1586 }, { "epoch": 0.28906889960862836, "grad_norm": 78.0, "learning_rate": 4.917260059805954e-06, "loss": 1.1582659482955933, "step": 1588 }, { "epoch": 0.2894329662328206, "grad_norm": 4.78125, "learning_rate": 4.917041831088702e-06, "loss": 1.3455580472946167, "step": 1590 }, { "epoch": 0.28979703285701286, "grad_norm": 29.375, "learning_rate": 4.916823321053329e-06, "loss": 1.1064903736114502, "step": 1592 }, { "epoch": 0.29016109948120505, "grad_norm": 12.3125, "learning_rate": 4.916604529731902e-06, "loss": 1.4881951808929443, "step": 1594 }, { "epoch": 0.2905251661053973, "grad_norm": 8.6875, "learning_rate": 4.916385457156528e-06, "loss": 1.4490782022476196, "step": 1596 }, { "epoch": 0.2908892327295895, "grad_norm": 7.71875, "learning_rate": 4.916166103359353e-06, "loss": 1.5107438564300537, "step": 1598 }, { "epoch": 0.29125329935378175, "grad_norm": 12.3125, "learning_rate": 4.91594646837257e-06, "loss": 1.3751193284988403, "step": 1600 }, { "epoch": 0.291617365977974, "grad_norm": 18.625, "learning_rate": 4.915726552228406e-06, "loss": 1.193555474281311, "step": 1602 }, { "epoch": 0.2919814326021662, "grad_norm": 9.375, "learning_rate": 4.915506354959135e-06, "loss": 0.6499952077865601, "step": 1604 }, { "epoch": 0.29234549922635844, "grad_norm": 5.125, "learning_rate": 4.915285876597069e-06, "loss": 1.266210675239563, "step": 1606 }, { "epoch": 0.29270956585055063, "grad_norm": 16.625, "learning_rate": 4.9150651171745635e-06, "loss": 1.6331934928894043, "step": 1608 }, { "epoch": 0.2930736324747429, "grad_norm": 18.375, "learning_rate": 4.914844076724012e-06, "loss": 1.424376130104065, "step": 1610 }, { "epoch": 0.2934376990989351, "grad_norm": 8.4375, "learning_rate": 4.914622755277852e-06, "loss": 1.4336001873016357, "step": 1612 }, { "epoch": 0.29380176572312733, "grad_norm": 8.3125, "learning_rate": 4.9144011528685635e-06, "loss": 1.5310535430908203, "step": 1614 }, { "epoch": 0.2941658323473196, "grad_norm": 6.875, "learning_rate": 4.9141792695286625e-06, "loss": 1.3885955810546875, "step": 1616 }, { "epoch": 0.29452989897151177, "grad_norm": 6.375, "learning_rate": 4.913957105290712e-06, "loss": 1.2195767164230347, "step": 1618 }, { "epoch": 0.294893965595704, "grad_norm": 4.9375, "learning_rate": 4.913734660187314e-06, "loss": 1.2497265338897705, "step": 1620 }, { "epoch": 0.2952580322198962, "grad_norm": 7.53125, "learning_rate": 4.913511934251109e-06, "loss": 1.3131662607192993, "step": 1622 }, { "epoch": 0.29562209884408847, "grad_norm": 8.75, "learning_rate": 4.913288927514782e-06, "loss": 1.388770580291748, "step": 1624 }, { "epoch": 0.2959861654682807, "grad_norm": 7.46875, "learning_rate": 4.91306564001106e-06, "loss": 1.2544684410095215, "step": 1626 }, { "epoch": 0.2963502320924729, "grad_norm": 12.25, "learning_rate": 4.912842071772708e-06, "loss": 1.6028367280960083, "step": 1628 }, { "epoch": 0.29671429871666516, "grad_norm": 16.75, "learning_rate": 4.912618222832534e-06, "loss": 1.8615212440490723, "step": 1630 }, { "epoch": 0.29707836534085735, "grad_norm": 9.25, "learning_rate": 4.912394093223386e-06, "loss": 1.0779528617858887, "step": 1632 }, { "epoch": 0.2974424319650496, "grad_norm": 27.375, "learning_rate": 4.912169682978156e-06, "loss": 1.3994189500808716, "step": 1634 }, { "epoch": 0.29780649858924185, "grad_norm": 7.9375, "learning_rate": 4.911944992129773e-06, "loss": 0.26486337184906006, "step": 1636 }, { "epoch": 0.29817056521343405, "grad_norm": 45.5, "learning_rate": 4.911720020711212e-06, "loss": 0.5341527462005615, "step": 1638 }, { "epoch": 0.2985346318376263, "grad_norm": 79.5, "learning_rate": 4.911494768755487e-06, "loss": 1.3533393144607544, "step": 1640 }, { "epoch": 0.2988986984618185, "grad_norm": 20.125, "learning_rate": 4.91126923629565e-06, "loss": 1.0230789184570312, "step": 1642 }, { "epoch": 0.29926276508601074, "grad_norm": 7.34375, "learning_rate": 4.911043423364797e-06, "loss": 1.595827341079712, "step": 1644 }, { "epoch": 0.299626831710203, "grad_norm": 7.5, "learning_rate": 4.9108173299960685e-06, "loss": 1.4891338348388672, "step": 1646 }, { "epoch": 0.2999908983343952, "grad_norm": 5.53125, "learning_rate": 4.910590956222639e-06, "loss": 0.9356772899627686, "step": 1648 }, { "epoch": 0.30035496495858743, "grad_norm": 7.65625, "learning_rate": 4.9103643020777304e-06, "loss": 1.5626338720321655, "step": 1650 }, { "epoch": 0.30071903158277963, "grad_norm": 8.3125, "learning_rate": 4.910137367594601e-06, "loss": 1.3338781595230103, "step": 1652 }, { "epoch": 0.3010830982069719, "grad_norm": 12.3125, "learning_rate": 4.909910152806556e-06, "loss": 1.1236662864685059, "step": 1654 }, { "epoch": 0.3014471648311641, "grad_norm": 21.625, "learning_rate": 4.9096826577469355e-06, "loss": 1.4933905601501465, "step": 1656 }, { "epoch": 0.3018112314553563, "grad_norm": 15.5, "learning_rate": 4.9094548824491254e-06, "loss": 1.4233638048171997, "step": 1658 }, { "epoch": 0.30217529807954857, "grad_norm": 26.625, "learning_rate": 4.909226826946548e-06, "loss": 0.7125585079193115, "step": 1660 }, { "epoch": 0.30253936470374077, "grad_norm": 33.25, "learning_rate": 4.908998491272673e-06, "loss": 1.870632290840149, "step": 1662 }, { "epoch": 0.302903431327933, "grad_norm": 9.4375, "learning_rate": 4.908769875461005e-06, "loss": 1.4414547681808472, "step": 1664 }, { "epoch": 0.30326749795212526, "grad_norm": 23.5, "learning_rate": 4.908540979545092e-06, "loss": 1.5465025901794434, "step": 1666 }, { "epoch": 0.30363156457631746, "grad_norm": 11.5, "learning_rate": 4.9083118035585266e-06, "loss": 1.4594249725341797, "step": 1668 }, { "epoch": 0.3039956312005097, "grad_norm": 21.25, "learning_rate": 4.908082347534937e-06, "loss": 1.4396207332611084, "step": 1670 }, { "epoch": 0.3043596978247019, "grad_norm": 11.75, "learning_rate": 4.907852611507995e-06, "loss": 0.8985010981559753, "step": 1672 }, { "epoch": 0.30472376444889415, "grad_norm": 14.125, "learning_rate": 4.907622595511416e-06, "loss": 1.1935596466064453, "step": 1674 }, { "epoch": 0.3050878310730864, "grad_norm": 5.5, "learning_rate": 4.90739229957895e-06, "loss": 0.6197656393051147, "step": 1676 }, { "epoch": 0.3054518976972786, "grad_norm": 7.1875, "learning_rate": 4.907161723744395e-06, "loss": 1.3275530338287354, "step": 1678 }, { "epoch": 0.30581596432147085, "grad_norm": 17.75, "learning_rate": 4.906930868041586e-06, "loss": 1.3283809423446655, "step": 1680 }, { "epoch": 0.30618003094566304, "grad_norm": 7.3125, "learning_rate": 4.906699732504401e-06, "loss": 1.6180342435836792, "step": 1682 }, { "epoch": 0.3065440975698553, "grad_norm": 5.8125, "learning_rate": 4.906468317166756e-06, "loss": 0.9833647608757019, "step": 1684 }, { "epoch": 0.3069081641940475, "grad_norm": 8.875, "learning_rate": 4.9062366220626125e-06, "loss": 1.3363556861877441, "step": 1686 }, { "epoch": 0.30727223081823973, "grad_norm": 8.5, "learning_rate": 4.9060046472259695e-06, "loss": 1.4134596586227417, "step": 1688 }, { "epoch": 0.307636297442432, "grad_norm": 5.84375, "learning_rate": 4.905772392690869e-06, "loss": 1.3034048080444336, "step": 1690 }, { "epoch": 0.3080003640666242, "grad_norm": 14.3125, "learning_rate": 4.905539858491394e-06, "loss": 1.1930062770843506, "step": 1692 }, { "epoch": 0.3083644306908164, "grad_norm": 14.8125, "learning_rate": 4.9053070446616666e-06, "loss": 1.4095170497894287, "step": 1694 }, { "epoch": 0.3087284973150086, "grad_norm": 36.0, "learning_rate": 4.905073951235853e-06, "loss": 1.3340262174606323, "step": 1696 }, { "epoch": 0.30909256393920087, "grad_norm": 20.125, "learning_rate": 4.9048405782481566e-06, "loss": 0.7319252490997314, "step": 1698 }, { "epoch": 0.3094566305633931, "grad_norm": 12.6875, "learning_rate": 4.904606925732826e-06, "loss": 1.0980385541915894, "step": 1700 }, { "epoch": 0.3098206971875853, "grad_norm": 16.0, "learning_rate": 4.904372993724146e-06, "loss": 1.401090383529663, "step": 1702 }, { "epoch": 0.31018476381177756, "grad_norm": 11.75, "learning_rate": 4.904138782256448e-06, "loss": 1.603826880455017, "step": 1704 }, { "epoch": 0.31054883043596976, "grad_norm": 13.125, "learning_rate": 4.9039042913641e-06, "loss": 1.4457886219024658, "step": 1706 }, { "epoch": 0.310912897060162, "grad_norm": 6.75, "learning_rate": 4.903669521081513e-06, "loss": 1.2897664308547974, "step": 1708 }, { "epoch": 0.31127696368435426, "grad_norm": 27.125, "learning_rate": 4.90343447144314e-06, "loss": 1.230724811553955, "step": 1710 }, { "epoch": 0.31164103030854645, "grad_norm": 10.3125, "learning_rate": 4.9031991424834716e-06, "loss": 1.1595288515090942, "step": 1712 }, { "epoch": 0.3120050969327387, "grad_norm": 6.5, "learning_rate": 4.902963534237042e-06, "loss": 1.276283621788025, "step": 1714 }, { "epoch": 0.3123691635569309, "grad_norm": 16.75, "learning_rate": 4.902727646738424e-06, "loss": 1.395148515701294, "step": 1716 }, { "epoch": 0.31273323018112315, "grad_norm": 7.59375, "learning_rate": 4.902491480022238e-06, "loss": 1.2297887802124023, "step": 1718 }, { "epoch": 0.3130972968053154, "grad_norm": 7.28125, "learning_rate": 4.9022550341231355e-06, "loss": 1.1767199039459229, "step": 1720 }, { "epoch": 0.3134613634295076, "grad_norm": 13.875, "learning_rate": 4.902018309075816e-06, "loss": 1.6155898571014404, "step": 1722 }, { "epoch": 0.31382543005369984, "grad_norm": 13.375, "learning_rate": 4.9017813049150185e-06, "loss": 1.811079502105713, "step": 1724 }, { "epoch": 0.31418949667789203, "grad_norm": 12.125, "learning_rate": 4.901544021675521e-06, "loss": 1.4806177616119385, "step": 1726 }, { "epoch": 0.3145535633020843, "grad_norm": 9.1875, "learning_rate": 4.9013064593921456e-06, "loss": 1.4905201196670532, "step": 1728 }, { "epoch": 0.31491762992627653, "grad_norm": 6.46875, "learning_rate": 4.901068618099752e-06, "loss": 1.1806972026824951, "step": 1730 }, { "epoch": 0.3152816965504687, "grad_norm": 24.5, "learning_rate": 4.900830497833243e-06, "loss": 1.4952486753463745, "step": 1732 }, { "epoch": 0.315645763174661, "grad_norm": 7.8125, "learning_rate": 4.9005920986275625e-06, "loss": 1.4671142101287842, "step": 1734 }, { "epoch": 0.31600982979885317, "grad_norm": 22.875, "learning_rate": 4.900353420517693e-06, "loss": 1.0870821475982666, "step": 1736 }, { "epoch": 0.3163738964230454, "grad_norm": 12.125, "learning_rate": 4.900114463538661e-06, "loss": 1.3239197731018066, "step": 1738 }, { "epoch": 0.31673796304723767, "grad_norm": 27.375, "learning_rate": 4.899875227725532e-06, "loss": 1.5688568353652954, "step": 1740 }, { "epoch": 0.31710202967142986, "grad_norm": 8.875, "learning_rate": 4.899635713113412e-06, "loss": 1.7790281772613525, "step": 1742 }, { "epoch": 0.3174660962956221, "grad_norm": 28.5, "learning_rate": 4.899395919737451e-06, "loss": 1.7641477584838867, "step": 1744 }, { "epoch": 0.3178301629198143, "grad_norm": 14.125, "learning_rate": 4.899155847632836e-06, "loss": 1.9671759605407715, "step": 1746 }, { "epoch": 0.31819422954400656, "grad_norm": 6.9375, "learning_rate": 4.898915496834796e-06, "loss": 1.2605907917022705, "step": 1748 }, { "epoch": 0.3185582961681988, "grad_norm": 6.71875, "learning_rate": 4.898674867378603e-06, "loss": 1.093799114227295, "step": 1750 }, { "epoch": 0.318922362792391, "grad_norm": 10.875, "learning_rate": 4.898433959299569e-06, "loss": 1.577582836151123, "step": 1752 }, { "epoch": 0.31928642941658325, "grad_norm": 16.5, "learning_rate": 4.898192772633043e-06, "loss": 1.3264565467834473, "step": 1754 }, { "epoch": 0.31965049604077544, "grad_norm": 14.0, "learning_rate": 4.897951307414423e-06, "loss": 1.5290167331695557, "step": 1756 }, { "epoch": 0.3200145626649677, "grad_norm": 30.125, "learning_rate": 4.897709563679138e-06, "loss": 1.2932648658752441, "step": 1758 }, { "epoch": 0.3203786292891599, "grad_norm": 8.3125, "learning_rate": 4.897467541462666e-06, "loss": 1.2476685047149658, "step": 1760 }, { "epoch": 0.32074269591335214, "grad_norm": 6.25, "learning_rate": 4.897225240800523e-06, "loss": 1.104599118232727, "step": 1762 }, { "epoch": 0.3211067625375444, "grad_norm": 7.28125, "learning_rate": 4.896982661728263e-06, "loss": 1.6615452766418457, "step": 1764 }, { "epoch": 0.3214708291617366, "grad_norm": 10.5, "learning_rate": 4.896739804281486e-06, "loss": 1.4220013618469238, "step": 1766 }, { "epoch": 0.32183489578592883, "grad_norm": 5.09375, "learning_rate": 4.89649666849583e-06, "loss": 1.2345436811447144, "step": 1768 }, { "epoch": 0.322198962410121, "grad_norm": 10.4375, "learning_rate": 4.896253254406973e-06, "loss": 1.4038641452789307, "step": 1770 }, { "epoch": 0.3225630290343133, "grad_norm": 12.125, "learning_rate": 4.8960095620506364e-06, "loss": 1.2185895442962646, "step": 1772 }, { "epoch": 0.3229270956585055, "grad_norm": 12.375, "learning_rate": 4.89576559146258e-06, "loss": 0.5059472322463989, "step": 1774 }, { "epoch": 0.3232911622826977, "grad_norm": 9.25, "learning_rate": 4.895521342678606e-06, "loss": 1.336120367050171, "step": 1776 }, { "epoch": 0.32365522890688997, "grad_norm": 8.5, "learning_rate": 4.895276815734558e-06, "loss": 1.6235952377319336, "step": 1778 }, { "epoch": 0.32401929553108216, "grad_norm": 18.125, "learning_rate": 4.895032010666316e-06, "loss": 1.5352451801300049, "step": 1780 }, { "epoch": 0.3243833621552744, "grad_norm": 19.625, "learning_rate": 4.894786927509808e-06, "loss": 1.733658790588379, "step": 1782 }, { "epoch": 0.32474742877946666, "grad_norm": 7.71875, "learning_rate": 4.894541566300996e-06, "loss": 1.5690175294876099, "step": 1784 }, { "epoch": 0.32511149540365886, "grad_norm": 10.625, "learning_rate": 4.894295927075888e-06, "loss": 1.2554556131362915, "step": 1786 }, { "epoch": 0.3254755620278511, "grad_norm": 21.25, "learning_rate": 4.894050009870529e-06, "loss": 1.7130169868469238, "step": 1788 }, { "epoch": 0.3258396286520433, "grad_norm": 22.125, "learning_rate": 4.893803814721007e-06, "loss": 1.9033994674682617, "step": 1790 }, { "epoch": 0.32620369527623555, "grad_norm": 14.3125, "learning_rate": 4.8935573416634515e-06, "loss": 1.6412822008132935, "step": 1792 }, { "epoch": 0.3265677619004278, "grad_norm": 13.5625, "learning_rate": 4.8933105907340285e-06, "loss": 1.238384485244751, "step": 1794 }, { "epoch": 0.32693182852462, "grad_norm": 17.875, "learning_rate": 4.893063561968951e-06, "loss": 0.8862287402153015, "step": 1796 }, { "epoch": 0.32729589514881224, "grad_norm": 15.625, "learning_rate": 4.892816255404465e-06, "loss": 0.7366995811462402, "step": 1798 }, { "epoch": 0.32765996177300444, "grad_norm": 14.875, "learning_rate": 4.8925686710768665e-06, "loss": 1.824641227722168, "step": 1800 }, { "epoch": 0.3280240283971967, "grad_norm": 10.75, "learning_rate": 4.892320809022484e-06, "loss": 1.4951683282852173, "step": 1802 }, { "epoch": 0.32838809502138894, "grad_norm": 12.1875, "learning_rate": 4.892072669277692e-06, "loss": 1.5310840606689453, "step": 1804 }, { "epoch": 0.32875216164558113, "grad_norm": 5.25, "learning_rate": 4.8918242518789046e-06, "loss": 1.3562496900558472, "step": 1806 }, { "epoch": 0.3291162282697734, "grad_norm": 5.21875, "learning_rate": 4.891575556862574e-06, "loss": 1.089478611946106, "step": 1808 }, { "epoch": 0.3294802948939656, "grad_norm": 7.09375, "learning_rate": 4.891326584265198e-06, "loss": 0.8965445756912231, "step": 1810 }, { "epoch": 0.3298443615181578, "grad_norm": 32.0, "learning_rate": 4.89107733412331e-06, "loss": 1.2015230655670166, "step": 1812 }, { "epoch": 0.3302084281423501, "grad_norm": 34.25, "learning_rate": 4.890827806473486e-06, "loss": 1.742699146270752, "step": 1814 }, { "epoch": 0.33057249476654227, "grad_norm": 13.3125, "learning_rate": 4.890578001352345e-06, "loss": 1.6372833251953125, "step": 1816 }, { "epoch": 0.3309365613907345, "grad_norm": 10.1875, "learning_rate": 4.890327918796543e-06, "loss": 1.4791233539581299, "step": 1818 }, { "epoch": 0.3313006280149267, "grad_norm": 12.0625, "learning_rate": 4.890077558842782e-06, "loss": 1.93343186378479, "step": 1820 }, { "epoch": 0.33166469463911896, "grad_norm": 12.25, "learning_rate": 4.889826921527797e-06, "loss": 1.443381905555725, "step": 1822 }, { "epoch": 0.3320287612633112, "grad_norm": 12.0625, "learning_rate": 4.889576006888372e-06, "loss": 1.3324389457702637, "step": 1824 }, { "epoch": 0.3323928278875034, "grad_norm": 223.0, "learning_rate": 4.8893248149613235e-06, "loss": 0.8466244339942932, "step": 1826 }, { "epoch": 0.33275689451169566, "grad_norm": 9.0625, "learning_rate": 4.889073345783517e-06, "loss": 0.9138334393501282, "step": 1828 }, { "epoch": 0.33312096113588785, "grad_norm": 8.8125, "learning_rate": 4.888821599391852e-06, "loss": 1.0970096588134766, "step": 1830 }, { "epoch": 0.3334850277600801, "grad_norm": 8.5625, "learning_rate": 4.888569575823272e-06, "loss": 1.583977222442627, "step": 1832 }, { "epoch": 0.33384909438427235, "grad_norm": 163.0, "learning_rate": 4.8883172751147615e-06, "loss": 1.3237519264221191, "step": 1834 }, { "epoch": 0.33421316100846454, "grad_norm": 11.5625, "learning_rate": 4.888064697303342e-06, "loss": 1.1429455280303955, "step": 1836 }, { "epoch": 0.3345772276326568, "grad_norm": 37.0, "learning_rate": 4.88781184242608e-06, "loss": 1.8034274578094482, "step": 1838 }, { "epoch": 0.334941294256849, "grad_norm": 2.421875, "learning_rate": 4.8875587105200816e-06, "loss": 0.9290924668312073, "step": 1840 }, { "epoch": 0.33530536088104124, "grad_norm": 18.25, "learning_rate": 4.8873053016224916e-06, "loss": 1.313586711883545, "step": 1842 }, { "epoch": 0.33566942750523343, "grad_norm": 9.9375, "learning_rate": 4.887051615770497e-06, "loss": 1.783752202987671, "step": 1844 }, { "epoch": 0.3360334941294257, "grad_norm": 36.0, "learning_rate": 4.886797653001326e-06, "loss": 1.207167625427246, "step": 1846 }, { "epoch": 0.33639756075361793, "grad_norm": 9.1875, "learning_rate": 4.886543413352245e-06, "loss": 1.6194645166397095, "step": 1848 }, { "epoch": 0.3367616273778101, "grad_norm": 13.25, "learning_rate": 4.886288896860565e-06, "loss": 1.6892048120498657, "step": 1850 }, { "epoch": 0.3371256940020024, "grad_norm": 12.25, "learning_rate": 4.886034103563633e-06, "loss": 1.4255638122558594, "step": 1852 }, { "epoch": 0.33748976062619457, "grad_norm": 22.0, "learning_rate": 4.885779033498838e-06, "loss": 1.5247515439987183, "step": 1854 }, { "epoch": 0.3378538272503868, "grad_norm": 12.0, "learning_rate": 4.885523686703615e-06, "loss": 1.8602932691574097, "step": 1856 }, { "epoch": 0.33821789387457907, "grad_norm": 8.9375, "learning_rate": 4.8852680632154305e-06, "loss": 1.2452037334442139, "step": 1858 }, { "epoch": 0.33858196049877126, "grad_norm": 18.375, "learning_rate": 4.8850121630718e-06, "loss": 0.8291829228401184, "step": 1860 }, { "epoch": 0.3389460271229635, "grad_norm": 13.6875, "learning_rate": 4.884755986310271e-06, "loss": 1.5668028593063354, "step": 1862 }, { "epoch": 0.3393100937471557, "grad_norm": 18.25, "learning_rate": 4.8844995329684416e-06, "loss": 1.7037612199783325, "step": 1864 }, { "epoch": 0.33967416037134796, "grad_norm": 11.6875, "learning_rate": 4.884242803083943e-06, "loss": 1.2408268451690674, "step": 1866 }, { "epoch": 0.3400382269955402, "grad_norm": 21.875, "learning_rate": 4.883985796694448e-06, "loss": 1.200797200202942, "step": 1868 }, { "epoch": 0.3404022936197324, "grad_norm": 10.875, "learning_rate": 4.883728513837672e-06, "loss": 1.2831562757492065, "step": 1870 }, { "epoch": 0.34076636024392465, "grad_norm": 14.4375, "learning_rate": 4.883470954551373e-06, "loss": 1.3389482498168945, "step": 1872 }, { "epoch": 0.34113042686811684, "grad_norm": 22.875, "learning_rate": 4.883213118873342e-06, "loss": 1.7483201026916504, "step": 1874 }, { "epoch": 0.3414944934923091, "grad_norm": 55.75, "learning_rate": 4.882955006841419e-06, "loss": 2.0661890506744385, "step": 1876 }, { "epoch": 0.34185856011650134, "grad_norm": 7.5, "learning_rate": 4.88269661849348e-06, "loss": 1.3901770114898682, "step": 1878 }, { "epoch": 0.34222262674069354, "grad_norm": 19.625, "learning_rate": 4.882437953867441e-06, "loss": 1.3201402425765991, "step": 1880 }, { "epoch": 0.3425866933648858, "grad_norm": 20.75, "learning_rate": 4.882179013001262e-06, "loss": 2.316580295562744, "step": 1882 }, { "epoch": 0.342950759989078, "grad_norm": 32.0, "learning_rate": 4.8819197959329404e-06, "loss": 1.2262027263641357, "step": 1884 }, { "epoch": 0.34331482661327023, "grad_norm": 11.4375, "learning_rate": 4.881660302700516e-06, "loss": 0.9581204652786255, "step": 1886 }, { "epoch": 0.3436788932374625, "grad_norm": 20.375, "learning_rate": 4.881400533342068e-06, "loss": 1.5597262382507324, "step": 1888 }, { "epoch": 0.3440429598616547, "grad_norm": 22.0, "learning_rate": 4.881140487895715e-06, "loss": 1.6273586750030518, "step": 1890 }, { "epoch": 0.3444070264858469, "grad_norm": 16.875, "learning_rate": 4.8808801663996195e-06, "loss": 1.4734547138214111, "step": 1892 }, { "epoch": 0.3447710931100391, "grad_norm": 14.375, "learning_rate": 4.880619568891982e-06, "loss": 1.6394697427749634, "step": 1894 }, { "epoch": 0.34513515973423137, "grad_norm": 21.875, "learning_rate": 4.880358695411045e-06, "loss": 1.5059329271316528, "step": 1896 }, { "epoch": 0.3454992263584236, "grad_norm": 6.28125, "learning_rate": 4.880097545995089e-06, "loss": 0.9710061550140381, "step": 1898 }, { "epoch": 0.3458632929826158, "grad_norm": 26.0, "learning_rate": 4.879836120682438e-06, "loss": 0.9650970697402954, "step": 1900 }, { "epoch": 0.34622735960680806, "grad_norm": 16.5, "learning_rate": 4.879574419511456e-06, "loss": 0.8568592071533203, "step": 1902 }, { "epoch": 0.34659142623100025, "grad_norm": 8.5, "learning_rate": 4.879312442520543e-06, "loss": 1.436322808265686, "step": 1904 }, { "epoch": 0.3469554928551925, "grad_norm": 16.125, "learning_rate": 4.879050189748147e-06, "loss": 1.510069727897644, "step": 1906 }, { "epoch": 0.34731955947938475, "grad_norm": 15.4375, "learning_rate": 4.878787661232749e-06, "loss": 1.3854396343231201, "step": 1908 }, { "epoch": 0.34768362610357695, "grad_norm": 12.0625, "learning_rate": 4.878524857012877e-06, "loss": 1.4114618301391602, "step": 1910 }, { "epoch": 0.3480476927277692, "grad_norm": 21.625, "learning_rate": 4.878261777127095e-06, "loss": 1.2796521186828613, "step": 1912 }, { "epoch": 0.3484117593519614, "grad_norm": 2.859375, "learning_rate": 4.877998421614009e-06, "loss": 0.7274551391601562, "step": 1914 }, { "epoch": 0.34877582597615364, "grad_norm": 4.9375, "learning_rate": 4.877734790512265e-06, "loss": 1.4244027137756348, "step": 1916 }, { "epoch": 0.34913989260034584, "grad_norm": 5.3125, "learning_rate": 4.877470883860551e-06, "loss": 1.1214369535446167, "step": 1918 }, { "epoch": 0.3495039592245381, "grad_norm": 11.3125, "learning_rate": 4.877206701697594e-06, "loss": 1.326357364654541, "step": 1920 }, { "epoch": 0.34986802584873034, "grad_norm": 7.28125, "learning_rate": 4.8769422440621606e-06, "loss": 1.282468557357788, "step": 1922 }, { "epoch": 0.35023209247292253, "grad_norm": 7.0625, "learning_rate": 4.876677510993058e-06, "loss": 0.9851529598236084, "step": 1924 }, { "epoch": 0.3505961590971148, "grad_norm": 16.5, "learning_rate": 4.876412502529138e-06, "loss": 1.5737724304199219, "step": 1926 }, { "epoch": 0.350960225721307, "grad_norm": 9.625, "learning_rate": 4.876147218709287e-06, "loss": 1.3451647758483887, "step": 1928 }, { "epoch": 0.3513242923454992, "grad_norm": 11.125, "learning_rate": 4.875881659572436e-06, "loss": 1.3311004638671875, "step": 1930 }, { "epoch": 0.3516883589696915, "grad_norm": 7.34375, "learning_rate": 4.875615825157553e-06, "loss": 1.3036038875579834, "step": 1932 }, { "epoch": 0.35205242559388367, "grad_norm": 17.375, "learning_rate": 4.875349715503648e-06, "loss": 1.5920460224151611, "step": 1934 }, { "epoch": 0.3524164922180759, "grad_norm": 15.875, "learning_rate": 4.875083330649774e-06, "loss": 1.7447134256362915, "step": 1936 }, { "epoch": 0.3527805588422681, "grad_norm": 23.375, "learning_rate": 4.874816670635019e-06, "loss": 1.7228376865386963, "step": 1938 }, { "epoch": 0.35314462546646036, "grad_norm": 25.375, "learning_rate": 4.874549735498516e-06, "loss": 1.0123932361602783, "step": 1940 }, { "epoch": 0.3535086920906526, "grad_norm": 19.0, "learning_rate": 4.8742825252794354e-06, "loss": 1.7268915176391602, "step": 1942 }, { "epoch": 0.3538727587148448, "grad_norm": 22.75, "learning_rate": 4.874015040016991e-06, "loss": 2.2342190742492676, "step": 1944 }, { "epoch": 0.35423682533903705, "grad_norm": 24.0, "learning_rate": 4.8737472797504345e-06, "loss": 1.5835553407669067, "step": 1946 }, { "epoch": 0.35460089196322925, "grad_norm": 9.25, "learning_rate": 4.873479244519058e-06, "loss": 1.56566321849823, "step": 1948 }, { "epoch": 0.3549649585874215, "grad_norm": 21.625, "learning_rate": 4.873210934362195e-06, "loss": 1.720061182975769, "step": 1950 }, { "epoch": 0.35532902521161375, "grad_norm": 16.875, "learning_rate": 4.8729423493192185e-06, "loss": 1.7763686180114746, "step": 1952 }, { "epoch": 0.35569309183580594, "grad_norm": 11.5625, "learning_rate": 4.872673489429542e-06, "loss": 1.5924266576766968, "step": 1954 }, { "epoch": 0.3560571584599982, "grad_norm": 11.875, "learning_rate": 4.872404354732621e-06, "loss": 1.2784820795059204, "step": 1956 }, { "epoch": 0.3564212250841904, "grad_norm": 15.3125, "learning_rate": 4.87213494526795e-06, "loss": 1.552825927734375, "step": 1958 }, { "epoch": 0.35678529170838263, "grad_norm": 15.5, "learning_rate": 4.8718652610750615e-06, "loss": 1.4479613304138184, "step": 1960 }, { "epoch": 0.3571493583325749, "grad_norm": 34.75, "learning_rate": 4.871595302193533e-06, "loss": 1.6408932209014893, "step": 1962 }, { "epoch": 0.3575134249567671, "grad_norm": 16.375, "learning_rate": 4.871325068662978e-06, "loss": 1.9525874853134155, "step": 1964 }, { "epoch": 0.35787749158095933, "grad_norm": 7.625, "learning_rate": 4.871054560523053e-06, "loss": 1.3724558353424072, "step": 1966 }, { "epoch": 0.3582415582051515, "grad_norm": 20.0, "learning_rate": 4.870783777813454e-06, "loss": 1.6303695440292358, "step": 1968 }, { "epoch": 0.35860562482934377, "grad_norm": 8.8125, "learning_rate": 4.870512720573918e-06, "loss": 1.210604190826416, "step": 1970 }, { "epoch": 0.358969691453536, "grad_norm": 12.25, "learning_rate": 4.87024138884422e-06, "loss": 0.6176000833511353, "step": 1972 }, { "epoch": 0.3593337580777282, "grad_norm": 14.1875, "learning_rate": 4.869969782664178e-06, "loss": 1.2780102491378784, "step": 1974 }, { "epoch": 0.35969782470192047, "grad_norm": 13.0, "learning_rate": 4.869697902073648e-06, "loss": 1.7059063911437988, "step": 1976 }, { "epoch": 0.36006189132611266, "grad_norm": 14.375, "learning_rate": 4.869425747112528e-06, "loss": 1.1936110258102417, "step": 1978 }, { "epoch": 0.3604259579503049, "grad_norm": 21.125, "learning_rate": 4.869153317820757e-06, "loss": 1.492185354232788, "step": 1980 }, { "epoch": 0.36079002457449716, "grad_norm": 8.0, "learning_rate": 4.8688806142383105e-06, "loss": 1.5416659116744995, "step": 1982 }, { "epoch": 0.36115409119868935, "grad_norm": 11.375, "learning_rate": 4.868607636405208e-06, "loss": 1.7989057302474976, "step": 1984 }, { "epoch": 0.3615181578228816, "grad_norm": 25.375, "learning_rate": 4.868334384361508e-06, "loss": 1.651280164718628, "step": 1986 }, { "epoch": 0.3618822244470738, "grad_norm": 13.625, "learning_rate": 4.8680608581473085e-06, "loss": 1.0221983194351196, "step": 1988 }, { "epoch": 0.36224629107126605, "grad_norm": 22.875, "learning_rate": 4.867787057802749e-06, "loss": 1.549391269683838, "step": 1990 }, { "epoch": 0.3626103576954583, "grad_norm": 11.6875, "learning_rate": 4.867512983368009e-06, "loss": 0.7112131714820862, "step": 1992 }, { "epoch": 0.3629744243196505, "grad_norm": 12.0, "learning_rate": 4.867238634883305e-06, "loss": 1.44002366065979, "step": 1994 }, { "epoch": 0.36333849094384274, "grad_norm": 11.8125, "learning_rate": 4.8669640123889e-06, "loss": 1.8782907724380493, "step": 1996 }, { "epoch": 0.36370255756803493, "grad_norm": 11.1875, "learning_rate": 4.866689115925093e-06, "loss": 1.585323452949524, "step": 1998 }, { "epoch": 0.3640666241922272, "grad_norm": 10.1875, "learning_rate": 4.866413945532221e-06, "loss": 1.1778922080993652, "step": 2000 }, { "epoch": 0.3644306908164194, "grad_norm": 17.875, "learning_rate": 4.866138501250669e-06, "loss": 0.9781596660614014, "step": 2002 }, { "epoch": 0.36479475744061163, "grad_norm": 7.90625, "learning_rate": 4.865862783120853e-06, "loss": 1.3375802040100098, "step": 2004 }, { "epoch": 0.3651588240648039, "grad_norm": 2.953125, "learning_rate": 4.865586791183236e-06, "loss": 1.044349193572998, "step": 2006 }, { "epoch": 0.36552289068899607, "grad_norm": 8.8125, "learning_rate": 4.865310525478318e-06, "loss": 1.2266474962234497, "step": 2008 }, { "epoch": 0.3658869573131883, "grad_norm": 10.5625, "learning_rate": 4.865033986046639e-06, "loss": 1.4242433309555054, "step": 2010 }, { "epoch": 0.3662510239373805, "grad_norm": 9.0625, "learning_rate": 4.8647571729287814e-06, "loss": 1.3591541051864624, "step": 2012 }, { "epoch": 0.36661509056157277, "grad_norm": 8.8125, "learning_rate": 4.864480086165365e-06, "loss": 1.4692587852478027, "step": 2014 }, { "epoch": 0.366979157185765, "grad_norm": 6.03125, "learning_rate": 4.864202725797053e-06, "loss": 1.2231907844543457, "step": 2016 }, { "epoch": 0.3673432238099572, "grad_norm": 19.25, "learning_rate": 4.863925091864547e-06, "loss": 1.5058244466781616, "step": 2018 }, { "epoch": 0.36770729043414946, "grad_norm": 11.0, "learning_rate": 4.863647184408585e-06, "loss": 1.4587091207504272, "step": 2020 }, { "epoch": 0.36807135705834165, "grad_norm": 19.125, "learning_rate": 4.8633690034699536e-06, "loss": 1.0562090873718262, "step": 2022 }, { "epoch": 0.3684354236825339, "grad_norm": 5.78125, "learning_rate": 4.863090549089472e-06, "loss": 1.3666481971740723, "step": 2024 }, { "epoch": 0.36879949030672615, "grad_norm": 11.375, "learning_rate": 4.862811821308002e-06, "loss": 1.3786983489990234, "step": 2026 }, { "epoch": 0.36916355693091835, "grad_norm": 11.25, "learning_rate": 4.862532820166447e-06, "loss": 1.3864390850067139, "step": 2028 }, { "epoch": 0.3695276235551106, "grad_norm": 14.1875, "learning_rate": 4.86225354570575e-06, "loss": 1.4415440559387207, "step": 2030 }, { "epoch": 0.3698916901793028, "grad_norm": 27.875, "learning_rate": 4.8619739979668904e-06, "loss": 1.4659253358840942, "step": 2032 }, { "epoch": 0.37025575680349504, "grad_norm": 6.625, "learning_rate": 4.8616941769908935e-06, "loss": 1.4282618761062622, "step": 2034 }, { "epoch": 0.3706198234276873, "grad_norm": 7.875, "learning_rate": 4.86141408281882e-06, "loss": 1.1804873943328857, "step": 2036 }, { "epoch": 0.3709838900518795, "grad_norm": 5.90625, "learning_rate": 4.861133715491773e-06, "loss": 1.314592957496643, "step": 2038 }, { "epoch": 0.37134795667607173, "grad_norm": 7.125, "learning_rate": 4.860853075050899e-06, "loss": 1.4239892959594727, "step": 2040 }, { "epoch": 0.3717120233002639, "grad_norm": 13.375, "learning_rate": 4.8605721615373744e-06, "loss": 1.5178523063659668, "step": 2042 }, { "epoch": 0.3720760899244562, "grad_norm": 30.5, "learning_rate": 4.8602909749924265e-06, "loss": 1.6570950746536255, "step": 2044 }, { "epoch": 0.3724401565486484, "grad_norm": 12.625, "learning_rate": 4.8600095154573164e-06, "loss": 1.35731840133667, "step": 2046 }, { "epoch": 0.3728042231728406, "grad_norm": 23.25, "learning_rate": 4.859727782973349e-06, "loss": 1.615955114364624, "step": 2048 }, { "epoch": 0.37316828979703287, "grad_norm": 14.625, "learning_rate": 4.859445777581866e-06, "loss": 1.263051152229309, "step": 2050 }, { "epoch": 0.37353235642122506, "grad_norm": 26.75, "learning_rate": 4.859163499324251e-06, "loss": 1.0694100856781006, "step": 2052 }, { "epoch": 0.3738964230454173, "grad_norm": 9.3125, "learning_rate": 4.858880948241926e-06, "loss": 1.2078285217285156, "step": 2054 }, { "epoch": 0.37426048966960956, "grad_norm": 13.4375, "learning_rate": 4.858598124376356e-06, "loss": 1.5817310810089111, "step": 2056 }, { "epoch": 0.37462455629380176, "grad_norm": 7.0, "learning_rate": 4.858315027769044e-06, "loss": 1.4974950551986694, "step": 2058 }, { "epoch": 0.374988622917994, "grad_norm": 17.75, "learning_rate": 4.8580316584615315e-06, "loss": 1.8317619562149048, "step": 2060 }, { "epoch": 0.3753526895421862, "grad_norm": 17.625, "learning_rate": 4.857748016495404e-06, "loss": 1.7796279191970825, "step": 2062 }, { "epoch": 0.37571675616637845, "grad_norm": 15.0, "learning_rate": 4.857464101912282e-06, "loss": 1.926640510559082, "step": 2064 }, { "epoch": 0.3760808227905707, "grad_norm": 8.3125, "learning_rate": 4.8571799147538335e-06, "loss": 1.4101649522781372, "step": 2066 }, { "epoch": 0.3764448894147629, "grad_norm": 17.0, "learning_rate": 4.856895455061757e-06, "loss": 1.7048012018203735, "step": 2068 }, { "epoch": 0.37680895603895515, "grad_norm": 103.5, "learning_rate": 4.856610722877799e-06, "loss": 1.936248779296875, "step": 2070 }, { "epoch": 0.37717302266314734, "grad_norm": 13.0, "learning_rate": 4.856325718243742e-06, "loss": 1.5187864303588867, "step": 2072 }, { "epoch": 0.3775370892873396, "grad_norm": 8.4375, "learning_rate": 4.856040441201407e-06, "loss": 1.6555273532867432, "step": 2074 }, { "epoch": 0.3779011559115318, "grad_norm": 12.4375, "learning_rate": 4.85575489179266e-06, "loss": 0.9584940075874329, "step": 2076 }, { "epoch": 0.37826522253572403, "grad_norm": 10.0625, "learning_rate": 4.8554690700594034e-06, "loss": 0.6771257519721985, "step": 2078 }, { "epoch": 0.3786292891599163, "grad_norm": 2.71875, "learning_rate": 4.855182976043581e-06, "loss": 1.0531089305877686, "step": 2080 }, { "epoch": 0.3789933557841085, "grad_norm": 11.25, "learning_rate": 4.854896609787174e-06, "loss": 1.087500810623169, "step": 2082 }, { "epoch": 0.3793574224083007, "grad_norm": 20.125, "learning_rate": 4.854609971332208e-06, "loss": 1.4262564182281494, "step": 2084 }, { "epoch": 0.3797214890324929, "grad_norm": 10.25, "learning_rate": 4.854323060720743e-06, "loss": 1.5136618614196777, "step": 2086 }, { "epoch": 0.38008555565668517, "grad_norm": 33.5, "learning_rate": 4.854035877994886e-06, "loss": 1.4735723733901978, "step": 2088 }, { "epoch": 0.3804496222808774, "grad_norm": 6.75, "learning_rate": 4.8537484231967766e-06, "loss": 0.898356556892395, "step": 2090 }, { "epoch": 0.3808136889050696, "grad_norm": 13.0625, "learning_rate": 4.853460696368599e-06, "loss": 1.290900468826294, "step": 2092 }, { "epoch": 0.38117775552926186, "grad_norm": 10.625, "learning_rate": 4.853172697552575e-06, "loss": 1.8041828870773315, "step": 2094 }, { "epoch": 0.38154182215345406, "grad_norm": 9.8125, "learning_rate": 4.8528844267909685e-06, "loss": 1.5967042446136475, "step": 2096 }, { "epoch": 0.3819058887776463, "grad_norm": 8.5625, "learning_rate": 4.8525958841260815e-06, "loss": 1.62123703956604, "step": 2098 }, { "epoch": 0.38226995540183856, "grad_norm": 9.25, "learning_rate": 4.852307069600256e-06, "loss": 1.348247766494751, "step": 2100 }, { "epoch": 0.38263402202603075, "grad_norm": 14.875, "learning_rate": 4.852017983255874e-06, "loss": 1.467861294746399, "step": 2102 }, { "epoch": 0.382998088650223, "grad_norm": 18.625, "learning_rate": 4.8517286251353606e-06, "loss": 1.3362497091293335, "step": 2104 }, { "epoch": 0.3833621552744152, "grad_norm": 13.0, "learning_rate": 4.851438995281173e-06, "loss": 1.0168746709823608, "step": 2106 }, { "epoch": 0.38372622189860744, "grad_norm": 11.3125, "learning_rate": 4.8511490937358185e-06, "loss": 1.527422547340393, "step": 2108 }, { "epoch": 0.3840902885227997, "grad_norm": 8.8125, "learning_rate": 4.850858920541836e-06, "loss": 1.5718482732772827, "step": 2110 }, { "epoch": 0.3844543551469919, "grad_norm": 10.1875, "learning_rate": 4.850568475741807e-06, "loss": 1.1038970947265625, "step": 2112 }, { "epoch": 0.38481842177118414, "grad_norm": 23.875, "learning_rate": 4.850277759378355e-06, "loss": 1.102405309677124, "step": 2114 }, { "epoch": 0.38518248839537633, "grad_norm": 17.0, "learning_rate": 4.849986771494139e-06, "loss": 1.7098726034164429, "step": 2116 }, { "epoch": 0.3855465550195686, "grad_norm": 8.125, "learning_rate": 4.849695512131864e-06, "loss": 1.6488326787948608, "step": 2118 }, { "epoch": 0.38591062164376083, "grad_norm": 78.0, "learning_rate": 4.849403981334267e-06, "loss": 1.7363027334213257, "step": 2120 }, { "epoch": 0.386274688267953, "grad_norm": 19.0, "learning_rate": 4.8491121791441295e-06, "loss": 1.879165530204773, "step": 2122 }, { "epoch": 0.3866387548921453, "grad_norm": 13.625, "learning_rate": 4.848820105604275e-06, "loss": 1.5834972858428955, "step": 2124 }, { "epoch": 0.38700282151633747, "grad_norm": 10.0, "learning_rate": 4.8485277607575636e-06, "loss": 1.5458858013153076, "step": 2126 }, { "epoch": 0.3873668881405297, "grad_norm": 8.6875, "learning_rate": 4.848235144646893e-06, "loss": 1.4922245740890503, "step": 2128 }, { "epoch": 0.38773095476472197, "grad_norm": 5.3125, "learning_rate": 4.847942257315206e-06, "loss": 1.1346415281295776, "step": 2130 }, { "epoch": 0.38809502138891416, "grad_norm": 10.75, "learning_rate": 4.847649098805482e-06, "loss": 1.6507272720336914, "step": 2132 }, { "epoch": 0.3884590880131064, "grad_norm": 15.625, "learning_rate": 4.847355669160739e-06, "loss": 1.5805333852767944, "step": 2134 }, { "epoch": 0.3888231546372986, "grad_norm": 14.6875, "learning_rate": 4.84706196842404e-06, "loss": 1.5614655017852783, "step": 2136 }, { "epoch": 0.38918722126149086, "grad_norm": 14.1875, "learning_rate": 4.846767996638482e-06, "loss": 1.829200029373169, "step": 2138 }, { "epoch": 0.3895512878856831, "grad_norm": 12.25, "learning_rate": 4.846473753847204e-06, "loss": 1.190377950668335, "step": 2140 }, { "epoch": 0.3899153545098753, "grad_norm": 13.9375, "learning_rate": 4.8461792400933874e-06, "loss": 1.2341125011444092, "step": 2142 }, { "epoch": 0.39027942113406755, "grad_norm": 14.1875, "learning_rate": 4.8458844554202474e-06, "loss": 1.4373302459716797, "step": 2144 }, { "epoch": 0.39064348775825974, "grad_norm": 15.5, "learning_rate": 4.8455893998710455e-06, "loss": 1.9258980751037598, "step": 2146 }, { "epoch": 0.391007554382452, "grad_norm": 12.0, "learning_rate": 4.8452940734890785e-06, "loss": 1.5115244388580322, "step": 2148 }, { "epoch": 0.39137162100664424, "grad_norm": 9.5, "learning_rate": 4.844998476317685e-06, "loss": 1.146047592163086, "step": 2150 }, { "epoch": 0.39173568763083644, "grad_norm": 7.78125, "learning_rate": 4.844702608400241e-06, "loss": 1.5512419939041138, "step": 2152 }, { "epoch": 0.3920997542550287, "grad_norm": 7.46875, "learning_rate": 4.844406469780166e-06, "loss": 1.4744033813476562, "step": 2154 }, { "epoch": 0.3924638208792209, "grad_norm": 8.25, "learning_rate": 4.844110060500916e-06, "loss": 1.34213125705719, "step": 2156 }, { "epoch": 0.39282788750341313, "grad_norm": 13.875, "learning_rate": 4.843813380605989e-06, "loss": 1.1744656562805176, "step": 2158 }, { "epoch": 0.3931919541276053, "grad_norm": 17.625, "learning_rate": 4.84351643013892e-06, "loss": 2.003048896789551, "step": 2160 }, { "epoch": 0.3935560207517976, "grad_norm": 15.375, "learning_rate": 4.843219209143286e-06, "loss": 1.7268306016921997, "step": 2162 }, { "epoch": 0.3939200873759898, "grad_norm": 54.75, "learning_rate": 4.842921717662703e-06, "loss": 1.2337154150009155, "step": 2164 }, { "epoch": 0.394284154000182, "grad_norm": 17.25, "learning_rate": 4.842623955740826e-06, "loss": 1.8911058902740479, "step": 2166 }, { "epoch": 0.39464822062437427, "grad_norm": 10.3125, "learning_rate": 4.842325923421353e-06, "loss": 1.064198613166809, "step": 2168 }, { "epoch": 0.39501228724856646, "grad_norm": 8.125, "learning_rate": 4.842027620748014e-06, "loss": 1.4203481674194336, "step": 2170 }, { "epoch": 0.3953763538727587, "grad_norm": 10.375, "learning_rate": 4.841729047764589e-06, "loss": 1.555790901184082, "step": 2172 }, { "epoch": 0.39574042049695096, "grad_norm": 25.25, "learning_rate": 4.84143020451489e-06, "loss": 0.8090052604675293, "step": 2174 }, { "epoch": 0.39610448712114316, "grad_norm": 9.4375, "learning_rate": 4.8411310910427704e-06, "loss": 0.7757471203804016, "step": 2176 }, { "epoch": 0.3964685537453354, "grad_norm": 9.75, "learning_rate": 4.840831707392125e-06, "loss": 1.5251039266586304, "step": 2178 }, { "epoch": 0.3968326203695276, "grad_norm": 19.375, "learning_rate": 4.840532053606887e-06, "loss": 1.5991133451461792, "step": 2180 }, { "epoch": 0.39719668699371985, "grad_norm": 12.5625, "learning_rate": 4.840232129731028e-06, "loss": 1.8887784481048584, "step": 2182 }, { "epoch": 0.3975607536179121, "grad_norm": 11.8125, "learning_rate": 4.839931935808563e-06, "loss": 1.771515130996704, "step": 2184 }, { "epoch": 0.3979248202421043, "grad_norm": 47.0, "learning_rate": 4.839631471883542e-06, "loss": 0.9939151406288147, "step": 2186 }, { "epoch": 0.39828888686629654, "grad_norm": 8.125, "learning_rate": 4.83933073800006e-06, "loss": 1.5242818593978882, "step": 2188 }, { "epoch": 0.39865295349048874, "grad_norm": 11.8125, "learning_rate": 4.839029734202244e-06, "loss": 1.2641445398330688, "step": 2190 }, { "epoch": 0.399017020114681, "grad_norm": 4.28125, "learning_rate": 4.838728460534269e-06, "loss": 1.1251391172409058, "step": 2192 }, { "epoch": 0.39938108673887324, "grad_norm": 16.25, "learning_rate": 4.838426917040343e-06, "loss": 1.315170168876648, "step": 2194 }, { "epoch": 0.39974515336306543, "grad_norm": 100.5, "learning_rate": 4.838125103764717e-06, "loss": 1.6331381797790527, "step": 2196 }, { "epoch": 0.4001092199872577, "grad_norm": 10.25, "learning_rate": 4.837823020751683e-06, "loss": 0.7378606200218201, "step": 2198 }, { "epoch": 0.4004732866114499, "grad_norm": 14.5625, "learning_rate": 4.837520668045568e-06, "loss": 1.553528904914856, "step": 2200 }, { "epoch": 0.4008373532356421, "grad_norm": 9.0, "learning_rate": 4.837218045690741e-06, "loss": 1.4627171754837036, "step": 2202 }, { "epoch": 0.4012014198598344, "grad_norm": 8.375, "learning_rate": 4.8369151537316125e-06, "loss": 1.2701606750488281, "step": 2204 }, { "epoch": 0.40156548648402657, "grad_norm": 5.03125, "learning_rate": 4.836611992212629e-06, "loss": 1.3050543069839478, "step": 2206 }, { "epoch": 0.4019295531082188, "grad_norm": 7.3125, "learning_rate": 4.836308561178279e-06, "loss": 1.1303834915161133, "step": 2208 }, { "epoch": 0.402293619732411, "grad_norm": 14.3125, "learning_rate": 4.836004860673089e-06, "loss": 1.367389440536499, "step": 2210 }, { "epoch": 0.40265768635660326, "grad_norm": 36.0, "learning_rate": 4.835700890741627e-06, "loss": 1.5948712825775146, "step": 2212 }, { "epoch": 0.4030217529807955, "grad_norm": 20.25, "learning_rate": 4.835396651428499e-06, "loss": 1.7656869888305664, "step": 2214 }, { "epoch": 0.4033858196049877, "grad_norm": 16.75, "learning_rate": 4.835092142778349e-06, "loss": 1.8960367441177368, "step": 2216 }, { "epoch": 0.40374988622917996, "grad_norm": 22.0, "learning_rate": 4.834787364835866e-06, "loss": 2.0225956439971924, "step": 2218 }, { "epoch": 0.40411395285337215, "grad_norm": 9.25, "learning_rate": 4.834482317645772e-06, "loss": 1.5366857051849365, "step": 2220 }, { "epoch": 0.4044780194775644, "grad_norm": 8.375, "learning_rate": 4.834177001252832e-06, "loss": 1.5475716590881348, "step": 2222 }, { "epoch": 0.40484208610175665, "grad_norm": 8.625, "learning_rate": 4.833871415701852e-06, "loss": 1.2871226072311401, "step": 2224 }, { "epoch": 0.40520615272594884, "grad_norm": 9.0, "learning_rate": 4.833565561037672e-06, "loss": 1.3163812160491943, "step": 2226 }, { "epoch": 0.4055702193501411, "grad_norm": 11.5625, "learning_rate": 4.833259437305178e-06, "loss": 1.3300800323486328, "step": 2228 }, { "epoch": 0.4059342859743333, "grad_norm": 9.4375, "learning_rate": 4.83295304454929e-06, "loss": 1.154212236404419, "step": 2230 }, { "epoch": 0.40629835259852554, "grad_norm": 10.625, "learning_rate": 4.832646382814972e-06, "loss": 1.492110252380371, "step": 2232 }, { "epoch": 0.40666241922271773, "grad_norm": 13.3125, "learning_rate": 4.8323394521472235e-06, "loss": 1.158155083656311, "step": 2234 }, { "epoch": 0.40702648584691, "grad_norm": 18.25, "learning_rate": 4.832032252591087e-06, "loss": 1.679713249206543, "step": 2236 }, { "epoch": 0.40739055247110223, "grad_norm": 7.625, "learning_rate": 4.831724784191641e-06, "loss": 1.5359445810317993, "step": 2238 }, { "epoch": 0.4077546190952944, "grad_norm": 7.65625, "learning_rate": 4.831417046994007e-06, "loss": 1.4562760591506958, "step": 2240 }, { "epoch": 0.4081186857194867, "grad_norm": 9.875, "learning_rate": 4.831109041043344e-06, "loss": 1.4791457653045654, "step": 2242 }, { "epoch": 0.40848275234367887, "grad_norm": 9.5, "learning_rate": 4.830800766384849e-06, "loss": 1.4202136993408203, "step": 2244 }, { "epoch": 0.4088468189678711, "grad_norm": 22.375, "learning_rate": 4.83049222306376e-06, "loss": 1.6754508018493652, "step": 2246 }, { "epoch": 0.40921088559206337, "grad_norm": 9.8125, "learning_rate": 4.830183411125358e-06, "loss": 1.6363577842712402, "step": 2248 }, { "epoch": 0.40957495221625556, "grad_norm": 3.265625, "learning_rate": 4.829874330614956e-06, "loss": 1.4338135719299316, "step": 2250 }, { "epoch": 0.4099390188404478, "grad_norm": 7.21875, "learning_rate": 4.829564981577913e-06, "loss": 1.124242901802063, "step": 2252 }, { "epoch": 0.41030308546464, "grad_norm": 15.75, "learning_rate": 4.829255364059623e-06, "loss": 1.6867132186889648, "step": 2254 }, { "epoch": 0.41066715208883225, "grad_norm": 18.125, "learning_rate": 4.828945478105521e-06, "loss": 1.5656864643096924, "step": 2256 }, { "epoch": 0.4110312187130245, "grad_norm": 11.8125, "learning_rate": 4.828635323761083e-06, "loss": 1.4033061265945435, "step": 2258 }, { "epoch": 0.4113952853372167, "grad_norm": 87.0, "learning_rate": 4.828324901071823e-06, "loss": 1.8032457828521729, "step": 2260 }, { "epoch": 0.41175935196140895, "grad_norm": 8.75, "learning_rate": 4.828014210083292e-06, "loss": 1.4799344539642334, "step": 2262 }, { "epoch": 0.41212341858560114, "grad_norm": 12.125, "learning_rate": 4.8277032508410835e-06, "loss": 1.2214703559875488, "step": 2264 }, { "epoch": 0.4124874852097934, "grad_norm": 19.0, "learning_rate": 4.8273920233908304e-06, "loss": 1.7928133010864258, "step": 2266 }, { "epoch": 0.41285155183398564, "grad_norm": 10.3125, "learning_rate": 4.827080527778204e-06, "loss": 1.4186478853225708, "step": 2268 }, { "epoch": 0.41321561845817784, "grad_norm": 13.875, "learning_rate": 4.8267687640489135e-06, "loss": 1.2705228328704834, "step": 2270 }, { "epoch": 0.4135796850823701, "grad_norm": 13.4375, "learning_rate": 4.826456732248711e-06, "loss": 1.5323959589004517, "step": 2272 }, { "epoch": 0.4139437517065623, "grad_norm": 16.25, "learning_rate": 4.8261444324233865e-06, "loss": 1.7160745859146118, "step": 2274 }, { "epoch": 0.41430781833075453, "grad_norm": 8.25, "learning_rate": 4.825831864618765e-06, "loss": 1.673983097076416, "step": 2276 }, { "epoch": 0.4146718849549468, "grad_norm": 9.6875, "learning_rate": 4.8255190288807175e-06, "loss": 1.2232874631881714, "step": 2278 }, { "epoch": 0.415035951579139, "grad_norm": 12.625, "learning_rate": 4.825205925255151e-06, "loss": 1.3800568580627441, "step": 2280 }, { "epoch": 0.4154000182033312, "grad_norm": 13.4375, "learning_rate": 4.824892553788012e-06, "loss": 1.5487409830093384, "step": 2282 }, { "epoch": 0.4157640848275234, "grad_norm": 46.25, "learning_rate": 4.8245789145252865e-06, "loss": 1.6988344192504883, "step": 2284 }, { "epoch": 0.41612815145171567, "grad_norm": 5.8125, "learning_rate": 4.824265007512999e-06, "loss": 1.2878892421722412, "step": 2286 }, { "epoch": 0.4164922180759079, "grad_norm": 13.4375, "learning_rate": 4.823950832797215e-06, "loss": 1.5087294578552246, "step": 2288 }, { "epoch": 0.4168562847001001, "grad_norm": 5.9375, "learning_rate": 4.823636390424038e-06, "loss": 1.465741515159607, "step": 2290 }, { "epoch": 0.41722035132429236, "grad_norm": 6.09375, "learning_rate": 4.823321680439611e-06, "loss": 1.1088500022888184, "step": 2292 }, { "epoch": 0.41758441794848455, "grad_norm": 8.375, "learning_rate": 4.8230067028901175e-06, "loss": 1.4532020092010498, "step": 2294 }, { "epoch": 0.4179484845726768, "grad_norm": 5.28125, "learning_rate": 4.822691457821777e-06, "loss": 1.3000138998031616, "step": 2296 }, { "epoch": 0.41831255119686905, "grad_norm": 5.78125, "learning_rate": 4.822375945280854e-06, "loss": 1.268698811531067, "step": 2298 }, { "epoch": 0.41867661782106125, "grad_norm": 7.0625, "learning_rate": 4.822060165313645e-06, "loss": 1.36043381690979, "step": 2300 }, { "epoch": 0.4190406844452535, "grad_norm": 13.5, "learning_rate": 4.821744117966491e-06, "loss": 1.572618842124939, "step": 2302 }, { "epoch": 0.4194047510694457, "grad_norm": 11.25, "learning_rate": 4.8214278032857706e-06, "loss": 1.7182421684265137, "step": 2304 }, { "epoch": 0.41976881769363794, "grad_norm": 9.8125, "learning_rate": 4.821111221317901e-06, "loss": 1.4769285917282104, "step": 2306 }, { "epoch": 0.42013288431783014, "grad_norm": 28.0, "learning_rate": 4.820794372109342e-06, "loss": 1.5118043422698975, "step": 2308 }, { "epoch": 0.4204969509420224, "grad_norm": 17.625, "learning_rate": 4.820477255706586e-06, "loss": 1.294557809829712, "step": 2310 }, { "epoch": 0.42086101756621463, "grad_norm": 5.09375, "learning_rate": 4.820159872156172e-06, "loss": 0.9314954280853271, "step": 2312 }, { "epoch": 0.42122508419040683, "grad_norm": 3.0625, "learning_rate": 4.819842221504671e-06, "loss": 1.0919597148895264, "step": 2314 }, { "epoch": 0.4215891508145991, "grad_norm": 26.25, "learning_rate": 4.8195243037987e-06, "loss": 1.3483877182006836, "step": 2316 }, { "epoch": 0.4219532174387913, "grad_norm": 16.875, "learning_rate": 4.819206119084913e-06, "loss": 0.7155640125274658, "step": 2318 }, { "epoch": 0.4223172840629835, "grad_norm": 63.25, "learning_rate": 4.8188876674099995e-06, "loss": 1.4566911458969116, "step": 2320 }, { "epoch": 0.42268135068717577, "grad_norm": 5.28125, "learning_rate": 4.818568948820692e-06, "loss": 1.2753733396530151, "step": 2322 }, { "epoch": 0.42304541731136797, "grad_norm": 12.9375, "learning_rate": 4.81824996336376e-06, "loss": 0.9926548004150391, "step": 2324 }, { "epoch": 0.4234094839355602, "grad_norm": 21.125, "learning_rate": 4.817930711086017e-06, "loss": 1.6045634746551514, "step": 2326 }, { "epoch": 0.4237735505597524, "grad_norm": 9.75, "learning_rate": 4.817611192034308e-06, "loss": 1.9188514947891235, "step": 2328 }, { "epoch": 0.42413761718394466, "grad_norm": 3.0625, "learning_rate": 4.817291406255524e-06, "loss": 1.0702840089797974, "step": 2330 }, { "epoch": 0.4245016838081369, "grad_norm": 11.0625, "learning_rate": 4.81697135379659e-06, "loss": 1.0890417098999023, "step": 2332 }, { "epoch": 0.4248657504323291, "grad_norm": 15.4375, "learning_rate": 4.816651034704474e-06, "loss": 1.7874891757965088, "step": 2334 }, { "epoch": 0.42522981705652135, "grad_norm": 11.25, "learning_rate": 4.81633044902618e-06, "loss": 1.5855598449707031, "step": 2336 }, { "epoch": 0.42559388368071355, "grad_norm": 11.0, "learning_rate": 4.816009596808754e-06, "loss": 1.596388578414917, "step": 2338 }, { "epoch": 0.4259579503049058, "grad_norm": 53.5, "learning_rate": 4.815688478099279e-06, "loss": 0.751289963722229, "step": 2340 }, { "epoch": 0.42632201692909805, "grad_norm": 147.0, "learning_rate": 4.815367092944878e-06, "loss": 0.5301092863082886, "step": 2342 }, { "epoch": 0.42668608355329024, "grad_norm": 25.375, "learning_rate": 4.8150454413927154e-06, "loss": 0.9436086416244507, "step": 2344 }, { "epoch": 0.4270501501774825, "grad_norm": 11.75, "learning_rate": 4.814723523489987e-06, "loss": 1.4477030038833618, "step": 2346 }, { "epoch": 0.4274142168016747, "grad_norm": 9.3125, "learning_rate": 4.814401339283937e-06, "loss": 1.7095439434051514, "step": 2348 }, { "epoch": 0.42777828342586693, "grad_norm": 9.0, "learning_rate": 4.814078888821844e-06, "loss": 1.930063247680664, "step": 2350 }, { "epoch": 0.4281423500500592, "grad_norm": 16.5, "learning_rate": 4.813756172151026e-06, "loss": 1.5254054069519043, "step": 2352 }, { "epoch": 0.4285064166742514, "grad_norm": 8.5625, "learning_rate": 4.8134331893188405e-06, "loss": 1.5094444751739502, "step": 2354 }, { "epoch": 0.4288704832984436, "grad_norm": 3.453125, "learning_rate": 4.813109940372682e-06, "loss": 1.1158978939056396, "step": 2356 }, { "epoch": 0.4292345499226358, "grad_norm": 5.125, "learning_rate": 4.812786425359989e-06, "loss": 0.9549667239189148, "step": 2358 }, { "epoch": 0.42959861654682807, "grad_norm": 6.65625, "learning_rate": 4.812462644328234e-06, "loss": 1.4353636503219604, "step": 2360 }, { "epoch": 0.4299626831710203, "grad_norm": 9.3125, "learning_rate": 4.812138597324932e-06, "loss": 1.4717034101486206, "step": 2362 }, { "epoch": 0.4303267497952125, "grad_norm": 5.8125, "learning_rate": 4.8118142843976345e-06, "loss": 1.329596996307373, "step": 2364 }, { "epoch": 0.43069081641940477, "grad_norm": 6.8125, "learning_rate": 4.811489705593933e-06, "loss": 1.4275367259979248, "step": 2366 }, { "epoch": 0.43105488304359696, "grad_norm": 7.40625, "learning_rate": 4.811164860961459e-06, "loss": 1.4369772672653198, "step": 2368 }, { "epoch": 0.4314189496677892, "grad_norm": 7.0, "learning_rate": 4.810839750547882e-06, "loss": 1.3342591524124146, "step": 2370 }, { "epoch": 0.43178301629198146, "grad_norm": 21.5, "learning_rate": 4.81051437440091e-06, "loss": 1.489044189453125, "step": 2372 }, { "epoch": 0.43214708291617365, "grad_norm": 7.78125, "learning_rate": 4.810188732568291e-06, "loss": 1.158496379852295, "step": 2374 }, { "epoch": 0.4325111495403659, "grad_norm": 10.375, "learning_rate": 4.809862825097811e-06, "loss": 0.47940826416015625, "step": 2376 }, { "epoch": 0.4328752161645581, "grad_norm": 8.125, "learning_rate": 4.8095366520372955e-06, "loss": 1.3868757486343384, "step": 2378 }, { "epoch": 0.43323928278875035, "grad_norm": 8.3125, "learning_rate": 4.809210213434611e-06, "loss": 1.3867754936218262, "step": 2380 }, { "epoch": 0.4336033494129426, "grad_norm": 21.5, "learning_rate": 4.8088835093376595e-06, "loss": 1.4974238872528076, "step": 2382 }, { "epoch": 0.4339674160371348, "grad_norm": 10.8125, "learning_rate": 4.808556539794383e-06, "loss": 1.639586091041565, "step": 2384 }, { "epoch": 0.43433148266132704, "grad_norm": 7.46875, "learning_rate": 4.808229304852765e-06, "loss": 1.469184398651123, "step": 2386 }, { "epoch": 0.43469554928551923, "grad_norm": 41.75, "learning_rate": 4.807901804560824e-06, "loss": 1.4129558801651, "step": 2388 }, { "epoch": 0.4350596159097115, "grad_norm": 16.125, "learning_rate": 4.80757403896662e-06, "loss": 1.7425603866577148, "step": 2390 }, { "epoch": 0.4354236825339037, "grad_norm": 12.125, "learning_rate": 4.807246008118251e-06, "loss": 2.0718836784362793, "step": 2392 }, { "epoch": 0.4357877491580959, "grad_norm": 9.6875, "learning_rate": 4.806917712063856e-06, "loss": 1.6627055406570435, "step": 2394 }, { "epoch": 0.4361518157822882, "grad_norm": 13.1875, "learning_rate": 4.8065891508516074e-06, "loss": 1.447837471961975, "step": 2396 }, { "epoch": 0.43651588240648037, "grad_norm": 8.6875, "learning_rate": 4.806260324529722e-06, "loss": 1.491906762123108, "step": 2398 }, { "epoch": 0.4368799490306726, "grad_norm": 14.75, "learning_rate": 4.805931233146457e-06, "loss": 1.6257590055465698, "step": 2400 }, { "epoch": 0.4372440156548648, "grad_norm": 11.8125, "learning_rate": 4.8056018767501e-06, "loss": 1.4428527355194092, "step": 2402 }, { "epoch": 0.43760808227905706, "grad_norm": 16.5, "learning_rate": 4.805272255388985e-06, "loss": 1.5536551475524902, "step": 2404 }, { "epoch": 0.4379721489032493, "grad_norm": 27.25, "learning_rate": 4.804942369111484e-06, "loss": 1.7104015350341797, "step": 2406 }, { "epoch": 0.4383362155274415, "grad_norm": 13.1875, "learning_rate": 4.804612217966005e-06, "loss": 1.110038161277771, "step": 2408 }, { "epoch": 0.43870028215163376, "grad_norm": 11.9375, "learning_rate": 4.804281802000995e-06, "loss": 1.2847239971160889, "step": 2410 }, { "epoch": 0.43906434877582595, "grad_norm": 21.5, "learning_rate": 4.8039511212649436e-06, "loss": 1.7150689363479614, "step": 2412 }, { "epoch": 0.4394284154000182, "grad_norm": 10.875, "learning_rate": 4.803620175806377e-06, "loss": 1.3355991840362549, "step": 2414 }, { "epoch": 0.43979248202421045, "grad_norm": 10.625, "learning_rate": 4.803288965673857e-06, "loss": 2.004887580871582, "step": 2416 }, { "epoch": 0.44015654864840265, "grad_norm": 9.3125, "learning_rate": 4.80295749091599e-06, "loss": 1.3342925310134888, "step": 2418 }, { "epoch": 0.4405206152725949, "grad_norm": 5.1875, "learning_rate": 4.802625751581419e-06, "loss": 1.015660285949707, "step": 2420 }, { "epoch": 0.4408846818967871, "grad_norm": 22.875, "learning_rate": 4.8022937477188235e-06, "loss": 2.085942268371582, "step": 2422 }, { "epoch": 0.44124874852097934, "grad_norm": 11.25, "learning_rate": 4.801961479376925e-06, "loss": 1.2945802211761475, "step": 2424 }, { "epoch": 0.4416128151451716, "grad_norm": 12.0, "learning_rate": 4.801628946604482e-06, "loss": 1.389954686164856, "step": 2426 }, { "epoch": 0.4419768817693638, "grad_norm": 6.0625, "learning_rate": 4.801296149450293e-06, "loss": 1.0629265308380127, "step": 2428 }, { "epoch": 0.44234094839355603, "grad_norm": 31.0, "learning_rate": 4.800963087963193e-06, "loss": 1.8067104816436768, "step": 2430 }, { "epoch": 0.4427050150177482, "grad_norm": 13.875, "learning_rate": 4.800629762192058e-06, "loss": 1.1785645484924316, "step": 2432 }, { "epoch": 0.4430690816419405, "grad_norm": 7.59375, "learning_rate": 4.800296172185804e-06, "loss": 1.1710501909255981, "step": 2434 }, { "epoch": 0.4434331482661327, "grad_norm": 11.875, "learning_rate": 4.799962317993382e-06, "loss": 1.5772453546524048, "step": 2436 }, { "epoch": 0.4437972148903249, "grad_norm": 75.5, "learning_rate": 4.799628199663785e-06, "loss": 1.7493717670440674, "step": 2438 }, { "epoch": 0.44416128151451717, "grad_norm": 25.375, "learning_rate": 4.7992938172460434e-06, "loss": 1.71174955368042, "step": 2440 }, { "epoch": 0.44452534813870936, "grad_norm": 6.59375, "learning_rate": 4.798959170789225e-06, "loss": 1.2972800731658936, "step": 2442 }, { "epoch": 0.4448894147629016, "grad_norm": 10.875, "learning_rate": 4.798624260342439e-06, "loss": 1.3126376867294312, "step": 2444 }, { "epoch": 0.44525348138709386, "grad_norm": 22.5, "learning_rate": 4.798289085954833e-06, "loss": 1.248924732208252, "step": 2446 }, { "epoch": 0.44561754801128606, "grad_norm": 11.0, "learning_rate": 4.79795364767559e-06, "loss": 1.35087251663208, "step": 2448 }, { "epoch": 0.4459816146354783, "grad_norm": 18.5, "learning_rate": 4.7976179455539365e-06, "loss": 1.3028209209442139, "step": 2450 }, { "epoch": 0.4463456812596705, "grad_norm": 14.9375, "learning_rate": 4.7972819796391335e-06, "loss": 2.157160758972168, "step": 2452 }, { "epoch": 0.44670974788386275, "grad_norm": 12.1875, "learning_rate": 4.796945749980485e-06, "loss": 1.3547319173812866, "step": 2454 }, { "epoch": 0.447073814508055, "grad_norm": 14.625, "learning_rate": 4.79660925662733e-06, "loss": 1.5041558742523193, "step": 2456 }, { "epoch": 0.4474378811322472, "grad_norm": 8.75, "learning_rate": 4.796272499629048e-06, "loss": 1.4152911901474, "step": 2458 }, { "epoch": 0.44780194775643944, "grad_norm": 6.78125, "learning_rate": 4.795935479035055e-06, "loss": 1.4617106914520264, "step": 2460 }, { "epoch": 0.44816601438063164, "grad_norm": 30.25, "learning_rate": 4.795598194894809e-06, "loss": 1.2192132472991943, "step": 2462 }, { "epoch": 0.4485300810048239, "grad_norm": 32.0, "learning_rate": 4.795260647257805e-06, "loss": 1.2421975135803223, "step": 2464 }, { "epoch": 0.4488941476290161, "grad_norm": 12.9375, "learning_rate": 4.794922836173576e-06, "loss": 1.8442548513412476, "step": 2466 }, { "epoch": 0.44925821425320833, "grad_norm": 5.75, "learning_rate": 4.794584761691696e-06, "loss": 1.2030386924743652, "step": 2468 }, { "epoch": 0.4496222808774006, "grad_norm": 5.78125, "learning_rate": 4.794246423861776e-06, "loss": 1.364159107208252, "step": 2470 }, { "epoch": 0.4499863475015928, "grad_norm": 5.90625, "learning_rate": 4.793907822733463e-06, "loss": 1.3991097211837769, "step": 2472 }, { "epoch": 0.450350414125785, "grad_norm": 11.5625, "learning_rate": 4.793568958356448e-06, "loss": 1.4109032154083252, "step": 2474 }, { "epoch": 0.4507144807499772, "grad_norm": 11.5, "learning_rate": 4.793229830780456e-06, "loss": 1.5175824165344238, "step": 2476 }, { "epoch": 0.45107854737416947, "grad_norm": 11.0625, "learning_rate": 4.792890440055255e-06, "loss": 1.4294145107269287, "step": 2478 }, { "epoch": 0.4514426139983617, "grad_norm": 9.625, "learning_rate": 4.792550786230647e-06, "loss": 1.3272117376327515, "step": 2480 }, { "epoch": 0.4518066806225539, "grad_norm": 22.875, "learning_rate": 4.792210869356476e-06, "loss": 1.4949346780776978, "step": 2482 }, { "epoch": 0.45217074724674616, "grad_norm": 8.0, "learning_rate": 4.791870689482623e-06, "loss": 1.4751938581466675, "step": 2484 }, { "epoch": 0.45253481387093836, "grad_norm": 6.65625, "learning_rate": 4.791530246659007e-06, "loss": 1.1491219997406006, "step": 2486 }, { "epoch": 0.4528988804951306, "grad_norm": 20.25, "learning_rate": 4.791189540935589e-06, "loss": 1.4928953647613525, "step": 2488 }, { "epoch": 0.45326294711932286, "grad_norm": 4.5, "learning_rate": 4.790848572362365e-06, "loss": 1.0747045278549194, "step": 2490 }, { "epoch": 0.45362701374351505, "grad_norm": 7.25, "learning_rate": 4.790507340989371e-06, "loss": 1.4724012613296509, "step": 2492 }, { "epoch": 0.4539910803677073, "grad_norm": 17.875, "learning_rate": 4.79016584686668e-06, "loss": 1.091583490371704, "step": 2494 }, { "epoch": 0.4543551469918995, "grad_norm": 57.0, "learning_rate": 4.789824090044407e-06, "loss": 0.5528209209442139, "step": 2496 }, { "epoch": 0.45471921361609174, "grad_norm": 12.375, "learning_rate": 4.789482070572702e-06, "loss": 1.4132084846496582, "step": 2498 }, { "epoch": 0.455083280240284, "grad_norm": 15.25, "learning_rate": 4.789139788501755e-06, "loss": 1.5109962224960327, "step": 2500 }, { "epoch": 0.4554473468644762, "grad_norm": 10.875, "learning_rate": 4.788797243881794e-06, "loss": 1.7004756927490234, "step": 2502 }, { "epoch": 0.45581141348866844, "grad_norm": 6.125, "learning_rate": 4.788454436763088e-06, "loss": 1.1646795272827148, "step": 2504 }, { "epoch": 0.45617548011286063, "grad_norm": 11.0625, "learning_rate": 4.788111367195941e-06, "loss": 1.6641459465026855, "step": 2506 }, { "epoch": 0.4565395467370529, "grad_norm": 11.1875, "learning_rate": 4.7877680352306965e-06, "loss": 2.0821268558502197, "step": 2508 }, { "epoch": 0.45690361336124513, "grad_norm": 22.25, "learning_rate": 4.78742444091774e-06, "loss": 1.363267421722412, "step": 2510 }, { "epoch": 0.4572676799854373, "grad_norm": 18.375, "learning_rate": 4.78708058430749e-06, "loss": 1.6625890731811523, "step": 2512 }, { "epoch": 0.4576317466096296, "grad_norm": 15.6875, "learning_rate": 4.786736465450405e-06, "loss": 1.4096591472625732, "step": 2514 }, { "epoch": 0.45799581323382177, "grad_norm": 6.90625, "learning_rate": 4.786392084396986e-06, "loss": 1.2796412706375122, "step": 2516 }, { "epoch": 0.458359879858014, "grad_norm": 8.125, "learning_rate": 4.786047441197769e-06, "loss": 1.179935336112976, "step": 2518 }, { "epoch": 0.45872394648220627, "grad_norm": 5.96875, "learning_rate": 4.785702535903326e-06, "loss": 1.589585542678833, "step": 2520 }, { "epoch": 0.45908801310639846, "grad_norm": 16.0, "learning_rate": 4.785357368564275e-06, "loss": 1.413062572479248, "step": 2522 }, { "epoch": 0.4594520797305907, "grad_norm": 5.84375, "learning_rate": 4.785011939231265e-06, "loss": 1.2646909952163696, "step": 2524 }, { "epoch": 0.4598161463547829, "grad_norm": 14.3125, "learning_rate": 4.784666247954986e-06, "loss": 1.3635499477386475, "step": 2526 }, { "epoch": 0.46018021297897516, "grad_norm": 34.5, "learning_rate": 4.784320294786168e-06, "loss": 1.175112009048462, "step": 2528 }, { "epoch": 0.4605442796031674, "grad_norm": 10.875, "learning_rate": 4.783974079775579e-06, "loss": 1.0801090002059937, "step": 2530 }, { "epoch": 0.4609083462273596, "grad_norm": 18.0, "learning_rate": 4.783627602974023e-06, "loss": 1.6281940937042236, "step": 2532 }, { "epoch": 0.46127241285155185, "grad_norm": 31.0, "learning_rate": 4.783280864432344e-06, "loss": 1.5610016584396362, "step": 2534 }, { "epoch": 0.46163647947574404, "grad_norm": 14.1875, "learning_rate": 4.782933864201426e-06, "loss": 1.4834723472595215, "step": 2536 }, { "epoch": 0.4620005460999363, "grad_norm": 5.75, "learning_rate": 4.782586602332188e-06, "loss": 1.5038542747497559, "step": 2538 }, { "epoch": 0.46236461272412854, "grad_norm": 7.21875, "learning_rate": 4.782239078875591e-06, "loss": 1.2938894033432007, "step": 2540 }, { "epoch": 0.46272867934832074, "grad_norm": 36.25, "learning_rate": 4.781891293882632e-06, "loss": 1.5071697235107422, "step": 2542 }, { "epoch": 0.463092745972513, "grad_norm": 12.8125, "learning_rate": 4.781543247404347e-06, "loss": 1.4677107334136963, "step": 2544 }, { "epoch": 0.4634568125967052, "grad_norm": 13.5625, "learning_rate": 4.78119493949181e-06, "loss": 1.4703729152679443, "step": 2546 }, { "epoch": 0.46382087922089743, "grad_norm": 11.0, "learning_rate": 4.780846370196134e-06, "loss": 1.458452820777893, "step": 2548 }, { "epoch": 0.4641849458450896, "grad_norm": 10.0625, "learning_rate": 4.780497539568471e-06, "loss": 1.4700268507003784, "step": 2550 }, { "epoch": 0.4645490124692819, "grad_norm": 7.8125, "learning_rate": 4.78014844766001e-06, "loss": 1.3840463161468506, "step": 2552 }, { "epoch": 0.4649130790934741, "grad_norm": 6.03125, "learning_rate": 4.779799094521979e-06, "loss": 1.3164231777191162, "step": 2554 }, { "epoch": 0.4652771457176663, "grad_norm": 13.8125, "learning_rate": 4.779449480205642e-06, "loss": 1.2951946258544922, "step": 2556 }, { "epoch": 0.46564121234185857, "grad_norm": 34.0, "learning_rate": 4.779099604762306e-06, "loss": 2.0955660343170166, "step": 2558 }, { "epoch": 0.46600527896605076, "grad_norm": 14.6875, "learning_rate": 4.7787494682433136e-06, "loss": 1.104082703590393, "step": 2560 }, { "epoch": 0.466369345590243, "grad_norm": 15.375, "learning_rate": 4.778399070700045e-06, "loss": 1.5023795366287231, "step": 2562 }, { "epoch": 0.46673341221443526, "grad_norm": 6.9375, "learning_rate": 4.77804841218392e-06, "loss": 1.3493629693984985, "step": 2564 }, { "epoch": 0.46709747883862746, "grad_norm": 13.1875, "learning_rate": 4.777697492746397e-06, "loss": 1.1247923374176025, "step": 2566 }, { "epoch": 0.4674615454628197, "grad_norm": 15.0, "learning_rate": 4.77734631243897e-06, "loss": 1.4516193866729736, "step": 2568 }, { "epoch": 0.4678256120870119, "grad_norm": 7.5625, "learning_rate": 4.776994871313175e-06, "loss": 1.3607850074768066, "step": 2570 }, { "epoch": 0.46818967871120415, "grad_norm": 6.46875, "learning_rate": 4.776643169420585e-06, "loss": 1.03755521774292, "step": 2572 }, { "epoch": 0.4685537453353964, "grad_norm": 10.25, "learning_rate": 4.776291206812809e-06, "loss": 1.906078815460205, "step": 2574 }, { "epoch": 0.4689178119595886, "grad_norm": 22.5, "learning_rate": 4.775938983541498e-06, "loss": 1.6876178979873657, "step": 2576 }, { "epoch": 0.46928187858378084, "grad_norm": 6.53125, "learning_rate": 4.775586499658338e-06, "loss": 1.1927977800369263, "step": 2578 }, { "epoch": 0.46964594520797304, "grad_norm": 7.625, "learning_rate": 4.775233755215055e-06, "loss": 1.206666350364685, "step": 2580 }, { "epoch": 0.4700100118321653, "grad_norm": 18.25, "learning_rate": 4.774880750263413e-06, "loss": 1.288048267364502, "step": 2582 }, { "epoch": 0.47037407845635754, "grad_norm": 19.875, "learning_rate": 4.7745274848552135e-06, "loss": 1.356485366821289, "step": 2584 }, { "epoch": 0.47073814508054973, "grad_norm": 51.0, "learning_rate": 4.7741739590422975e-06, "loss": 1.0911235809326172, "step": 2586 }, { "epoch": 0.471102211704742, "grad_norm": 10.0, "learning_rate": 4.773820172876543e-06, "loss": 1.6049859523773193, "step": 2588 }, { "epoch": 0.4714662783289342, "grad_norm": 17.375, "learning_rate": 4.773466126409866e-06, "loss": 1.4031660556793213, "step": 2590 }, { "epoch": 0.4718303449531264, "grad_norm": 3.8125, "learning_rate": 4.773111819694224e-06, "loss": 1.2222685813903809, "step": 2592 }, { "epoch": 0.4721944115773187, "grad_norm": 3.125, "learning_rate": 4.772757252781607e-06, "loss": 1.1153395175933838, "step": 2594 }, { "epoch": 0.47255847820151087, "grad_norm": 7.625, "learning_rate": 4.772402425724047e-06, "loss": 1.441320538520813, "step": 2596 }, { "epoch": 0.4729225448257031, "grad_norm": 17.75, "learning_rate": 4.772047338573615e-06, "loss": 1.4108866453170776, "step": 2598 }, { "epoch": 0.4732866114498953, "grad_norm": 13.5, "learning_rate": 4.771691991382417e-06, "loss": 1.227704405784607, "step": 2600 }, { "epoch": 0.47365067807408756, "grad_norm": 3.28125, "learning_rate": 4.7713363842025995e-06, "loss": 1.2100099325180054, "step": 2602 }, { "epoch": 0.4740147446982798, "grad_norm": 3.734375, "learning_rate": 4.770980517086346e-06, "loss": 0.8995495438575745, "step": 2604 }, { "epoch": 0.474378811322472, "grad_norm": 11.1875, "learning_rate": 4.770624390085878e-06, "loss": 0.4387377202510834, "step": 2606 }, { "epoch": 0.47474287794666425, "grad_norm": 101.5, "learning_rate": 4.7702680032534585e-06, "loss": 0.9370388984680176, "step": 2608 }, { "epoch": 0.47510694457085645, "grad_norm": 8.6875, "learning_rate": 4.769911356641383e-06, "loss": 1.2931002378463745, "step": 2610 }, { "epoch": 0.4754710111950487, "grad_norm": 9.6875, "learning_rate": 4.769554450301987e-06, "loss": 1.5461426973342896, "step": 2612 }, { "epoch": 0.47583507781924095, "grad_norm": 48.25, "learning_rate": 4.769197284287649e-06, "loss": 1.647145390510559, "step": 2614 }, { "epoch": 0.47619914444343314, "grad_norm": 4.75, "learning_rate": 4.768839858650779e-06, "loss": 1.3851596117019653, "step": 2616 }, { "epoch": 0.4765632110676254, "grad_norm": 2.671875, "learning_rate": 4.768482173443828e-06, "loss": 0.8840892314910889, "step": 2618 }, { "epoch": 0.4769272776918176, "grad_norm": 10.625, "learning_rate": 4.768124228719284e-06, "loss": 1.418744683265686, "step": 2620 }, { "epoch": 0.47729134431600984, "grad_norm": 10.9375, "learning_rate": 4.767766024529677e-06, "loss": 1.4615778923034668, "step": 2622 }, { "epoch": 0.47765541094020203, "grad_norm": 12.9375, "learning_rate": 4.767407560927569e-06, "loss": 1.4521212577819824, "step": 2624 }, { "epoch": 0.4780194775643943, "grad_norm": 18.0, "learning_rate": 4.767048837965564e-06, "loss": 1.855870246887207, "step": 2626 }, { "epoch": 0.47838354418858653, "grad_norm": 16.75, "learning_rate": 4.766689855696302e-06, "loss": 1.3619643449783325, "step": 2628 }, { "epoch": 0.4787476108127787, "grad_norm": 8.4375, "learning_rate": 4.766330614172465e-06, "loss": 1.144242525100708, "step": 2630 }, { "epoch": 0.479111677436971, "grad_norm": 8.0, "learning_rate": 4.765971113446769e-06, "loss": 1.4348689317703247, "step": 2632 }, { "epoch": 0.47947574406116317, "grad_norm": 29.375, "learning_rate": 4.765611353571968e-06, "loss": 1.5049371719360352, "step": 2634 }, { "epoch": 0.4798398106853554, "grad_norm": 4.0625, "learning_rate": 4.765251334600858e-06, "loss": 1.0106384754180908, "step": 2636 }, { "epoch": 0.48020387730954767, "grad_norm": 5.8125, "learning_rate": 4.764891056586266e-06, "loss": 0.9415692090988159, "step": 2638 }, { "epoch": 0.48056794393373986, "grad_norm": 18.0, "learning_rate": 4.764530519581066e-06, "loss": 1.248544454574585, "step": 2640 }, { "epoch": 0.4809320105579321, "grad_norm": 10.0, "learning_rate": 4.764169723638163e-06, "loss": 1.505332589149475, "step": 2642 }, { "epoch": 0.4812960771821243, "grad_norm": 4.25, "learning_rate": 4.763808668810501e-06, "loss": 1.4893392324447632, "step": 2644 }, { "epoch": 0.48166014380631655, "grad_norm": 12.1875, "learning_rate": 4.7634473551510666e-06, "loss": 1.3953019380569458, "step": 2646 }, { "epoch": 0.4820242104305088, "grad_norm": 13.25, "learning_rate": 4.763085782712879e-06, "loss": 1.5962615013122559, "step": 2648 }, { "epoch": 0.482388277054701, "grad_norm": 9.9375, "learning_rate": 4.762723951548998e-06, "loss": 1.1721880435943604, "step": 2650 }, { "epoch": 0.48275234367889325, "grad_norm": 7.4375, "learning_rate": 4.76236186171252e-06, "loss": 1.3240480422973633, "step": 2652 }, { "epoch": 0.48311641030308544, "grad_norm": 12.9375, "learning_rate": 4.761999513256582e-06, "loss": 1.4799084663391113, "step": 2654 }, { "epoch": 0.4834804769272777, "grad_norm": 16.375, "learning_rate": 4.761636906234356e-06, "loss": 1.4244959354400635, "step": 2656 }, { "epoch": 0.48384454355146994, "grad_norm": 20.0, "learning_rate": 4.761274040699053e-06, "loss": 1.3574916124343872, "step": 2658 }, { "epoch": 0.48420861017566214, "grad_norm": 9.0625, "learning_rate": 4.760910916703922e-06, "loss": 1.52568781375885, "step": 2660 }, { "epoch": 0.4845726767998544, "grad_norm": 7.59375, "learning_rate": 4.760547534302252e-06, "loss": 1.3339539766311646, "step": 2662 }, { "epoch": 0.4849367434240466, "grad_norm": 8.625, "learning_rate": 4.760183893547364e-06, "loss": 1.5742881298065186, "step": 2664 }, { "epoch": 0.48530081004823883, "grad_norm": 9.375, "learning_rate": 4.759819994492625e-06, "loss": 1.5729178190231323, "step": 2666 }, { "epoch": 0.4856648766724311, "grad_norm": 17.125, "learning_rate": 4.7594558371914325e-06, "loss": 1.6616425514221191, "step": 2668 }, { "epoch": 0.4860289432966233, "grad_norm": 25.375, "learning_rate": 4.7590914216972275e-06, "loss": 1.5070042610168457, "step": 2670 }, { "epoch": 0.4863930099208155, "grad_norm": 17.375, "learning_rate": 4.758726748063483e-06, "loss": 1.5015840530395508, "step": 2672 }, { "epoch": 0.4867570765450077, "grad_norm": 15.5, "learning_rate": 4.758361816343717e-06, "loss": 1.4561421871185303, "step": 2674 }, { "epoch": 0.48712114316919997, "grad_norm": 7.9375, "learning_rate": 4.757996626591481e-06, "loss": 1.4493619203567505, "step": 2676 }, { "epoch": 0.4874852097933922, "grad_norm": 9.0, "learning_rate": 4.757631178860365e-06, "loss": 1.2875139713287354, "step": 2678 }, { "epoch": 0.4878492764175844, "grad_norm": 43.5, "learning_rate": 4.757265473203996e-06, "loss": 1.3142790794372559, "step": 2680 }, { "epoch": 0.48821334304177666, "grad_norm": 16.625, "learning_rate": 4.75689950967604e-06, "loss": 0.9488089680671692, "step": 2682 }, { "epoch": 0.48857740966596885, "grad_norm": 46.75, "learning_rate": 4.756533288330202e-06, "loss": 1.394675374031067, "step": 2684 }, { "epoch": 0.4889414762901611, "grad_norm": 18.375, "learning_rate": 4.756166809220221e-06, "loss": 1.4688191413879395, "step": 2686 }, { "epoch": 0.48930554291435335, "grad_norm": 14.0, "learning_rate": 4.755800072399879e-06, "loss": 1.770665168762207, "step": 2688 }, { "epoch": 0.48966960953854555, "grad_norm": 11.5, "learning_rate": 4.755433077922992e-06, "loss": 1.5805026292800903, "step": 2690 }, { "epoch": 0.4900336761627378, "grad_norm": 17.25, "learning_rate": 4.755065825843413e-06, "loss": 1.336399793624878, "step": 2692 }, { "epoch": 0.49039774278693, "grad_norm": 20.0, "learning_rate": 4.7546983162150394e-06, "loss": 0.9021884202957153, "step": 2694 }, { "epoch": 0.49076180941112224, "grad_norm": 5.75, "learning_rate": 4.754330549091798e-06, "loss": 1.328724980354309, "step": 2696 }, { "epoch": 0.49112587603531443, "grad_norm": 7.03125, "learning_rate": 4.753962524527658e-06, "loss": 1.1296789646148682, "step": 2698 }, { "epoch": 0.4914899426595067, "grad_norm": 9.3125, "learning_rate": 4.753594242576626e-06, "loss": 1.4913991689682007, "step": 2700 }, { "epoch": 0.49185400928369893, "grad_norm": 41.75, "learning_rate": 4.753225703292745e-06, "loss": 1.3883957862854004, "step": 2702 }, { "epoch": 0.49221807590789113, "grad_norm": 7.3125, "learning_rate": 4.7528569067300975e-06, "loss": 1.2255128622055054, "step": 2704 }, { "epoch": 0.4925821425320834, "grad_norm": 13.5, "learning_rate": 4.752487852942803e-06, "loss": 1.5752066373825073, "step": 2706 }, { "epoch": 0.49294620915627557, "grad_norm": 11.625, "learning_rate": 4.752118541985019e-06, "loss": 1.4964804649353027, "step": 2708 }, { "epoch": 0.4933102757804678, "grad_norm": 6.03125, "learning_rate": 4.751748973910939e-06, "loss": 1.1876988410949707, "step": 2710 }, { "epoch": 0.49367434240466007, "grad_norm": 14.0, "learning_rate": 4.751379148774796e-06, "loss": 1.5623489618301392, "step": 2712 }, { "epoch": 0.49403840902885227, "grad_norm": 21.625, "learning_rate": 4.75100906663086e-06, "loss": 1.268662452697754, "step": 2714 }, { "epoch": 0.4944024756530445, "grad_norm": 8.0625, "learning_rate": 4.750638727533442e-06, "loss": 0.7708040475845337, "step": 2716 }, { "epoch": 0.4947665422772367, "grad_norm": 19.25, "learning_rate": 4.750268131536884e-06, "loss": 1.5136632919311523, "step": 2718 }, { "epoch": 0.49513060890142896, "grad_norm": 11.25, "learning_rate": 4.74989727869557e-06, "loss": 1.5816248655319214, "step": 2720 }, { "epoch": 0.4954946755256212, "grad_norm": 7.25, "learning_rate": 4.749526169063923e-06, "loss": 1.1813924312591553, "step": 2722 }, { "epoch": 0.4958587421498134, "grad_norm": 18.0, "learning_rate": 4.7491548026964e-06, "loss": 1.3222821950912476, "step": 2724 }, { "epoch": 0.49622280877400565, "grad_norm": 14.125, "learning_rate": 4.7487831796475e-06, "loss": 1.2531263828277588, "step": 2726 }, { "epoch": 0.49658687539819785, "grad_norm": 13.1875, "learning_rate": 4.748411299971755e-06, "loss": 1.7887446880340576, "step": 2728 }, { "epoch": 0.4969509420223901, "grad_norm": 24.875, "learning_rate": 4.748039163723738e-06, "loss": 1.43979012966156, "step": 2730 }, { "epoch": 0.49731500864658235, "grad_norm": 8.9375, "learning_rate": 4.747666770958057e-06, "loss": 1.304109811782837, "step": 2732 }, { "epoch": 0.49767907527077454, "grad_norm": 5.0625, "learning_rate": 4.74729412172936e-06, "loss": 1.0966064929962158, "step": 2734 }, { "epoch": 0.4980431418949668, "grad_norm": 8.4375, "learning_rate": 4.746921216092332e-06, "loss": 1.529699683189392, "step": 2736 }, { "epoch": 0.498407208519159, "grad_norm": 9.5625, "learning_rate": 4.746548054101695e-06, "loss": 1.3052148818969727, "step": 2738 }, { "epoch": 0.49877127514335123, "grad_norm": 11.3125, "learning_rate": 4.74617463581221e-06, "loss": 1.584709882736206, "step": 2740 }, { "epoch": 0.4991353417675435, "grad_norm": 8.0625, "learning_rate": 4.745800961278673e-06, "loss": 1.7206652164459229, "step": 2742 }, { "epoch": 0.4994994083917357, "grad_norm": 13.4375, "learning_rate": 4.745427030555919e-06, "loss": 1.2229584455490112, "step": 2744 }, { "epoch": 0.4998634750159279, "grad_norm": 10.875, "learning_rate": 4.745052843698824e-06, "loss": 1.817686915397644, "step": 2746 }, { "epoch": 0.5002275416401202, "grad_norm": 15.0625, "learning_rate": 4.744678400762296e-06, "loss": 1.8809374570846558, "step": 2748 }, { "epoch": 0.5005916082643124, "grad_norm": 9.25, "learning_rate": 4.7443037018012815e-06, "loss": 1.4616502523422241, "step": 2750 }, { "epoch": 0.5009556748885046, "grad_norm": 11.625, "learning_rate": 4.74392874687077e-06, "loss": 1.1442909240722656, "step": 2752 }, { "epoch": 0.5013197415126969, "grad_norm": 25.125, "learning_rate": 4.743553536025781e-06, "loss": 1.5182840824127197, "step": 2754 }, { "epoch": 0.5016838081368891, "grad_norm": 19.5, "learning_rate": 4.743178069321377e-06, "loss": 1.602879524230957, "step": 2756 }, { "epoch": 0.5020478747610813, "grad_norm": 14.375, "learning_rate": 4.742802346812656e-06, "loss": 1.6968035697937012, "step": 2758 }, { "epoch": 0.5024119413852735, "grad_norm": 7.375, "learning_rate": 4.742426368554752e-06, "loss": 1.487510085105896, "step": 2760 }, { "epoch": 0.5027760080094658, "grad_norm": 7.96875, "learning_rate": 4.742050134602842e-06, "loss": 1.0827512741088867, "step": 2762 }, { "epoch": 0.503140074633658, "grad_norm": 8.4375, "learning_rate": 4.741673645012134e-06, "loss": 1.2456951141357422, "step": 2764 }, { "epoch": 0.5035041412578501, "grad_norm": 7.375, "learning_rate": 4.741296899837877e-06, "loss": 1.3233991861343384, "step": 2766 }, { "epoch": 0.5038682078820425, "grad_norm": 19.625, "learning_rate": 4.740919899135357e-06, "loss": 1.3172898292541504, "step": 2768 }, { "epoch": 0.5042322745062346, "grad_norm": 4.375, "learning_rate": 4.740542642959897e-06, "loss": 1.117545485496521, "step": 2770 }, { "epoch": 0.5045963411304268, "grad_norm": 12.1875, "learning_rate": 4.740165131366857e-06, "loss": 1.4898500442504883, "step": 2772 }, { "epoch": 0.5049604077546191, "grad_norm": 11.125, "learning_rate": 4.739787364411638e-06, "loss": 1.793648362159729, "step": 2774 }, { "epoch": 0.5053244743788113, "grad_norm": 8.4375, "learning_rate": 4.7394093421496725e-06, "loss": 1.372380256652832, "step": 2776 }, { "epoch": 0.5056885410030035, "grad_norm": 6.9375, "learning_rate": 4.739031064636436e-06, "loss": 1.0014395713806152, "step": 2778 }, { "epoch": 0.5060526076271957, "grad_norm": 12.875, "learning_rate": 4.738652531927438e-06, "loss": 1.356541395187378, "step": 2780 }, { "epoch": 0.506416674251388, "grad_norm": 20.25, "learning_rate": 4.7382737440782265e-06, "loss": 1.4575536251068115, "step": 2782 }, { "epoch": 0.5067807408755802, "grad_norm": 10.9375, "learning_rate": 4.737894701144389e-06, "loss": 1.4169225692749023, "step": 2784 }, { "epoch": 0.5071448074997724, "grad_norm": 10.5625, "learning_rate": 4.737515403181546e-06, "loss": 1.1146185398101807, "step": 2786 }, { "epoch": 0.5075088741239647, "grad_norm": 26.375, "learning_rate": 4.73713585024536e-06, "loss": 1.268887996673584, "step": 2788 }, { "epoch": 0.5078729407481569, "grad_norm": 9.4375, "learning_rate": 4.736756042391527e-06, "loss": 1.5824708938598633, "step": 2790 }, { "epoch": 0.5082370073723491, "grad_norm": 11.0625, "learning_rate": 4.736375979675784e-06, "loss": 1.5918515920639038, "step": 2792 }, { "epoch": 0.5086010739965414, "grad_norm": 14.1875, "learning_rate": 4.735995662153904e-06, "loss": 1.3004698753356934, "step": 2794 }, { "epoch": 0.5089651406207336, "grad_norm": 13.125, "learning_rate": 4.735615089881694e-06, "loss": 1.5352630615234375, "step": 2796 }, { "epoch": 0.5093292072449258, "grad_norm": 9.375, "learning_rate": 4.735234262915004e-06, "loss": 1.4200998544692993, "step": 2798 }, { "epoch": 0.509693273869118, "grad_norm": 7.0, "learning_rate": 4.734853181309719e-06, "loss": 1.2577595710754395, "step": 2800 }, { "epoch": 0.5100573404933103, "grad_norm": 18.0, "learning_rate": 4.734471845121759e-06, "loss": 0.9026685953140259, "step": 2802 }, { "epoch": 0.5104214071175025, "grad_norm": 13.5625, "learning_rate": 4.734090254407086e-06, "loss": 1.338547706604004, "step": 2804 }, { "epoch": 0.5107854737416947, "grad_norm": 17.0, "learning_rate": 4.733708409221695e-06, "loss": 1.5599079132080078, "step": 2806 }, { "epoch": 0.511149540365887, "grad_norm": 23.5, "learning_rate": 4.733326309621622e-06, "loss": 1.8223967552185059, "step": 2808 }, { "epoch": 0.5115136069900792, "grad_norm": 11.5, "learning_rate": 4.7329439556629375e-06, "loss": 1.0494744777679443, "step": 2810 }, { "epoch": 0.5118776736142714, "grad_norm": 5.53125, "learning_rate": 4.732561347401751e-06, "loss": 1.398725152015686, "step": 2812 }, { "epoch": 0.5122417402384637, "grad_norm": 16.375, "learning_rate": 4.732178484894206e-06, "loss": 0.9273154139518738, "step": 2814 }, { "epoch": 0.5126058068626559, "grad_norm": 14.1875, "learning_rate": 4.73179536819649e-06, "loss": 1.0062042474746704, "step": 2816 }, { "epoch": 0.5129698734868481, "grad_norm": 9.125, "learning_rate": 4.731411997364822e-06, "loss": 1.8211826086044312, "step": 2818 }, { "epoch": 0.5133339401110403, "grad_norm": 24.375, "learning_rate": 4.73102837245546e-06, "loss": 1.7762036323547363, "step": 2820 }, { "epoch": 0.5136980067352326, "grad_norm": 21.0, "learning_rate": 4.7306444935247e-06, "loss": 2.1570186614990234, "step": 2822 }, { "epoch": 0.5140620733594248, "grad_norm": 14.875, "learning_rate": 4.730260360628873e-06, "loss": 2.1902740001678467, "step": 2824 }, { "epoch": 0.514426139983617, "grad_norm": 19.375, "learning_rate": 4.7298759738243505e-06, "loss": 1.8290989398956299, "step": 2826 }, { "epoch": 0.5147902066078093, "grad_norm": 22.0, "learning_rate": 4.7294913331675406e-06, "loss": 1.6025868654251099, "step": 2828 }, { "epoch": 0.5151542732320015, "grad_norm": 19.375, "learning_rate": 4.729106438714886e-06, "loss": 0.5601062178611755, "step": 2830 }, { "epoch": 0.5155183398561937, "grad_norm": 13.0625, "learning_rate": 4.7287212905228684e-06, "loss": 1.4542829990386963, "step": 2832 }, { "epoch": 0.5158824064803859, "grad_norm": 8.875, "learning_rate": 4.728335888648008e-06, "loss": 1.4797817468643188, "step": 2834 }, { "epoch": 0.5162464731045782, "grad_norm": 27.875, "learning_rate": 4.72795023314686e-06, "loss": 1.549726963043213, "step": 2836 }, { "epoch": 0.5166105397287704, "grad_norm": 21.0, "learning_rate": 4.727564324076019e-06, "loss": 1.5557734966278076, "step": 2838 }, { "epoch": 0.5169746063529626, "grad_norm": 18.125, "learning_rate": 4.727178161492113e-06, "loss": 0.9821083545684814, "step": 2840 }, { "epoch": 0.5173386729771549, "grad_norm": 10.9375, "learning_rate": 4.726791745451812e-06, "loss": 1.6308296918869019, "step": 2842 }, { "epoch": 0.517702739601347, "grad_norm": 5.90625, "learning_rate": 4.726405076011821e-06, "loss": 1.3690816164016724, "step": 2844 }, { "epoch": 0.5180668062255392, "grad_norm": 6.40625, "learning_rate": 4.72601815322888e-06, "loss": 1.477687120437622, "step": 2846 }, { "epoch": 0.5184308728497315, "grad_norm": 11.0625, "learning_rate": 4.725630977159772e-06, "loss": 1.0053203105926514, "step": 2848 }, { "epoch": 0.5187949394739237, "grad_norm": 12.75, "learning_rate": 4.72524354786131e-06, "loss": 1.4586474895477295, "step": 2850 }, { "epoch": 0.5191590060981159, "grad_norm": 9.75, "learning_rate": 4.72485586539035e-06, "loss": 1.4755802154541016, "step": 2852 }, { "epoch": 0.5195230727223081, "grad_norm": 20.875, "learning_rate": 4.724467929803782e-06, "loss": 1.0338621139526367, "step": 2854 }, { "epoch": 0.5198871393465004, "grad_norm": 9.5, "learning_rate": 4.7240797411585335e-06, "loss": 1.3863259553909302, "step": 2856 }, { "epoch": 0.5202512059706926, "grad_norm": 18.5, "learning_rate": 4.72369129951157e-06, "loss": 1.1654415130615234, "step": 2858 }, { "epoch": 0.5206152725948848, "grad_norm": 14.6875, "learning_rate": 4.723302604919895e-06, "loss": 1.6258399486541748, "step": 2860 }, { "epoch": 0.5209793392190771, "grad_norm": 11.125, "learning_rate": 4.722913657440545e-06, "loss": 1.5496981143951416, "step": 2862 }, { "epoch": 0.5213434058432693, "grad_norm": 20.625, "learning_rate": 4.722524457130599e-06, "loss": 1.591113567352295, "step": 2864 }, { "epoch": 0.5217074724674615, "grad_norm": 10.25, "learning_rate": 4.72213500404717e-06, "loss": 1.7281275987625122, "step": 2866 }, { "epoch": 0.5220715390916538, "grad_norm": 12.5, "learning_rate": 4.721745298247408e-06, "loss": 1.8635038137435913, "step": 2868 }, { "epoch": 0.522435605715846, "grad_norm": 16.625, "learning_rate": 4.721355339788501e-06, "loss": 1.5160801410675049, "step": 2870 }, { "epoch": 0.5227996723400382, "grad_norm": 16.25, "learning_rate": 4.720965128727674e-06, "loss": 1.5367975234985352, "step": 2872 }, { "epoch": 0.5231637389642304, "grad_norm": 10.75, "learning_rate": 4.72057466512219e-06, "loss": 1.5876338481903076, "step": 2874 }, { "epoch": 0.5235278055884227, "grad_norm": 13.375, "learning_rate": 4.720183949029344e-06, "loss": 1.4861215353012085, "step": 2876 }, { "epoch": 0.5238918722126149, "grad_norm": 12.6875, "learning_rate": 4.719792980506477e-06, "loss": 1.0783206224441528, "step": 2878 }, { "epoch": 0.5242559388368071, "grad_norm": 23.125, "learning_rate": 4.719401759610958e-06, "loss": 1.9256478548049927, "step": 2880 }, { "epoch": 0.5246200054609994, "grad_norm": 18.75, "learning_rate": 4.7190102864002005e-06, "loss": 1.8310813903808594, "step": 2882 }, { "epoch": 0.5249840720851916, "grad_norm": 16.25, "learning_rate": 4.71861856093165e-06, "loss": 1.1610815525054932, "step": 2884 }, { "epoch": 0.5253481387093838, "grad_norm": 15.1875, "learning_rate": 4.718226583262791e-06, "loss": 1.5273479223251343, "step": 2886 }, { "epoch": 0.5257122053335761, "grad_norm": 40.75, "learning_rate": 4.717834353451143e-06, "loss": 1.5807608366012573, "step": 2888 }, { "epoch": 0.5260762719577683, "grad_norm": 6.46875, "learning_rate": 4.717441871554266e-06, "loss": 1.322156548500061, "step": 2890 }, { "epoch": 0.5264403385819605, "grad_norm": 40.0, "learning_rate": 4.7170491376297535e-06, "loss": 1.035938024520874, "step": 2892 }, { "epoch": 0.5268044052061527, "grad_norm": 201.0, "learning_rate": 4.716656151735241e-06, "loss": 1.1095421314239502, "step": 2894 }, { "epoch": 0.527168471830345, "grad_norm": 7.09375, "learning_rate": 4.7162629139283935e-06, "loss": 1.397359848022461, "step": 2896 }, { "epoch": 0.5275325384545372, "grad_norm": 6.25, "learning_rate": 4.715869424266919e-06, "loss": 1.2935075759887695, "step": 2898 }, { "epoch": 0.5278966050787294, "grad_norm": 14.375, "learning_rate": 4.71547568280856e-06, "loss": 1.5288337469100952, "step": 2900 }, { "epoch": 0.5282606717029217, "grad_norm": 8.5625, "learning_rate": 4.715081689611097e-06, "loss": 1.5273665189743042, "step": 2902 }, { "epoch": 0.5286247383271139, "grad_norm": 18.0, "learning_rate": 4.714687444732348e-06, "loss": 1.7785556316375732, "step": 2904 }, { "epoch": 0.5289888049513061, "grad_norm": 44.0, "learning_rate": 4.714292948230164e-06, "loss": 1.650891661643982, "step": 2906 }, { "epoch": 0.5293528715754983, "grad_norm": 13.25, "learning_rate": 4.7138982001624365e-06, "loss": 1.902068018913269, "step": 2908 }, { "epoch": 0.5297169381996906, "grad_norm": 19.125, "learning_rate": 4.713503200587095e-06, "loss": 1.5662474632263184, "step": 2910 }, { "epoch": 0.5300810048238828, "grad_norm": 9.8125, "learning_rate": 4.7131079495621035e-06, "loss": 1.4625476598739624, "step": 2912 }, { "epoch": 0.530445071448075, "grad_norm": 11.3125, "learning_rate": 4.712712447145462e-06, "loss": 1.4308027029037476, "step": 2914 }, { "epoch": 0.5308091380722673, "grad_norm": 9.875, "learning_rate": 4.712316693395211e-06, "loss": 1.4272058010101318, "step": 2916 }, { "epoch": 0.5311732046964595, "grad_norm": 12.375, "learning_rate": 4.711920688369424e-06, "loss": 1.459861397743225, "step": 2918 }, { "epoch": 0.5315372713206516, "grad_norm": 8.0, "learning_rate": 4.711524432126214e-06, "loss": 1.523653268814087, "step": 2920 }, { "epoch": 0.531901337944844, "grad_norm": 17.625, "learning_rate": 4.711127924723729e-06, "loss": 1.5347280502319336, "step": 2922 }, { "epoch": 0.5322654045690361, "grad_norm": 17.125, "learning_rate": 4.710731166220157e-06, "loss": 1.5497467517852783, "step": 2924 }, { "epoch": 0.5326294711932283, "grad_norm": 6.21875, "learning_rate": 4.710334156673719e-06, "loss": 0.8407115340232849, "step": 2926 }, { "epoch": 0.5329935378174205, "grad_norm": 23.625, "learning_rate": 4.709936896142675e-06, "loss": 0.366423636674881, "step": 2928 }, { "epoch": 0.5333576044416128, "grad_norm": 9.8125, "learning_rate": 4.709539384685321e-06, "loss": 0.6109656691551208, "step": 2930 }, { "epoch": 0.533721671065805, "grad_norm": 18.625, "learning_rate": 4.70914162235999e-06, "loss": 0.7239855527877808, "step": 2932 }, { "epoch": 0.5340857376899972, "grad_norm": 10.6875, "learning_rate": 4.708743609225053e-06, "loss": 1.124316930770874, "step": 2934 }, { "epoch": 0.5344498043141895, "grad_norm": 14.625, "learning_rate": 4.7083453453389165e-06, "loss": 1.4050620794296265, "step": 2936 }, { "epoch": 0.5348138709383817, "grad_norm": 31.0, "learning_rate": 4.707946830760023e-06, "loss": 1.4761255979537964, "step": 2938 }, { "epoch": 0.5351779375625739, "grad_norm": 9.75, "learning_rate": 4.707548065546854e-06, "loss": 1.5079489946365356, "step": 2940 }, { "epoch": 0.5355420041867662, "grad_norm": 15.625, "learning_rate": 4.7071490497579265e-06, "loss": 1.6067997217178345, "step": 2942 }, { "epoch": 0.5359060708109584, "grad_norm": 7.78125, "learning_rate": 4.706749783451795e-06, "loss": 1.4546140432357788, "step": 2944 }, { "epoch": 0.5362701374351506, "grad_norm": 9.625, "learning_rate": 4.706350266687049e-06, "loss": 1.6595127582550049, "step": 2946 }, { "epoch": 0.5366342040593428, "grad_norm": 8.625, "learning_rate": 4.705950499522317e-06, "loss": 1.277746319770813, "step": 2948 }, { "epoch": 0.5369982706835351, "grad_norm": 12.9375, "learning_rate": 4.705550482016262e-06, "loss": 1.6249651908874512, "step": 2950 }, { "epoch": 0.5373623373077273, "grad_norm": 9.375, "learning_rate": 4.705150214227587e-06, "loss": 1.439868688583374, "step": 2952 }, { "epoch": 0.5377264039319195, "grad_norm": 23.75, "learning_rate": 4.7047496962150285e-06, "loss": 1.6325018405914307, "step": 2954 }, { "epoch": 0.5380904705561118, "grad_norm": 135.0, "learning_rate": 4.70434892803736e-06, "loss": 1.6851948499679565, "step": 2956 }, { "epoch": 0.538454537180304, "grad_norm": 19.125, "learning_rate": 4.7039479097533935e-06, "loss": 1.5004594326019287, "step": 2958 }, { "epoch": 0.5388186038044962, "grad_norm": 6.84375, "learning_rate": 4.703546641421977e-06, "loss": 1.3415923118591309, "step": 2960 }, { "epoch": 0.5391826704286885, "grad_norm": 14.125, "learning_rate": 4.703145123101994e-06, "loss": 1.4607899188995361, "step": 2962 }, { "epoch": 0.5395467370528807, "grad_norm": 14.3125, "learning_rate": 4.702743354852367e-06, "loss": 1.4355204105377197, "step": 2964 }, { "epoch": 0.5399108036770729, "grad_norm": 5.90625, "learning_rate": 4.702341336732054e-06, "loss": 1.134709119796753, "step": 2966 }, { "epoch": 0.5402748703012651, "grad_norm": 10.875, "learning_rate": 4.701939068800048e-06, "loss": 1.3385690450668335, "step": 2968 }, { "epoch": 0.5406389369254574, "grad_norm": 20.625, "learning_rate": 4.701536551115381e-06, "loss": 1.7676000595092773, "step": 2970 }, { "epoch": 0.5410030035496496, "grad_norm": 34.5, "learning_rate": 4.701133783737121e-06, "loss": 1.6422622203826904, "step": 2972 }, { "epoch": 0.5413670701738418, "grad_norm": 59.5, "learning_rate": 4.700730766724372e-06, "loss": 1.8241934776306152, "step": 2974 }, { "epoch": 0.5417311367980341, "grad_norm": 8.6875, "learning_rate": 4.700327500136276e-06, "loss": 1.5284518003463745, "step": 2976 }, { "epoch": 0.5420952034222263, "grad_norm": 12.125, "learning_rate": 4.69992398403201e-06, "loss": 1.5116437673568726, "step": 2978 }, { "epoch": 0.5424592700464185, "grad_norm": 12.4375, "learning_rate": 4.6995202184707885e-06, "loss": 1.5724998712539673, "step": 2980 }, { "epoch": 0.5428233366706107, "grad_norm": 26.25, "learning_rate": 4.699116203511862e-06, "loss": 1.6610677242279053, "step": 2982 }, { "epoch": 0.543187403294803, "grad_norm": 25.5, "learning_rate": 4.6987119392145185e-06, "loss": 2.1597137451171875, "step": 2984 }, { "epoch": 0.5435514699189952, "grad_norm": 23.25, "learning_rate": 4.698307425638083e-06, "loss": 1.7757792472839355, "step": 2986 }, { "epoch": 0.5439155365431874, "grad_norm": 7.15625, "learning_rate": 4.697902662841915e-06, "loss": 1.1460011005401611, "step": 2988 }, { "epoch": 0.5442796031673797, "grad_norm": 9.125, "learning_rate": 4.697497650885413e-06, "loss": 1.556647539138794, "step": 2990 }, { "epoch": 0.5446436697915719, "grad_norm": 16.25, "learning_rate": 4.69709238982801e-06, "loss": 1.4367669820785522, "step": 2992 }, { "epoch": 0.545007736415764, "grad_norm": 27.5, "learning_rate": 4.696686879729176e-06, "loss": 1.2195615768432617, "step": 2994 }, { "epoch": 0.5453718030399564, "grad_norm": 8.125, "learning_rate": 4.6962811206484185e-06, "loss": 0.39954930543899536, "step": 2996 }, { "epoch": 0.5457358696641486, "grad_norm": 8.875, "learning_rate": 4.6958751126452804e-06, "loss": 1.2129322290420532, "step": 2998 }, { "epoch": 0.5460999362883407, "grad_norm": 15.0, "learning_rate": 4.695468855779343e-06, "loss": 1.3926241397857666, "step": 3000 }, { "epoch": 0.5464640029125329, "grad_norm": 10.0, "learning_rate": 4.695062350110221e-06, "loss": 1.4251070022583008, "step": 3002 }, { "epoch": 0.5468280695367252, "grad_norm": 9.0, "learning_rate": 4.694655595697571e-06, "loss": 1.6396093368530273, "step": 3004 }, { "epoch": 0.5471921361609174, "grad_norm": 12.3125, "learning_rate": 4.694248592601077e-06, "loss": 1.3367325067520142, "step": 3006 }, { "epoch": 0.5475562027851096, "grad_norm": 8.5, "learning_rate": 4.693841340880471e-06, "loss": 0.914564847946167, "step": 3008 }, { "epoch": 0.5479202694093019, "grad_norm": 6.53125, "learning_rate": 4.693433840595511e-06, "loss": 1.34604811668396, "step": 3010 }, { "epoch": 0.5482843360334941, "grad_norm": 8.9375, "learning_rate": 4.693026091805999e-06, "loss": 1.4301847219467163, "step": 3012 }, { "epoch": 0.5486484026576863, "grad_norm": 7.6875, "learning_rate": 4.692618094571769e-06, "loss": 1.2869274616241455, "step": 3014 }, { "epoch": 0.5490124692818786, "grad_norm": 11.875, "learning_rate": 4.6922098489526926e-06, "loss": 1.4045214653015137, "step": 3016 }, { "epoch": 0.5493765359060708, "grad_norm": 14.9375, "learning_rate": 4.691801355008679e-06, "loss": 1.9691683053970337, "step": 3018 }, { "epoch": 0.549740602530263, "grad_norm": 8.4375, "learning_rate": 4.691392612799673e-06, "loss": 1.390626072883606, "step": 3020 }, { "epoch": 0.5501046691544552, "grad_norm": 13.1875, "learning_rate": 4.6909836223856555e-06, "loss": 1.4834929704666138, "step": 3022 }, { "epoch": 0.5504687357786475, "grad_norm": 8.375, "learning_rate": 4.690574383826645e-06, "loss": 1.3541334867477417, "step": 3024 }, { "epoch": 0.5508328024028397, "grad_norm": 10.9375, "learning_rate": 4.6901648971826945e-06, "loss": 1.6976135969161987, "step": 3026 }, { "epoch": 0.5511968690270319, "grad_norm": 5.34375, "learning_rate": 4.689755162513895e-06, "loss": 1.4011552333831787, "step": 3028 }, { "epoch": 0.5515609356512242, "grad_norm": 4.15625, "learning_rate": 4.689345179880374e-06, "loss": 1.071542739868164, "step": 3030 }, { "epoch": 0.5519250022754164, "grad_norm": 8.9375, "learning_rate": 4.688934949342293e-06, "loss": 1.0931841135025024, "step": 3032 }, { "epoch": 0.5522890688996086, "grad_norm": 16.375, "learning_rate": 4.688524470959853e-06, "loss": 1.6992199420928955, "step": 3034 }, { "epoch": 0.5526531355238009, "grad_norm": 16.875, "learning_rate": 4.68811374479329e-06, "loss": 1.6201632022857666, "step": 3036 }, { "epoch": 0.5530172021479931, "grad_norm": 46.25, "learning_rate": 4.687702770902877e-06, "loss": 1.4474477767944336, "step": 3038 }, { "epoch": 0.5533812687721853, "grad_norm": 10.5, "learning_rate": 4.687291549348921e-06, "loss": 1.4587504863739014, "step": 3040 }, { "epoch": 0.5537453353963775, "grad_norm": 13.3125, "learning_rate": 4.686880080191769e-06, "loss": 1.2866533994674683, "step": 3042 }, { "epoch": 0.5541094020205698, "grad_norm": 10.375, "learning_rate": 4.686468363491802e-06, "loss": 0.9405273199081421, "step": 3044 }, { "epoch": 0.554473468644762, "grad_norm": 8.25, "learning_rate": 4.686056399309436e-06, "loss": 1.0470237731933594, "step": 3046 }, { "epoch": 0.5548375352689542, "grad_norm": 16.0, "learning_rate": 4.685644187705127e-06, "loss": 1.54508376121521, "step": 3048 }, { "epoch": 0.5552016018931465, "grad_norm": 12.9375, "learning_rate": 4.685231728739364e-06, "loss": 1.7101861238479614, "step": 3050 }, { "epoch": 0.5555656685173387, "grad_norm": 10.5, "learning_rate": 4.684819022472677e-06, "loss": 1.4591268301010132, "step": 3052 }, { "epoch": 0.5559297351415309, "grad_norm": 12.625, "learning_rate": 4.6844060689656235e-06, "loss": 1.0944174528121948, "step": 3054 }, { "epoch": 0.5562938017657231, "grad_norm": 12.1875, "learning_rate": 4.683992868278807e-06, "loss": 0.9923224449157715, "step": 3056 }, { "epoch": 0.5566578683899154, "grad_norm": 124.5, "learning_rate": 4.683579420472861e-06, "loss": 0.8241497278213501, "step": 3058 }, { "epoch": 0.5570219350141076, "grad_norm": 8.0625, "learning_rate": 4.68316572560846e-06, "loss": 1.002344012260437, "step": 3060 }, { "epoch": 0.5573860016382998, "grad_norm": 29.375, "learning_rate": 4.682751783746308e-06, "loss": 1.116881251335144, "step": 3062 }, { "epoch": 0.5577500682624921, "grad_norm": 16.25, "learning_rate": 4.682337594947152e-06, "loss": 1.583069086074829, "step": 3064 }, { "epoch": 0.5581141348866843, "grad_norm": 59.0, "learning_rate": 4.68192315927177e-06, "loss": 1.565364122390747, "step": 3066 }, { "epoch": 0.5584782015108765, "grad_norm": 10.0625, "learning_rate": 4.681508476780984e-06, "loss": 1.595078706741333, "step": 3068 }, { "epoch": 0.5588422681350688, "grad_norm": 6.125, "learning_rate": 4.681093547535641e-06, "loss": 1.391558289527893, "step": 3070 }, { "epoch": 0.559206334759261, "grad_norm": 15.375, "learning_rate": 4.680678371596634e-06, "loss": 1.5937374830245972, "step": 3072 }, { "epoch": 0.5595704013834532, "grad_norm": 10.8125, "learning_rate": 4.680262949024886e-06, "loss": 1.448059320449829, "step": 3074 }, { "epoch": 0.5599344680076453, "grad_norm": 16.75, "learning_rate": 4.679847279881361e-06, "loss": 2.119917869567871, "step": 3076 }, { "epoch": 0.5602985346318377, "grad_norm": 12.6875, "learning_rate": 4.679431364227055e-06, "loss": 1.5274730920791626, "step": 3078 }, { "epoch": 0.5606626012560298, "grad_norm": 11.5, "learning_rate": 4.679015202123003e-06, "loss": 1.4733775854110718, "step": 3080 }, { "epoch": 0.561026667880222, "grad_norm": 10.0, "learning_rate": 4.678598793630274e-06, "loss": 1.4676270484924316, "step": 3082 }, { "epoch": 0.5613907345044143, "grad_norm": 8.0625, "learning_rate": 4.678182138809976e-06, "loss": 1.4255867004394531, "step": 3084 }, { "epoch": 0.5617548011286065, "grad_norm": 16.5, "learning_rate": 4.677765237723247e-06, "loss": 0.8566718697547913, "step": 3086 }, { "epoch": 0.5621188677527987, "grad_norm": 23.875, "learning_rate": 4.677348090431272e-06, "loss": 0.4970046579837799, "step": 3088 }, { "epoch": 0.562482934376991, "grad_norm": 15.1875, "learning_rate": 4.676930696995261e-06, "loss": 1.617612361907959, "step": 3090 }, { "epoch": 0.5628470010011832, "grad_norm": 55.5, "learning_rate": 4.676513057476467e-06, "loss": 1.3165394067764282, "step": 3092 }, { "epoch": 0.5632110676253754, "grad_norm": 5.21875, "learning_rate": 4.676095171936176e-06, "loss": 1.4364819526672363, "step": 3094 }, { "epoch": 0.5635751342495676, "grad_norm": 9.9375, "learning_rate": 4.67567704043571e-06, "loss": 1.0662716627120972, "step": 3096 }, { "epoch": 0.5639392008737599, "grad_norm": 7.15625, "learning_rate": 4.6752586630364305e-06, "loss": 1.6108722686767578, "step": 3098 }, { "epoch": 0.5643032674979521, "grad_norm": 19.5, "learning_rate": 4.674840039799731e-06, "loss": 1.4411096572875977, "step": 3100 }, { "epoch": 0.5646673341221443, "grad_norm": 5.9375, "learning_rate": 4.674421170787043e-06, "loss": 1.2056114673614502, "step": 3102 }, { "epoch": 0.5650314007463366, "grad_norm": 23.625, "learning_rate": 4.674002056059833e-06, "loss": 1.3782093524932861, "step": 3104 }, { "epoch": 0.5653954673705288, "grad_norm": 16.25, "learning_rate": 4.673582695679607e-06, "loss": 1.5344831943511963, "step": 3106 }, { "epoch": 0.565759533994721, "grad_norm": 9.875, "learning_rate": 4.6731630897079025e-06, "loss": 1.3298767805099487, "step": 3108 }, { "epoch": 0.5661236006189133, "grad_norm": 22.125, "learning_rate": 4.672743238206295e-06, "loss": 1.644599437713623, "step": 3110 }, { "epoch": 0.5664876672431055, "grad_norm": 29.0, "learning_rate": 4.672323141236397e-06, "loss": 1.771336555480957, "step": 3112 }, { "epoch": 0.5668517338672977, "grad_norm": 67.5, "learning_rate": 4.671902798859856e-06, "loss": 1.1102098226547241, "step": 3114 }, { "epoch": 0.5672158004914899, "grad_norm": 6.6875, "learning_rate": 4.671482211138353e-06, "loss": 1.4423727989196777, "step": 3116 }, { "epoch": 0.5675798671156822, "grad_norm": 7.84375, "learning_rate": 4.67106137813361e-06, "loss": 1.4640207290649414, "step": 3118 }, { "epoch": 0.5679439337398744, "grad_norm": 6.3125, "learning_rate": 4.670640299907382e-06, "loss": 1.0204105377197266, "step": 3120 }, { "epoch": 0.5683080003640666, "grad_norm": 5.3125, "learning_rate": 4.6702189765214614e-06, "loss": 1.2244899272918701, "step": 3122 }, { "epoch": 0.5686720669882589, "grad_norm": 11.9375, "learning_rate": 4.669797408037674e-06, "loss": 1.266489028930664, "step": 3124 }, { "epoch": 0.5690361336124511, "grad_norm": 7.5, "learning_rate": 4.669375594517884e-06, "loss": 1.2684581279754639, "step": 3126 }, { "epoch": 0.5694002002366433, "grad_norm": 15.5, "learning_rate": 4.668953536023991e-06, "loss": 1.0628567934036255, "step": 3128 }, { "epoch": 0.5697642668608355, "grad_norm": 11.5625, "learning_rate": 4.668531232617931e-06, "loss": 1.6200110912322998, "step": 3130 }, { "epoch": 0.5701283334850278, "grad_norm": 11.5, "learning_rate": 4.668108684361673e-06, "loss": 0.9993836283683777, "step": 3132 }, { "epoch": 0.57049240010922, "grad_norm": 14.4375, "learning_rate": 4.667685891317227e-06, "loss": 1.2345932722091675, "step": 3134 }, { "epoch": 0.5708564667334122, "grad_norm": 9.0625, "learning_rate": 4.667262853546634e-06, "loss": 1.195366382598877, "step": 3136 }, { "epoch": 0.5712205333576045, "grad_norm": 18.75, "learning_rate": 4.666839571111975e-06, "loss": 2.00323224067688, "step": 3138 }, { "epoch": 0.5715845999817967, "grad_norm": 13.75, "learning_rate": 4.666416044075365e-06, "loss": 1.9695429801940918, "step": 3140 }, { "epoch": 0.5719486666059889, "grad_norm": 3.515625, "learning_rate": 4.665992272498952e-06, "loss": 1.1043243408203125, "step": 3142 }, { "epoch": 0.5723127332301812, "grad_norm": 20.625, "learning_rate": 4.665568256444926e-06, "loss": 1.1558351516723633, "step": 3144 }, { "epoch": 0.5726767998543734, "grad_norm": 15.5, "learning_rate": 4.6651439959755076e-06, "loss": 1.2644670009613037, "step": 3146 }, { "epoch": 0.5730408664785656, "grad_norm": 16.75, "learning_rate": 4.664719491152956e-06, "loss": 1.6701910495758057, "step": 3148 }, { "epoch": 0.5734049331027578, "grad_norm": 10.5, "learning_rate": 4.664294742039567e-06, "loss": 1.4925076961517334, "step": 3150 }, { "epoch": 0.5737689997269501, "grad_norm": 3.546875, "learning_rate": 4.663869748697667e-06, "loss": 1.1980005502700806, "step": 3152 }, { "epoch": 0.5741330663511423, "grad_norm": 14.1875, "learning_rate": 4.6634445111896264e-06, "loss": 0.8367608785629272, "step": 3154 }, { "epoch": 0.5744971329753344, "grad_norm": 72.5, "learning_rate": 4.6630190295778446e-06, "loss": 1.0910260677337646, "step": 3156 }, { "epoch": 0.5748611995995268, "grad_norm": 20.125, "learning_rate": 4.662593303924759e-06, "loss": 1.3143784999847412, "step": 3158 }, { "epoch": 0.575225266223719, "grad_norm": 11.5625, "learning_rate": 4.662167334292845e-06, "loss": 1.5937294960021973, "step": 3160 }, { "epoch": 0.5755893328479111, "grad_norm": 24.125, "learning_rate": 4.6617411207446104e-06, "loss": 1.5265504121780396, "step": 3162 }, { "epoch": 0.5759533994721034, "grad_norm": 16.375, "learning_rate": 4.661314663342601e-06, "loss": 0.6745487451553345, "step": 3164 }, { "epoch": 0.5763174660962956, "grad_norm": 14.25, "learning_rate": 4.660887962149397e-06, "loss": 1.5956515073776245, "step": 3166 }, { "epoch": 0.5766815327204878, "grad_norm": 7.03125, "learning_rate": 4.6604610172276164e-06, "loss": 1.4386987686157227, "step": 3168 }, { "epoch": 0.57704559934468, "grad_norm": 4.125, "learning_rate": 4.660033828639909e-06, "loss": 1.2788320779800415, "step": 3170 }, { "epoch": 0.5774096659688723, "grad_norm": 3.71875, "learning_rate": 4.659606396448967e-06, "loss": 1.398795485496521, "step": 3172 }, { "epoch": 0.5777737325930645, "grad_norm": 28.125, "learning_rate": 4.659178720717511e-06, "loss": 1.3583898544311523, "step": 3174 }, { "epoch": 0.5781377992172567, "grad_norm": 15.25, "learning_rate": 4.658750801508302e-06, "loss": 1.8299055099487305, "step": 3176 }, { "epoch": 0.578501865841449, "grad_norm": 14.3125, "learning_rate": 4.658322638884135e-06, "loss": 1.1780740022659302, "step": 3178 }, { "epoch": 0.5788659324656412, "grad_norm": 10.75, "learning_rate": 4.657894232907842e-06, "loss": 1.575628638267517, "step": 3180 }, { "epoch": 0.5792299990898334, "grad_norm": 10.8125, "learning_rate": 4.657465583642287e-06, "loss": 1.254805326461792, "step": 3182 }, { "epoch": 0.5795940657140257, "grad_norm": 5.3125, "learning_rate": 4.657036691150377e-06, "loss": 1.2919539213180542, "step": 3184 }, { "epoch": 0.5799581323382179, "grad_norm": 9.0, "learning_rate": 4.656607555495048e-06, "loss": 1.1025445461273193, "step": 3186 }, { "epoch": 0.5803221989624101, "grad_norm": 18.75, "learning_rate": 4.6561781767392734e-06, "loss": 1.4582033157348633, "step": 3188 }, { "epoch": 0.5806862655866023, "grad_norm": 15.5, "learning_rate": 4.655748554946064e-06, "loss": 1.068711519241333, "step": 3190 }, { "epoch": 0.5810503322107946, "grad_norm": 27.125, "learning_rate": 4.655318690178462e-06, "loss": 1.0355747938156128, "step": 3192 }, { "epoch": 0.5814143988349868, "grad_norm": 11.6875, "learning_rate": 4.654888582499552e-06, "loss": 0.6396878957748413, "step": 3194 }, { "epoch": 0.581778465459179, "grad_norm": 10.875, "learning_rate": 4.65445823197245e-06, "loss": 1.6420068740844727, "step": 3196 }, { "epoch": 0.5821425320833713, "grad_norm": 7.6875, "learning_rate": 4.654027638660306e-06, "loss": 1.3766051530838013, "step": 3198 }, { "epoch": 0.5825065987075635, "grad_norm": 8.3125, "learning_rate": 4.65359680262631e-06, "loss": 1.1669166088104248, "step": 3200 }, { "epoch": 0.5828706653317557, "grad_norm": 9.8125, "learning_rate": 4.6531657239336845e-06, "loss": 1.6351568698883057, "step": 3202 }, { "epoch": 0.583234731955948, "grad_norm": 9.8125, "learning_rate": 4.6527344026456886e-06, "loss": 1.8216816186904907, "step": 3204 }, { "epoch": 0.5835987985801402, "grad_norm": 3.84375, "learning_rate": 4.652302838825617e-06, "loss": 1.052591323852539, "step": 3206 }, { "epoch": 0.5839628652043324, "grad_norm": 19.875, "learning_rate": 4.6518710325368e-06, "loss": 1.7370530366897583, "step": 3208 }, { "epoch": 0.5843269318285246, "grad_norm": 25.75, "learning_rate": 4.651438983842604e-06, "loss": 1.4809801578521729, "step": 3210 }, { "epoch": 0.5846909984527169, "grad_norm": 13.8125, "learning_rate": 4.651006692806429e-06, "loss": 1.9143999814987183, "step": 3212 }, { "epoch": 0.5850550650769091, "grad_norm": 7.53125, "learning_rate": 4.650574159491713e-06, "loss": 1.4653382301330566, "step": 3214 }, { "epoch": 0.5854191317011013, "grad_norm": 5.4375, "learning_rate": 4.650141383961929e-06, "loss": 1.3473073244094849, "step": 3216 }, { "epoch": 0.5857831983252936, "grad_norm": 21.25, "learning_rate": 4.6497083662805845e-06, "loss": 1.5710654258728027, "step": 3218 }, { "epoch": 0.5861472649494858, "grad_norm": 19.375, "learning_rate": 4.6492751065112215e-06, "loss": 1.3432490825653076, "step": 3220 }, { "epoch": 0.586511331573678, "grad_norm": 11.375, "learning_rate": 4.648841604717421e-06, "loss": 1.418708324432373, "step": 3222 }, { "epoch": 0.5868753981978702, "grad_norm": 10.5, "learning_rate": 4.648407860962798e-06, "loss": 1.5353648662567139, "step": 3224 }, { "epoch": 0.5872394648220625, "grad_norm": 7.96875, "learning_rate": 4.647973875311001e-06, "loss": 1.4630835056304932, "step": 3226 }, { "epoch": 0.5876035314462547, "grad_norm": 24.25, "learning_rate": 4.647539647825716e-06, "loss": 1.6073410511016846, "step": 3228 }, { "epoch": 0.5879675980704469, "grad_norm": 29.0, "learning_rate": 4.647105178570666e-06, "loss": 1.8549131155014038, "step": 3230 }, { "epoch": 0.5883316646946392, "grad_norm": 11.0625, "learning_rate": 4.646670467609605e-06, "loss": 1.6139217615127563, "step": 3232 }, { "epoch": 0.5886957313188314, "grad_norm": 7.90625, "learning_rate": 4.646235515006328e-06, "loss": 1.3840563297271729, "step": 3234 }, { "epoch": 0.5890597979430235, "grad_norm": 16.375, "learning_rate": 4.645800320824659e-06, "loss": 1.1495158672332764, "step": 3236 }, { "epoch": 0.5894238645672158, "grad_norm": 11.0, "learning_rate": 4.645364885128463e-06, "loss": 1.3970487117767334, "step": 3238 }, { "epoch": 0.589787931191408, "grad_norm": 7.5, "learning_rate": 4.644929207981639e-06, "loss": 1.2395824193954468, "step": 3240 }, { "epoch": 0.5901519978156002, "grad_norm": 13.5, "learning_rate": 4.644493289448118e-06, "loss": 1.2570314407348633, "step": 3242 }, { "epoch": 0.5905160644397924, "grad_norm": 17.0, "learning_rate": 4.6440571295918735e-06, "loss": 0.7949339747428894, "step": 3244 }, { "epoch": 0.5908801310639847, "grad_norm": 5.78125, "learning_rate": 4.643620728476907e-06, "loss": 1.052629828453064, "step": 3246 }, { "epoch": 0.5912441976881769, "grad_norm": 11.0625, "learning_rate": 4.64318408616726e-06, "loss": 1.5894979238510132, "step": 3248 }, { "epoch": 0.5916082643123691, "grad_norm": 10.6875, "learning_rate": 4.642747202727008e-06, "loss": 1.552390217781067, "step": 3250 }, { "epoch": 0.5919723309365614, "grad_norm": 18.375, "learning_rate": 4.6423100782202615e-06, "loss": 1.6982547044754028, "step": 3252 }, { "epoch": 0.5923363975607536, "grad_norm": 15.375, "learning_rate": 4.641872712711166e-06, "loss": 1.6790783405303955, "step": 3254 }, { "epoch": 0.5927004641849458, "grad_norm": 13.6875, "learning_rate": 4.641435106263904e-06, "loss": 1.7826764583587646, "step": 3256 }, { "epoch": 0.5930645308091381, "grad_norm": 8.9375, "learning_rate": 4.640997258942692e-06, "loss": 1.7601191997528076, "step": 3258 }, { "epoch": 0.5934285974333303, "grad_norm": 10.4375, "learning_rate": 4.640559170811784e-06, "loss": 1.4909051656723022, "step": 3260 }, { "epoch": 0.5937926640575225, "grad_norm": 6.125, "learning_rate": 4.640120841935465e-06, "loss": 1.4465163946151733, "step": 3262 }, { "epoch": 0.5941567306817147, "grad_norm": 16.125, "learning_rate": 4.63968227237806e-06, "loss": 1.105288028717041, "step": 3264 }, { "epoch": 0.594520797305907, "grad_norm": 20.875, "learning_rate": 4.6392434622039265e-06, "loss": 0.7226976752281189, "step": 3266 }, { "epoch": 0.5948848639300992, "grad_norm": 8.375, "learning_rate": 4.638804411477457e-06, "loss": 1.6137361526489258, "step": 3268 }, { "epoch": 0.5952489305542914, "grad_norm": 8.625, "learning_rate": 4.638365120263083e-06, "loss": 1.1538947820663452, "step": 3270 }, { "epoch": 0.5956129971784837, "grad_norm": 80.5, "learning_rate": 4.637925588625266e-06, "loss": 1.5226430892944336, "step": 3272 }, { "epoch": 0.5959770638026759, "grad_norm": 11.0, "learning_rate": 4.637485816628507e-06, "loss": 1.8728686571121216, "step": 3274 }, { "epoch": 0.5963411304268681, "grad_norm": 4.125, "learning_rate": 4.637045804337342e-06, "loss": 0.9120247960090637, "step": 3276 }, { "epoch": 0.5967051970510604, "grad_norm": 21.25, "learning_rate": 4.636605551816339e-06, "loss": 1.2076787948608398, "step": 3278 }, { "epoch": 0.5970692636752526, "grad_norm": 13.6875, "learning_rate": 4.636165059130104e-06, "loss": 1.621565818786621, "step": 3280 }, { "epoch": 0.5974333302994448, "grad_norm": 14.75, "learning_rate": 4.635724326343276e-06, "loss": 1.559403896331787, "step": 3282 }, { "epoch": 0.597797396923637, "grad_norm": 20.75, "learning_rate": 4.6352833535205335e-06, "loss": 0.9151042699813843, "step": 3284 }, { "epoch": 0.5981614635478293, "grad_norm": 14.5, "learning_rate": 4.634842140726586e-06, "loss": 1.8382973670959473, "step": 3286 }, { "epoch": 0.5985255301720215, "grad_norm": 33.25, "learning_rate": 4.634400688026179e-06, "loss": 1.4484329223632812, "step": 3288 }, { "epoch": 0.5988895967962137, "grad_norm": 23.375, "learning_rate": 4.633958995484095e-06, "loss": 1.3155410289764404, "step": 3290 }, { "epoch": 0.599253663420406, "grad_norm": 49.5, "learning_rate": 4.633517063165151e-06, "loss": 1.3144867420196533, "step": 3292 }, { "epoch": 0.5996177300445982, "grad_norm": 8.5, "learning_rate": 4.633074891134197e-06, "loss": 1.6168324947357178, "step": 3294 }, { "epoch": 0.5999817966687904, "grad_norm": 11.5, "learning_rate": 4.632632479456121e-06, "loss": 1.3474645614624023, "step": 3296 }, { "epoch": 0.6003458632929826, "grad_norm": 13.5, "learning_rate": 4.632189828195847e-06, "loss": 1.751720666885376, "step": 3298 }, { "epoch": 0.6007099299171749, "grad_norm": 8.6875, "learning_rate": 4.631746937418328e-06, "loss": 1.4904208183288574, "step": 3300 }, { "epoch": 0.6010739965413671, "grad_norm": 4.65625, "learning_rate": 4.63130380718856e-06, "loss": 1.1204825639724731, "step": 3302 }, { "epoch": 0.6014380631655593, "grad_norm": 7.09375, "learning_rate": 4.63086043757157e-06, "loss": 1.4762994050979614, "step": 3304 }, { "epoch": 0.6018021297897516, "grad_norm": 6.8125, "learning_rate": 4.630416828632418e-06, "loss": 1.1321595907211304, "step": 3306 }, { "epoch": 0.6021661964139438, "grad_norm": 15.0, "learning_rate": 4.6299729804362046e-06, "loss": 1.5416860580444336, "step": 3308 }, { "epoch": 0.602530263038136, "grad_norm": 13.6875, "learning_rate": 4.629528893048063e-06, "loss": 1.5017390251159668, "step": 3310 }, { "epoch": 0.6028943296623283, "grad_norm": 24.875, "learning_rate": 4.629084566533161e-06, "loss": 1.6793529987335205, "step": 3312 }, { "epoch": 0.6032583962865204, "grad_norm": 10.0, "learning_rate": 4.628640000956699e-06, "loss": 1.7469428777694702, "step": 3314 }, { "epoch": 0.6036224629107126, "grad_norm": 3.546875, "learning_rate": 4.628195196383918e-06, "loss": 0.9798278212547302, "step": 3316 }, { "epoch": 0.6039865295349048, "grad_norm": 5.25, "learning_rate": 4.6277501528800905e-06, "loss": 1.0058797597885132, "step": 3318 }, { "epoch": 0.6043505961590971, "grad_norm": 8.125, "learning_rate": 4.6273048705105235e-06, "loss": 1.1809449195861816, "step": 3320 }, { "epoch": 0.6047146627832893, "grad_norm": 33.0, "learning_rate": 4.6268593493405635e-06, "loss": 1.5396674871444702, "step": 3322 }, { "epoch": 0.6050787294074815, "grad_norm": 18.875, "learning_rate": 4.626413589435586e-06, "loss": 1.5441834926605225, "step": 3324 }, { "epoch": 0.6054427960316738, "grad_norm": 16.5, "learning_rate": 4.625967590861007e-06, "loss": 1.3417446613311768, "step": 3326 }, { "epoch": 0.605806862655866, "grad_norm": 24.75, "learning_rate": 4.625521353682272e-06, "loss": 1.8228718042373657, "step": 3328 }, { "epoch": 0.6061709292800582, "grad_norm": 13.5625, "learning_rate": 4.6250748779648665e-06, "loss": 1.5724678039550781, "step": 3330 }, { "epoch": 0.6065349959042505, "grad_norm": 24.125, "learning_rate": 4.624628163774309e-06, "loss": 2.088804244995117, "step": 3332 }, { "epoch": 0.6068990625284427, "grad_norm": 11.375, "learning_rate": 4.624181211176151e-06, "loss": 1.8216822147369385, "step": 3334 }, { "epoch": 0.6072631291526349, "grad_norm": 12.4375, "learning_rate": 4.623734020235984e-06, "loss": 1.444862961769104, "step": 3336 }, { "epoch": 0.6076271957768271, "grad_norm": 27.625, "learning_rate": 4.623286591019429e-06, "loss": 1.5083041191101074, "step": 3338 }, { "epoch": 0.6079912624010194, "grad_norm": 21.25, "learning_rate": 4.622838923592145e-06, "loss": 1.8390361070632935, "step": 3340 }, { "epoch": 0.6083553290252116, "grad_norm": 44.5, "learning_rate": 4.622391018019828e-06, "loss": 1.6136435270309448, "step": 3342 }, { "epoch": 0.6087193956494038, "grad_norm": 6.34375, "learning_rate": 4.621942874368202e-06, "loss": 0.991256594657898, "step": 3344 }, { "epoch": 0.6090834622735961, "grad_norm": 5.1875, "learning_rate": 4.621494492703033e-06, "loss": 0.9313170313835144, "step": 3346 }, { "epoch": 0.6094475288977883, "grad_norm": 10.9375, "learning_rate": 4.6210458730901185e-06, "loss": 0.7289725542068481, "step": 3348 }, { "epoch": 0.6098115955219805, "grad_norm": 17.875, "learning_rate": 4.620597015595291e-06, "loss": 1.5423052310943604, "step": 3350 }, { "epoch": 0.6101756621461728, "grad_norm": 9.75, "learning_rate": 4.62014792028442e-06, "loss": 1.6939454078674316, "step": 3352 }, { "epoch": 0.610539728770365, "grad_norm": 16.125, "learning_rate": 4.6196985872234075e-06, "loss": 1.4564534425735474, "step": 3354 }, { "epoch": 0.6109037953945572, "grad_norm": 9.375, "learning_rate": 4.619249016478191e-06, "loss": 1.2883763313293457, "step": 3356 }, { "epoch": 0.6112678620187494, "grad_norm": 10.0625, "learning_rate": 4.6187992081147436e-06, "loss": 1.3018970489501953, "step": 3358 }, { "epoch": 0.6116319286429417, "grad_norm": 11.6875, "learning_rate": 4.618349162199074e-06, "loss": 1.4381163120269775, "step": 3360 }, { "epoch": 0.6119959952671339, "grad_norm": 10.875, "learning_rate": 4.617898878797221e-06, "loss": 1.4397021532058716, "step": 3362 }, { "epoch": 0.6123600618913261, "grad_norm": 13.3125, "learning_rate": 4.617448357975267e-06, "loss": 1.5341143608093262, "step": 3364 }, { "epoch": 0.6127241285155184, "grad_norm": 9.6875, "learning_rate": 4.61699759979932e-06, "loss": 1.4235416650772095, "step": 3366 }, { "epoch": 0.6130881951397106, "grad_norm": 7.21875, "learning_rate": 4.616546604335529e-06, "loss": 1.3741611242294312, "step": 3368 }, { "epoch": 0.6134522617639028, "grad_norm": 10.625, "learning_rate": 4.616095371650075e-06, "loss": 1.6094715595245361, "step": 3370 }, { "epoch": 0.613816328388095, "grad_norm": 9.1875, "learning_rate": 4.615643901809173e-06, "loss": 1.5990512371063232, "step": 3372 }, { "epoch": 0.6141803950122873, "grad_norm": 25.875, "learning_rate": 4.615192194879078e-06, "loss": 1.5131504535675049, "step": 3374 }, { "epoch": 0.6145444616364795, "grad_norm": 66.0, "learning_rate": 4.614740250926074e-06, "loss": 1.5291842222213745, "step": 3376 }, { "epoch": 0.6149085282606717, "grad_norm": 9.0625, "learning_rate": 4.614288070016482e-06, "loss": 0.9591556787490845, "step": 3378 }, { "epoch": 0.615272594884864, "grad_norm": 4.9375, "learning_rate": 4.613835652216657e-06, "loss": 1.0939677953720093, "step": 3380 }, { "epoch": 0.6156366615090562, "grad_norm": 14.25, "learning_rate": 4.6133829975929915e-06, "loss": 1.055437445640564, "step": 3382 }, { "epoch": 0.6160007281332484, "grad_norm": 17.0, "learning_rate": 4.612930106211908e-06, "loss": 1.5648449659347534, "step": 3384 }, { "epoch": 0.6163647947574407, "grad_norm": 10.6875, "learning_rate": 4.612476978139869e-06, "loss": 1.7257728576660156, "step": 3386 }, { "epoch": 0.6167288613816329, "grad_norm": 9.625, "learning_rate": 4.6120236134433684e-06, "loss": 1.3101181983947754, "step": 3388 }, { "epoch": 0.617092928005825, "grad_norm": 13.3125, "learning_rate": 4.611570012188936e-06, "loss": 1.6583659648895264, "step": 3390 }, { "epoch": 0.6174569946300172, "grad_norm": 11.0, "learning_rate": 4.611116174443134e-06, "loss": 1.964901089668274, "step": 3392 }, { "epoch": 0.6178210612542095, "grad_norm": 14.625, "learning_rate": 4.610662100272564e-06, "loss": 1.2678747177124023, "step": 3394 }, { "epoch": 0.6181851278784017, "grad_norm": 8.1875, "learning_rate": 4.610207789743858e-06, "loss": 0.691560685634613, "step": 3396 }, { "epoch": 0.6185491945025939, "grad_norm": 15.3125, "learning_rate": 4.609753242923683e-06, "loss": 1.3154070377349854, "step": 3398 }, { "epoch": 0.6189132611267862, "grad_norm": 10.375, "learning_rate": 4.609298459878745e-06, "loss": 1.6963614225387573, "step": 3400 }, { "epoch": 0.6192773277509784, "grad_norm": 7.125, "learning_rate": 4.60884344067578e-06, "loss": 0.8437920808792114, "step": 3402 }, { "epoch": 0.6196413943751706, "grad_norm": 6.09375, "learning_rate": 4.608388185381559e-06, "loss": 1.4655256271362305, "step": 3404 }, { "epoch": 0.6200054609993629, "grad_norm": 3.75, "learning_rate": 4.607932694062891e-06, "loss": 1.0056616067886353, "step": 3406 }, { "epoch": 0.6203695276235551, "grad_norm": 9.25, "learning_rate": 4.607476966786616e-06, "loss": 1.0568050146102905, "step": 3408 }, { "epoch": 0.6207335942477473, "grad_norm": 7.5625, "learning_rate": 4.6070210036196115e-06, "loss": 1.2031705379486084, "step": 3410 }, { "epoch": 0.6210976608719395, "grad_norm": 5.59375, "learning_rate": 4.606564804628787e-06, "loss": 1.2960994243621826, "step": 3412 }, { "epoch": 0.6214617274961318, "grad_norm": 7.0625, "learning_rate": 4.606108369881087e-06, "loss": 1.2023874521255493, "step": 3414 }, { "epoch": 0.621825794120324, "grad_norm": 12.4375, "learning_rate": 4.605651699443494e-06, "loss": 1.5052754878997803, "step": 3416 }, { "epoch": 0.6221898607445162, "grad_norm": 4.28125, "learning_rate": 4.605194793383021e-06, "loss": 1.1749440431594849, "step": 3418 }, { "epoch": 0.6225539273687085, "grad_norm": 2.734375, "learning_rate": 4.604737651766718e-06, "loss": 0.8927658200263977, "step": 3420 }, { "epoch": 0.6229179939929007, "grad_norm": 8.1875, "learning_rate": 4.604280274661667e-06, "loss": 1.2033967971801758, "step": 3422 }, { "epoch": 0.6232820606170929, "grad_norm": 12.375, "learning_rate": 4.603822662134988e-06, "loss": 1.5042997598648071, "step": 3424 }, { "epoch": 0.6236461272412852, "grad_norm": 7.15625, "learning_rate": 4.603364814253832e-06, "loss": 1.5573424100875854, "step": 3426 }, { "epoch": 0.6240101938654774, "grad_norm": 9.3125, "learning_rate": 4.602906731085388e-06, "loss": 1.4264090061187744, "step": 3428 }, { "epoch": 0.6243742604896696, "grad_norm": 12.375, "learning_rate": 4.602448412696877e-06, "loss": 1.1589868068695068, "step": 3430 }, { "epoch": 0.6247383271138618, "grad_norm": 16.25, "learning_rate": 4.601989859155557e-06, "loss": 0.8242495656013489, "step": 3432 }, { "epoch": 0.6251023937380541, "grad_norm": 22.0, "learning_rate": 4.601531070528716e-06, "loss": 1.584548830986023, "step": 3434 }, { "epoch": 0.6254664603622463, "grad_norm": 21.125, "learning_rate": 4.601072046883681e-06, "loss": 1.445434808731079, "step": 3436 }, { "epoch": 0.6258305269864385, "grad_norm": 10.0625, "learning_rate": 4.6006127882878115e-06, "loss": 1.94016432762146, "step": 3438 }, { "epoch": 0.6261945936106308, "grad_norm": 42.75, "learning_rate": 4.600153294808503e-06, "loss": 1.3921657800674438, "step": 3440 }, { "epoch": 0.626558660234823, "grad_norm": 30.5, "learning_rate": 4.5996935665131825e-06, "loss": 1.9987621307373047, "step": 3442 }, { "epoch": 0.6269227268590152, "grad_norm": 7.0625, "learning_rate": 4.599233603469314e-06, "loss": 1.3436954021453857, "step": 3444 }, { "epoch": 0.6272867934832074, "grad_norm": 10.0625, "learning_rate": 4.598773405744397e-06, "loss": 1.5192806720733643, "step": 3446 }, { "epoch": 0.6276508601073997, "grad_norm": 42.0, "learning_rate": 4.598312973405959e-06, "loss": 1.377097249031067, "step": 3448 }, { "epoch": 0.6280149267315919, "grad_norm": 6.90625, "learning_rate": 4.597852306521572e-06, "loss": 0.8952192068099976, "step": 3450 }, { "epoch": 0.6283789933557841, "grad_norm": 31.375, "learning_rate": 4.597391405158833e-06, "loss": 1.1780035495758057, "step": 3452 }, { "epoch": 0.6287430599799764, "grad_norm": 110.5, "learning_rate": 4.59693026938538e-06, "loss": 1.4515801668167114, "step": 3454 }, { "epoch": 0.6291071266041686, "grad_norm": 11.25, "learning_rate": 4.596468899268882e-06, "loss": 1.2682180404663086, "step": 3456 }, { "epoch": 0.6294711932283608, "grad_norm": 18.25, "learning_rate": 4.596007294877042e-06, "loss": 1.6006085872650146, "step": 3458 }, { "epoch": 0.6298352598525531, "grad_norm": 8.1875, "learning_rate": 4.595545456277601e-06, "loss": 1.5804389715194702, "step": 3460 }, { "epoch": 0.6301993264767453, "grad_norm": 9.1875, "learning_rate": 4.595083383538329e-06, "loss": 1.1393368244171143, "step": 3462 }, { "epoch": 0.6305633931009375, "grad_norm": 17.875, "learning_rate": 4.594621076727035e-06, "loss": 1.5236291885375977, "step": 3464 }, { "epoch": 0.6309274597251296, "grad_norm": 8.5, "learning_rate": 4.59415853591156e-06, "loss": 1.318071722984314, "step": 3466 }, { "epoch": 0.631291526349322, "grad_norm": 4.75, "learning_rate": 4.593695761159781e-06, "loss": 1.0345380306243896, "step": 3468 }, { "epoch": 0.6316555929735141, "grad_norm": 50.5, "learning_rate": 4.593232752539608e-06, "loss": 1.464630365371704, "step": 3470 }, { "epoch": 0.6320196595977063, "grad_norm": 14.1875, "learning_rate": 4.592769510118985e-06, "loss": 1.8135061264038086, "step": 3472 }, { "epoch": 0.6323837262218986, "grad_norm": 13.4375, "learning_rate": 4.592306033965892e-06, "loss": 1.650680661201477, "step": 3474 }, { "epoch": 0.6327477928460908, "grad_norm": 9.3125, "learning_rate": 4.591842324148341e-06, "loss": 1.8015437126159668, "step": 3476 }, { "epoch": 0.633111859470283, "grad_norm": 10.5, "learning_rate": 4.591378380734381e-06, "loss": 1.535023808479309, "step": 3478 }, { "epoch": 0.6334759260944753, "grad_norm": 34.5, "learning_rate": 4.5909142037920905e-06, "loss": 1.5320165157318115, "step": 3480 }, { "epoch": 0.6338399927186675, "grad_norm": 8.8125, "learning_rate": 4.59044979338959e-06, "loss": 1.3897587060928345, "step": 3482 }, { "epoch": 0.6342040593428597, "grad_norm": 6.09375, "learning_rate": 4.5899851495950274e-06, "loss": 1.4943766593933105, "step": 3484 }, { "epoch": 0.6345681259670519, "grad_norm": 6.5, "learning_rate": 4.589520272476588e-06, "loss": 1.0019128322601318, "step": 3486 }, { "epoch": 0.6349321925912442, "grad_norm": 37.5, "learning_rate": 4.5890551621024906e-06, "loss": 1.3247580528259277, "step": 3488 }, { "epoch": 0.6352962592154364, "grad_norm": 10.25, "learning_rate": 4.588589818540987e-06, "loss": 0.9012529850006104, "step": 3490 }, { "epoch": 0.6356603258396286, "grad_norm": 10.375, "learning_rate": 4.5881242418603656e-06, "loss": 1.4762120246887207, "step": 3492 }, { "epoch": 0.6360243924638209, "grad_norm": 4.78125, "learning_rate": 4.587658432128948e-06, "loss": 1.0838160514831543, "step": 3494 }, { "epoch": 0.6363884590880131, "grad_norm": 19.25, "learning_rate": 4.58719238941509e-06, "loss": 1.242342472076416, "step": 3496 }, { "epoch": 0.6367525257122053, "grad_norm": 9.5, "learning_rate": 4.586726113787182e-06, "loss": 1.5322515964508057, "step": 3498 }, { "epoch": 0.6371165923363976, "grad_norm": 9.9375, "learning_rate": 4.5862596053136465e-06, "loss": 1.404787302017212, "step": 3500 }, { "epoch": 0.6374806589605898, "grad_norm": 10.6875, "learning_rate": 4.585792864062943e-06, "loss": 1.1525084972381592, "step": 3502 }, { "epoch": 0.637844725584782, "grad_norm": 4.78125, "learning_rate": 4.585325890103561e-06, "loss": 1.3715919256210327, "step": 3504 }, { "epoch": 0.6382087922089742, "grad_norm": 11.5625, "learning_rate": 4.584858683504032e-06, "loss": 1.4361039400100708, "step": 3506 }, { "epoch": 0.6385728588331665, "grad_norm": 10.6875, "learning_rate": 4.584391244332913e-06, "loss": 1.5253689289093018, "step": 3508 }, { "epoch": 0.6389369254573587, "grad_norm": 6.6875, "learning_rate": 4.583923572658801e-06, "loss": 1.2777526378631592, "step": 3510 }, { "epoch": 0.6393009920815509, "grad_norm": 12.875, "learning_rate": 4.5834556685503215e-06, "loss": 1.391641616821289, "step": 3512 }, { "epoch": 0.6396650587057432, "grad_norm": 9.4375, "learning_rate": 4.5829875320761416e-06, "loss": 1.3365249633789062, "step": 3514 }, { "epoch": 0.6400291253299354, "grad_norm": 10.625, "learning_rate": 4.582519163304956e-06, "loss": 1.4570097923278809, "step": 3516 }, { "epoch": 0.6403931919541276, "grad_norm": 17.75, "learning_rate": 4.5820505623054966e-06, "loss": 1.6383426189422607, "step": 3518 }, { "epoch": 0.6407572585783198, "grad_norm": 39.0, "learning_rate": 4.5815817291465295e-06, "loss": 1.2982375621795654, "step": 3520 }, { "epoch": 0.6411213252025121, "grad_norm": 19.875, "learning_rate": 4.581112663896853e-06, "loss": 1.855355143547058, "step": 3522 }, { "epoch": 0.6414853918267043, "grad_norm": 8.875, "learning_rate": 4.580643366625301e-06, "loss": 1.6114633083343506, "step": 3524 }, { "epoch": 0.6418494584508965, "grad_norm": 20.625, "learning_rate": 4.5801738374007386e-06, "loss": 1.223724126815796, "step": 3526 }, { "epoch": 0.6422135250750888, "grad_norm": 6.34375, "learning_rate": 4.579704076292072e-06, "loss": 1.3601583242416382, "step": 3528 }, { "epoch": 0.642577591699281, "grad_norm": 15.875, "learning_rate": 4.5792340833682325e-06, "loss": 1.4852311611175537, "step": 3530 }, { "epoch": 0.6429416583234732, "grad_norm": 5.3125, "learning_rate": 4.5787638586981915e-06, "loss": 1.290025234222412, "step": 3532 }, { "epoch": 0.6433057249476655, "grad_norm": 9.8125, "learning_rate": 4.578293402350954e-06, "loss": 1.1747198104858398, "step": 3534 }, { "epoch": 0.6436697915718577, "grad_norm": 21.125, "learning_rate": 4.577822714395555e-06, "loss": 1.4804720878601074, "step": 3536 }, { "epoch": 0.6440338581960499, "grad_norm": 23.125, "learning_rate": 4.577351794901066e-06, "loss": 1.5694031715393066, "step": 3538 }, { "epoch": 0.644397924820242, "grad_norm": 5.625, "learning_rate": 4.576880643936594e-06, "loss": 1.4072412252426147, "step": 3540 }, { "epoch": 0.6447619914444344, "grad_norm": 7.34375, "learning_rate": 4.576409261571278e-06, "loss": 1.5995231866836548, "step": 3542 }, { "epoch": 0.6451260580686266, "grad_norm": 47.5, "learning_rate": 4.575937647874293e-06, "loss": 1.3782302141189575, "step": 3544 }, { "epoch": 0.6454901246928187, "grad_norm": 15.0625, "learning_rate": 4.575465802914844e-06, "loss": 1.2868973016738892, "step": 3546 }, { "epoch": 0.645854191317011, "grad_norm": 12.6875, "learning_rate": 4.574993726762173e-06, "loss": 1.4982761144638062, "step": 3548 }, { "epoch": 0.6462182579412032, "grad_norm": 18.375, "learning_rate": 4.574521419485556e-06, "loss": 1.5792279243469238, "step": 3550 }, { "epoch": 0.6465823245653954, "grad_norm": 11.6875, "learning_rate": 4.5740488811543e-06, "loss": 1.6417756080627441, "step": 3552 }, { "epoch": 0.6469463911895877, "grad_norm": 5.40625, "learning_rate": 4.573576111837752e-06, "loss": 1.3600236177444458, "step": 3554 }, { "epoch": 0.6473104578137799, "grad_norm": 6.84375, "learning_rate": 4.573103111605286e-06, "loss": 1.373063087463379, "step": 3556 }, { "epoch": 0.6476745244379721, "grad_norm": 6.34375, "learning_rate": 4.5726298805263124e-06, "loss": 1.1180996894836426, "step": 3558 }, { "epoch": 0.6480385910621643, "grad_norm": 33.75, "learning_rate": 4.5721564186702785e-06, "loss": 1.0901854038238525, "step": 3560 }, { "epoch": 0.6484026576863566, "grad_norm": 12.375, "learning_rate": 4.571682726106661e-06, "loss": 0.8341667652130127, "step": 3562 }, { "epoch": 0.6487667243105488, "grad_norm": 8.875, "learning_rate": 4.5712088029049725e-06, "loss": 1.074367880821228, "step": 3564 }, { "epoch": 0.649130790934741, "grad_norm": 25.375, "learning_rate": 4.570734649134761e-06, "loss": 1.3583738803863525, "step": 3566 }, { "epoch": 0.6494948575589333, "grad_norm": 19.0, "learning_rate": 4.570260264865605e-06, "loss": 1.3751565217971802, "step": 3568 }, { "epoch": 0.6498589241831255, "grad_norm": 14.375, "learning_rate": 4.569785650167119e-06, "loss": 1.3538877964019775, "step": 3570 }, { "epoch": 0.6502229908073177, "grad_norm": 9.375, "learning_rate": 4.569310805108949e-06, "loss": 1.0558316707611084, "step": 3572 }, { "epoch": 0.65058705743151, "grad_norm": 11.0625, "learning_rate": 4.56883572976078e-06, "loss": 0.9923142194747925, "step": 3574 }, { "epoch": 0.6509511240557022, "grad_norm": 8.5625, "learning_rate": 4.568360424192325e-06, "loss": 1.5509668588638306, "step": 3576 }, { "epoch": 0.6513151906798944, "grad_norm": 7.40625, "learning_rate": 4.567884888473333e-06, "loss": 1.479922890663147, "step": 3578 }, { "epoch": 0.6516792573040866, "grad_norm": 13.75, "learning_rate": 4.567409122673588e-06, "loss": 1.5896632671356201, "step": 3580 }, { "epoch": 0.6520433239282789, "grad_norm": 11.8125, "learning_rate": 4.566933126862907e-06, "loss": 1.7980318069458008, "step": 3582 }, { "epoch": 0.6524073905524711, "grad_norm": 10.375, "learning_rate": 4.566456901111139e-06, "loss": 1.3209612369537354, "step": 3584 }, { "epoch": 0.6527714571766633, "grad_norm": 16.375, "learning_rate": 4.565980445488171e-06, "loss": 1.428575038909912, "step": 3586 }, { "epoch": 0.6531355238008556, "grad_norm": 16.125, "learning_rate": 4.565503760063918e-06, "loss": 1.6164405345916748, "step": 3588 }, { "epoch": 0.6534995904250478, "grad_norm": 10.8125, "learning_rate": 4.565026844908331e-06, "loss": 1.593670129776001, "step": 3590 }, { "epoch": 0.65386365704924, "grad_norm": 7.375, "learning_rate": 4.564549700091399e-06, "loss": 1.7358109951019287, "step": 3592 }, { "epoch": 0.6542277236734323, "grad_norm": 24.625, "learning_rate": 4.564072325683138e-06, "loss": 1.4772859811782837, "step": 3594 }, { "epoch": 0.6545917902976245, "grad_norm": 9.8125, "learning_rate": 4.563594721753602e-06, "loss": 1.0298840999603271, "step": 3596 }, { "epoch": 0.6549558569218167, "grad_norm": 9.5, "learning_rate": 4.563116888372878e-06, "loss": 1.46136474609375, "step": 3598 }, { "epoch": 0.6553199235460089, "grad_norm": 8.25, "learning_rate": 4.562638825611084e-06, "loss": 1.1734012365341187, "step": 3600 }, { "epoch": 0.6556839901702012, "grad_norm": 9.1875, "learning_rate": 4.562160533538377e-06, "loss": 1.4688090085983276, "step": 3602 }, { "epoch": 0.6560480567943934, "grad_norm": 10.9375, "learning_rate": 4.561682012224942e-06, "loss": 1.4228413105010986, "step": 3604 }, { "epoch": 0.6564121234185856, "grad_norm": 17.125, "learning_rate": 4.561203261741e-06, "loss": 1.6375436782836914, "step": 3606 }, { "epoch": 0.6567761900427779, "grad_norm": 23.5, "learning_rate": 4.560724282156806e-06, "loss": 1.8387964963912964, "step": 3608 }, { "epoch": 0.6571402566669701, "grad_norm": 14.5625, "learning_rate": 4.5602450735426494e-06, "loss": 1.4028043746948242, "step": 3610 }, { "epoch": 0.6575043232911623, "grad_norm": 11.625, "learning_rate": 4.5597656359688514e-06, "loss": 1.0829812288284302, "step": 3612 }, { "epoch": 0.6578683899153545, "grad_norm": 10.75, "learning_rate": 4.5592859695057675e-06, "loss": 0.827850341796875, "step": 3614 }, { "epoch": 0.6582324565395468, "grad_norm": 12.0625, "learning_rate": 4.558806074223787e-06, "loss": 1.4451552629470825, "step": 3616 }, { "epoch": 0.658596523163739, "grad_norm": 16.375, "learning_rate": 4.558325950193332e-06, "loss": 1.683427333831787, "step": 3618 }, { "epoch": 0.6589605897879312, "grad_norm": 21.625, "learning_rate": 4.557845597484859e-06, "loss": 1.6295377016067505, "step": 3620 }, { "epoch": 0.6593246564121235, "grad_norm": 9.5625, "learning_rate": 4.5573650161688585e-06, "loss": 0.23497727513313293, "step": 3622 }, { "epoch": 0.6596887230363156, "grad_norm": 26.125, "learning_rate": 4.556884206315853e-06, "loss": 1.352825403213501, "step": 3624 }, { "epoch": 0.6600527896605078, "grad_norm": 12.6875, "learning_rate": 4.556403167996399e-06, "loss": 1.8081603050231934, "step": 3626 }, { "epoch": 0.6604168562847001, "grad_norm": 14.1875, "learning_rate": 4.555921901281089e-06, "loss": 1.4837032556533813, "step": 3628 }, { "epoch": 0.6607809229088923, "grad_norm": 15.875, "learning_rate": 4.5554404062405445e-06, "loss": 1.6521873474121094, "step": 3630 }, { "epoch": 0.6611449895330845, "grad_norm": 20.5, "learning_rate": 4.554958682945425e-06, "loss": 1.604405403137207, "step": 3632 }, { "epoch": 0.6615090561572767, "grad_norm": 19.5, "learning_rate": 4.554476731466419e-06, "loss": 1.7262576818466187, "step": 3634 }, { "epoch": 0.661873122781469, "grad_norm": 15.125, "learning_rate": 4.553994551874254e-06, "loss": 1.9060571193695068, "step": 3636 }, { "epoch": 0.6622371894056612, "grad_norm": 12.0, "learning_rate": 4.5535121442396855e-06, "loss": 1.4974902868270874, "step": 3638 }, { "epoch": 0.6626012560298534, "grad_norm": 13.5625, "learning_rate": 4.553029508633506e-06, "loss": 1.9952466487884521, "step": 3640 }, { "epoch": 0.6629653226540457, "grad_norm": 20.75, "learning_rate": 4.55254664512654e-06, "loss": 1.4698244333267212, "step": 3642 }, { "epoch": 0.6633293892782379, "grad_norm": 19.0, "learning_rate": 4.552063553789645e-06, "loss": 1.2583839893341064, "step": 3644 }, { "epoch": 0.6636934559024301, "grad_norm": 8.0625, "learning_rate": 4.551580234693715e-06, "loss": 0.8891459703445435, "step": 3646 }, { "epoch": 0.6640575225266224, "grad_norm": 17.375, "learning_rate": 4.551096687909672e-06, "loss": 1.5591721534729004, "step": 3648 }, { "epoch": 0.6644215891508146, "grad_norm": 16.5, "learning_rate": 4.550612913508478e-06, "loss": 2.0533902645111084, "step": 3650 }, { "epoch": 0.6647856557750068, "grad_norm": 8.625, "learning_rate": 4.550128911561121e-06, "loss": 1.515520453453064, "step": 3652 }, { "epoch": 0.665149722399199, "grad_norm": 7.71875, "learning_rate": 4.5496446821386285e-06, "loss": 1.4978930950164795, "step": 3654 }, { "epoch": 0.6655137890233913, "grad_norm": 9.4375, "learning_rate": 4.54916022531206e-06, "loss": 1.4474818706512451, "step": 3656 }, { "epoch": 0.6658778556475835, "grad_norm": 12.25, "learning_rate": 4.548675541152506e-06, "loss": 2.0054242610931396, "step": 3658 }, { "epoch": 0.6662419222717757, "grad_norm": 7.34375, "learning_rate": 4.548190629731093e-06, "loss": 1.6663093566894531, "step": 3660 }, { "epoch": 0.666605988895968, "grad_norm": 3.296875, "learning_rate": 4.547705491118979e-06, "loss": 1.0006725788116455, "step": 3662 }, { "epoch": 0.6669700555201602, "grad_norm": 9.5, "learning_rate": 4.547220125387356e-06, "loss": 1.2612828016281128, "step": 3664 }, { "epoch": 0.6673341221443524, "grad_norm": 9.125, "learning_rate": 4.546734532607452e-06, "loss": 1.348300576210022, "step": 3666 }, { "epoch": 0.6676981887685447, "grad_norm": 11.625, "learning_rate": 4.546248712850521e-06, "loss": 1.8749618530273438, "step": 3668 }, { "epoch": 0.6680622553927369, "grad_norm": 16.25, "learning_rate": 4.54576266618786e-06, "loss": 1.9296215772628784, "step": 3670 }, { "epoch": 0.6684263220169291, "grad_norm": 27.375, "learning_rate": 4.54527639269079e-06, "loss": 1.2320010662078857, "step": 3672 }, { "epoch": 0.6687903886411213, "grad_norm": 10.75, "learning_rate": 4.544789892430674e-06, "loss": 1.4485838413238525, "step": 3674 }, { "epoch": 0.6691544552653136, "grad_norm": 12.125, "learning_rate": 4.544303165478902e-06, "loss": 1.2830255031585693, "step": 3676 }, { "epoch": 0.6695185218895058, "grad_norm": 13.125, "learning_rate": 4.543816211906896e-06, "loss": 1.4946999549865723, "step": 3678 }, { "epoch": 0.669882588513698, "grad_norm": 10.3125, "learning_rate": 4.54332903178612e-06, "loss": 1.488081932067871, "step": 3680 }, { "epoch": 0.6702466551378903, "grad_norm": 6.625, "learning_rate": 4.542841625188062e-06, "loss": 1.6220935583114624, "step": 3682 }, { "epoch": 0.6706107217620825, "grad_norm": 16.875, "learning_rate": 4.542353992184248e-06, "loss": 1.7125155925750732, "step": 3684 }, { "epoch": 0.6709747883862747, "grad_norm": 21.0, "learning_rate": 4.541866132846238e-06, "loss": 2.2610013484954834, "step": 3686 }, { "epoch": 0.6713388550104669, "grad_norm": 16.75, "learning_rate": 4.54137804724562e-06, "loss": 1.7678964138031006, "step": 3688 }, { "epoch": 0.6717029216346592, "grad_norm": 34.25, "learning_rate": 4.54088973545402e-06, "loss": 1.7956126928329468, "step": 3690 }, { "epoch": 0.6720669882588514, "grad_norm": 8.125, "learning_rate": 4.540401197543097e-06, "loss": 0.9123656749725342, "step": 3692 }, { "epoch": 0.6724310548830436, "grad_norm": 8.5, "learning_rate": 4.539912433584541e-06, "loss": 1.4581201076507568, "step": 3694 }, { "epoch": 0.6727951215072359, "grad_norm": 9.9375, "learning_rate": 4.539423443650076e-06, "loss": 0.7008066177368164, "step": 3696 }, { "epoch": 0.673159188131428, "grad_norm": 11.125, "learning_rate": 4.538934227811459e-06, "loss": 1.5727577209472656, "step": 3698 }, { "epoch": 0.6735232547556202, "grad_norm": 22.375, "learning_rate": 4.5384447861404805e-06, "loss": 1.6482254266738892, "step": 3700 }, { "epoch": 0.6738873213798126, "grad_norm": 24.0, "learning_rate": 4.537955118708965e-06, "loss": 1.1783530712127686, "step": 3702 }, { "epoch": 0.6742513880040047, "grad_norm": 13.4375, "learning_rate": 4.537465225588769e-06, "loss": 0.6194881200790405, "step": 3704 }, { "epoch": 0.6746154546281969, "grad_norm": 15.9375, "learning_rate": 4.536975106851781e-06, "loss": 1.563307285308838, "step": 3706 }, { "epoch": 0.6749795212523891, "grad_norm": 10.5, "learning_rate": 4.5364847625699245e-06, "loss": 1.803078055381775, "step": 3708 }, { "epoch": 0.6753435878765814, "grad_norm": 7.15625, "learning_rate": 4.535994192815158e-06, "loss": 1.5700092315673828, "step": 3710 }, { "epoch": 0.6757076545007736, "grad_norm": 8.625, "learning_rate": 4.535503397659466e-06, "loss": 1.0744454860687256, "step": 3712 }, { "epoch": 0.6760717211249658, "grad_norm": 17.625, "learning_rate": 4.535012377174875e-06, "loss": 1.1591023206710815, "step": 3714 }, { "epoch": 0.6764357877491581, "grad_norm": 26.625, "learning_rate": 4.534521131433438e-06, "loss": 1.0465067625045776, "step": 3716 }, { "epoch": 0.6767998543733503, "grad_norm": 14.625, "learning_rate": 4.5340296605072446e-06, "loss": 1.2641870975494385, "step": 3718 }, { "epoch": 0.6771639209975425, "grad_norm": 9.75, "learning_rate": 4.533537964468414e-06, "loss": 1.6101582050323486, "step": 3720 }, { "epoch": 0.6775279876217348, "grad_norm": 141.0, "learning_rate": 4.533046043389102e-06, "loss": 2.221773862838745, "step": 3722 }, { "epoch": 0.677892054245927, "grad_norm": 3.515625, "learning_rate": 4.532553897341497e-06, "loss": 1.061731219291687, "step": 3724 }, { "epoch": 0.6782561208701192, "grad_norm": 9.0625, "learning_rate": 4.532061526397818e-06, "loss": 1.4650909900665283, "step": 3726 }, { "epoch": 0.6786201874943114, "grad_norm": 11.8125, "learning_rate": 4.531568930630319e-06, "loss": 1.4499444961547852, "step": 3728 }, { "epoch": 0.6789842541185037, "grad_norm": 15.25, "learning_rate": 4.531076110111286e-06, "loss": 1.5956716537475586, "step": 3730 }, { "epoch": 0.6793483207426959, "grad_norm": 6.84375, "learning_rate": 4.53058306491304e-06, "loss": 1.4599652290344238, "step": 3732 }, { "epoch": 0.6797123873668881, "grad_norm": 9.8125, "learning_rate": 4.530089795107931e-06, "loss": 1.1326687335968018, "step": 3734 }, { "epoch": 0.6800764539910804, "grad_norm": 7.0625, "learning_rate": 4.529596300768346e-06, "loss": 1.2323410511016846, "step": 3736 }, { "epoch": 0.6804405206152726, "grad_norm": 6.78125, "learning_rate": 4.529102581966702e-06, "loss": 0.7173973321914673, "step": 3738 }, { "epoch": 0.6808045872394648, "grad_norm": 19.25, "learning_rate": 4.528608638775451e-06, "loss": 1.355765700340271, "step": 3740 }, { "epoch": 0.6811686538636571, "grad_norm": 17.125, "learning_rate": 4.528114471267079e-06, "loss": 0.8032264113426208, "step": 3742 }, { "epoch": 0.6815327204878493, "grad_norm": 11.0, "learning_rate": 4.527620079514099e-06, "loss": 1.518304705619812, "step": 3744 }, { "epoch": 0.6818967871120415, "grad_norm": 9.8125, "learning_rate": 4.527125463589065e-06, "loss": 1.0924034118652344, "step": 3746 }, { "epoch": 0.6822608537362337, "grad_norm": 21.625, "learning_rate": 4.526630623564557e-06, "loss": 1.5667080879211426, "step": 3748 }, { "epoch": 0.682624920360426, "grad_norm": 9.4375, "learning_rate": 4.5261355595131915e-06, "loss": 1.0316617488861084, "step": 3750 }, { "epoch": 0.6829889869846182, "grad_norm": 11.125, "learning_rate": 4.525640271507619e-06, "loss": 1.391589879989624, "step": 3752 }, { "epoch": 0.6833530536088104, "grad_norm": 14.0625, "learning_rate": 4.525144759620518e-06, "loss": 1.4162417650222778, "step": 3754 }, { "epoch": 0.6837171202330027, "grad_norm": 17.0, "learning_rate": 4.524649023924605e-06, "loss": 1.4036498069763184, "step": 3756 }, { "epoch": 0.6840811868571949, "grad_norm": 18.0, "learning_rate": 4.5241530644926265e-06, "loss": 1.4906201362609863, "step": 3758 }, { "epoch": 0.6844452534813871, "grad_norm": 11.5625, "learning_rate": 4.523656881397364e-06, "loss": 1.3111437559127808, "step": 3760 }, { "epoch": 0.6848093201055793, "grad_norm": 5.34375, "learning_rate": 4.523160474711627e-06, "loss": 1.0921313762664795, "step": 3762 }, { "epoch": 0.6851733867297716, "grad_norm": 7.75, "learning_rate": 4.5226638445082634e-06, "loss": 1.3556110858917236, "step": 3764 }, { "epoch": 0.6855374533539638, "grad_norm": 19.0, "learning_rate": 4.522166990860153e-06, "loss": 1.2254915237426758, "step": 3766 }, { "epoch": 0.685901519978156, "grad_norm": 11.5, "learning_rate": 4.521669913840204e-06, "loss": 1.1682343482971191, "step": 3768 }, { "epoch": 0.6862655866023483, "grad_norm": 13.375, "learning_rate": 4.521172613521363e-06, "loss": 1.7945854663848877, "step": 3770 }, { "epoch": 0.6866296532265405, "grad_norm": 8.375, "learning_rate": 4.520675089976605e-06, "loss": 1.7545654773712158, "step": 3772 }, { "epoch": 0.6869937198507327, "grad_norm": 11.5, "learning_rate": 4.520177343278941e-06, "loss": 1.2681553363800049, "step": 3774 }, { "epoch": 0.687357786474925, "grad_norm": 10.875, "learning_rate": 4.519679373501412e-06, "loss": 1.019344687461853, "step": 3776 }, { "epoch": 0.6877218530991172, "grad_norm": 10.6875, "learning_rate": 4.519181180717093e-06, "loss": 1.5107334852218628, "step": 3778 }, { "epoch": 0.6880859197233093, "grad_norm": 15.875, "learning_rate": 4.5186827649990925e-06, "loss": 1.8209457397460938, "step": 3780 }, { "epoch": 0.6884499863475015, "grad_norm": 18.125, "learning_rate": 4.518184126420553e-06, "loss": 1.4318132400512695, "step": 3782 }, { "epoch": 0.6888140529716938, "grad_norm": 11.75, "learning_rate": 4.517685265054644e-06, "loss": 1.5755144357681274, "step": 3784 }, { "epoch": 0.689178119595886, "grad_norm": 34.5, "learning_rate": 4.517186180974573e-06, "loss": 1.4664556980133057, "step": 3786 }, { "epoch": 0.6895421862200782, "grad_norm": 9.375, "learning_rate": 4.516686874253579e-06, "loss": 1.6866182088851929, "step": 3788 }, { "epoch": 0.6899062528442705, "grad_norm": 44.25, "learning_rate": 4.516187344964932e-06, "loss": 1.4259510040283203, "step": 3790 }, { "epoch": 0.6902703194684627, "grad_norm": 9.375, "learning_rate": 4.515687593181938e-06, "loss": 1.3689099550247192, "step": 3792 }, { "epoch": 0.6906343860926549, "grad_norm": 10.1875, "learning_rate": 4.515187618977931e-06, "loss": 1.239065408706665, "step": 3794 }, { "epoch": 0.6909984527168472, "grad_norm": 47.5, "learning_rate": 4.5146874224262825e-06, "loss": 1.429834246635437, "step": 3796 }, { "epoch": 0.6913625193410394, "grad_norm": 3.890625, "learning_rate": 4.5141870036003925e-06, "loss": 1.1575387716293335, "step": 3798 }, { "epoch": 0.6917265859652316, "grad_norm": 11.625, "learning_rate": 4.513686362573696e-06, "loss": 1.2679401636123657, "step": 3800 }, { "epoch": 0.6920906525894238, "grad_norm": 7.78125, "learning_rate": 4.513185499419661e-06, "loss": 1.0431429147720337, "step": 3802 }, { "epoch": 0.6924547192136161, "grad_norm": 15.0, "learning_rate": 4.512684414211787e-06, "loss": 1.3145463466644287, "step": 3804 }, { "epoch": 0.6928187858378083, "grad_norm": 8.5625, "learning_rate": 4.512183107023603e-06, "loss": 1.825685739517212, "step": 3806 }, { "epoch": 0.6931828524620005, "grad_norm": 28.375, "learning_rate": 4.511681577928678e-06, "loss": 1.2586886882781982, "step": 3808 }, { "epoch": 0.6935469190861928, "grad_norm": 14.4375, "learning_rate": 4.511179827000608e-06, "loss": 0.9188296794891357, "step": 3810 }, { "epoch": 0.693910985710385, "grad_norm": 22.875, "learning_rate": 4.510677854313023e-06, "loss": 1.6138436794281006, "step": 3812 }, { "epoch": 0.6942750523345772, "grad_norm": 3.453125, "learning_rate": 4.510175659939584e-06, "loss": 1.3478995561599731, "step": 3814 }, { "epoch": 0.6946391189587695, "grad_norm": 49.0, "learning_rate": 4.5096732439539885e-06, "loss": 1.5111191272735596, "step": 3816 }, { "epoch": 0.6950031855829617, "grad_norm": 19.5, "learning_rate": 4.509170606429961e-06, "loss": 1.4348554611206055, "step": 3818 }, { "epoch": 0.6953672522071539, "grad_norm": 12.125, "learning_rate": 4.508667747441264e-06, "loss": 1.6349647045135498, "step": 3820 }, { "epoch": 0.6957313188313461, "grad_norm": 13.9375, "learning_rate": 4.508164667061689e-06, "loss": 1.4164223670959473, "step": 3822 }, { "epoch": 0.6960953854555384, "grad_norm": 40.5, "learning_rate": 4.5076613653650614e-06, "loss": 1.674292802810669, "step": 3824 }, { "epoch": 0.6964594520797306, "grad_norm": 8.6875, "learning_rate": 4.5071578424252385e-06, "loss": 1.4369826316833496, "step": 3826 }, { "epoch": 0.6968235187039228, "grad_norm": 11.75, "learning_rate": 4.506654098316109e-06, "loss": 1.206772804260254, "step": 3828 }, { "epoch": 0.6971875853281151, "grad_norm": 14.8125, "learning_rate": 4.506150133111598e-06, "loss": 0.8857887387275696, "step": 3830 }, { "epoch": 0.6975516519523073, "grad_norm": 14.3125, "learning_rate": 4.505645946885659e-06, "loss": 1.3125667572021484, "step": 3832 }, { "epoch": 0.6979157185764995, "grad_norm": 7.34375, "learning_rate": 4.505141539712278e-06, "loss": 1.3718979358673096, "step": 3834 }, { "epoch": 0.6982797852006917, "grad_norm": 9.875, "learning_rate": 4.5046369116654755e-06, "loss": 1.1708085536956787, "step": 3836 }, { "epoch": 0.698643851824884, "grad_norm": 16.75, "learning_rate": 4.504132062819306e-06, "loss": 0.8497837781906128, "step": 3838 }, { "epoch": 0.6990079184490762, "grad_norm": 103.0, "learning_rate": 4.503626993247851e-06, "loss": 0.7867426872253418, "step": 3840 }, { "epoch": 0.6993719850732684, "grad_norm": 4.375, "learning_rate": 4.503121703025227e-06, "loss": 1.2313120365142822, "step": 3842 }, { "epoch": 0.6997360516974607, "grad_norm": 20.0, "learning_rate": 4.502616192225586e-06, "loss": 1.697470784187317, "step": 3844 }, { "epoch": 0.7001001183216529, "grad_norm": 6.59375, "learning_rate": 4.502110460923108e-06, "loss": 1.5227793455123901, "step": 3846 }, { "epoch": 0.7004641849458451, "grad_norm": 10.75, "learning_rate": 4.501604509192008e-06, "loss": 1.295369267463684, "step": 3848 }, { "epoch": 0.7008282515700374, "grad_norm": 23.25, "learning_rate": 4.501098337106532e-06, "loss": 1.2205997705459595, "step": 3850 }, { "epoch": 0.7011923181942296, "grad_norm": 10.625, "learning_rate": 4.5005919447409575e-06, "loss": 1.1970067024230957, "step": 3852 }, { "epoch": 0.7015563848184218, "grad_norm": 12.125, "learning_rate": 4.5000853321695955e-06, "loss": 1.3742592334747314, "step": 3854 }, { "epoch": 0.701920451442614, "grad_norm": 13.0, "learning_rate": 4.499578499466792e-06, "loss": 1.643075942993164, "step": 3856 }, { "epoch": 0.7022845180668063, "grad_norm": 17.125, "learning_rate": 4.499071446706921e-06, "loss": 1.6244299411773682, "step": 3858 }, { "epoch": 0.7026485846909984, "grad_norm": 20.0, "learning_rate": 4.49856417396439e-06, "loss": 1.6272008419036865, "step": 3860 }, { "epoch": 0.7030126513151906, "grad_norm": 9.125, "learning_rate": 4.498056681313639e-06, "loss": 1.2644447088241577, "step": 3862 }, { "epoch": 0.703376717939383, "grad_norm": 9.0625, "learning_rate": 4.497548968829143e-06, "loss": 1.5149474143981934, "step": 3864 }, { "epoch": 0.7037407845635751, "grad_norm": 12.0625, "learning_rate": 4.497041036585404e-06, "loss": 1.5932220220565796, "step": 3866 }, { "epoch": 0.7041048511877673, "grad_norm": 11.9375, "learning_rate": 4.496532884656957e-06, "loss": 1.5577473640441895, "step": 3868 }, { "epoch": 0.7044689178119596, "grad_norm": 13.75, "learning_rate": 4.496024513118378e-06, "loss": 1.675068974494934, "step": 3870 }, { "epoch": 0.7048329844361518, "grad_norm": 5.6875, "learning_rate": 4.495515922044264e-06, "loss": 1.0313233137130737, "step": 3872 }, { "epoch": 0.705197051060344, "grad_norm": 7.84375, "learning_rate": 4.4950071115092474e-06, "loss": 1.648760437965393, "step": 3874 }, { "epoch": 0.7055611176845362, "grad_norm": 7.15625, "learning_rate": 4.494498081587997e-06, "loss": 1.5017939805984497, "step": 3876 }, { "epoch": 0.7059251843087285, "grad_norm": 9.0625, "learning_rate": 4.49398883235521e-06, "loss": 1.4444807767868042, "step": 3878 }, { "epoch": 0.7062892509329207, "grad_norm": 8.375, "learning_rate": 4.493479363885615e-06, "loss": 1.3903720378875732, "step": 3880 }, { "epoch": 0.7066533175571129, "grad_norm": 6.125, "learning_rate": 4.4929696762539774e-06, "loss": 1.3726187944412231, "step": 3882 }, { "epoch": 0.7070173841813052, "grad_norm": 8.75, "learning_rate": 4.49245976953509e-06, "loss": 1.2965294122695923, "step": 3884 }, { "epoch": 0.7073814508054974, "grad_norm": 8.9375, "learning_rate": 4.49194964380378e-06, "loss": 1.366483449935913, "step": 3886 }, { "epoch": 0.7077455174296896, "grad_norm": 29.625, "learning_rate": 4.4914392991349055e-06, "loss": 1.4897915124893188, "step": 3888 }, { "epoch": 0.7081095840538819, "grad_norm": 14.375, "learning_rate": 4.490928735603358e-06, "loss": 1.9999818801879883, "step": 3890 }, { "epoch": 0.7084736506780741, "grad_norm": 23.25, "learning_rate": 4.490417953284062e-06, "loss": 1.395628571510315, "step": 3892 }, { "epoch": 0.7088377173022663, "grad_norm": 6.125, "learning_rate": 4.48990695225197e-06, "loss": 1.1906358003616333, "step": 3894 }, { "epoch": 0.7092017839264585, "grad_norm": 11.625, "learning_rate": 4.4893957325820725e-06, "loss": 1.257643699645996, "step": 3896 }, { "epoch": 0.7095658505506508, "grad_norm": 6.78125, "learning_rate": 4.488884294349386e-06, "loss": 1.3802566528320312, "step": 3898 }, { "epoch": 0.709929917174843, "grad_norm": 29.625, "learning_rate": 4.4883726376289624e-06, "loss": 1.5334763526916504, "step": 3900 }, { "epoch": 0.7102939837990352, "grad_norm": 6.90625, "learning_rate": 4.487860762495888e-06, "loss": 1.1014628410339355, "step": 3902 }, { "epoch": 0.7106580504232275, "grad_norm": 11.1875, "learning_rate": 4.487348669025275e-06, "loss": 0.813360333442688, "step": 3904 }, { "epoch": 0.7110221170474197, "grad_norm": 9.0625, "learning_rate": 4.486836357292273e-06, "loss": 1.1505460739135742, "step": 3906 }, { "epoch": 0.7113861836716119, "grad_norm": 14.1875, "learning_rate": 4.4863238273720625e-06, "loss": 1.627068042755127, "step": 3908 }, { "epoch": 0.7117502502958041, "grad_norm": 32.25, "learning_rate": 4.485811079339852e-06, "loss": 1.280656337738037, "step": 3910 }, { "epoch": 0.7121143169199964, "grad_norm": 8.9375, "learning_rate": 4.485298113270887e-06, "loss": 1.5592602491378784, "step": 3912 }, { "epoch": 0.7124783835441886, "grad_norm": 19.625, "learning_rate": 4.484784929240445e-06, "loss": 1.2622069120407104, "step": 3914 }, { "epoch": 0.7128424501683808, "grad_norm": 18.25, "learning_rate": 4.484271527323831e-06, "loss": 1.570495843887329, "step": 3916 }, { "epoch": 0.7132065167925731, "grad_norm": 9.875, "learning_rate": 4.483757907596386e-06, "loss": 1.0512316226959229, "step": 3918 }, { "epoch": 0.7135705834167653, "grad_norm": 16.125, "learning_rate": 4.48324407013348e-06, "loss": 1.3812226057052612, "step": 3920 }, { "epoch": 0.7139346500409575, "grad_norm": 16.25, "learning_rate": 4.482730015010519e-06, "loss": 0.9664490222930908, "step": 3922 }, { "epoch": 0.7142987166651498, "grad_norm": 25.0, "learning_rate": 4.482215742302937e-06, "loss": 1.4992426633834839, "step": 3924 }, { "epoch": 0.714662783289342, "grad_norm": 7.625, "learning_rate": 4.481701252086201e-06, "loss": 1.301613211631775, "step": 3926 }, { "epoch": 0.7150268499135342, "grad_norm": 18.375, "learning_rate": 4.481186544435812e-06, "loss": 1.4645214080810547, "step": 3928 }, { "epoch": 0.7153909165377264, "grad_norm": 9.6875, "learning_rate": 4.4806716194273e-06, "loss": 1.3729705810546875, "step": 3930 }, { "epoch": 0.7157549831619187, "grad_norm": 7.71875, "learning_rate": 4.480156477136229e-06, "loss": 0.9819673299789429, "step": 3932 }, { "epoch": 0.7161190497861109, "grad_norm": 14.25, "learning_rate": 4.479641117638193e-06, "loss": 1.7443373203277588, "step": 3934 }, { "epoch": 0.716483116410303, "grad_norm": 12.25, "learning_rate": 4.479125541008819e-06, "loss": 1.4857553243637085, "step": 3936 }, { "epoch": 0.7168471830344953, "grad_norm": 13.3125, "learning_rate": 4.478609747323767e-06, "loss": 1.4183406829833984, "step": 3938 }, { "epoch": 0.7172112496586875, "grad_norm": 2.234375, "learning_rate": 4.478093736658725e-06, "loss": 0.9251596927642822, "step": 3940 }, { "epoch": 0.7175753162828797, "grad_norm": 18.125, "learning_rate": 4.477577509089419e-06, "loss": 1.0102472305297852, "step": 3942 }, { "epoch": 0.717939382907072, "grad_norm": 15.5625, "learning_rate": 4.4770610646916025e-06, "loss": 0.7217501401901245, "step": 3944 }, { "epoch": 0.7183034495312642, "grad_norm": 12.5, "learning_rate": 4.47654440354106e-06, "loss": 1.6710395812988281, "step": 3946 }, { "epoch": 0.7186675161554564, "grad_norm": 63.25, "learning_rate": 4.47602752571361e-06, "loss": 1.7212402820587158, "step": 3948 }, { "epoch": 0.7190315827796486, "grad_norm": 10.1875, "learning_rate": 4.475510431285102e-06, "loss": 1.3765300512313843, "step": 3950 }, { "epoch": 0.7193956494038409, "grad_norm": 9.0625, "learning_rate": 4.474993120331418e-06, "loss": 1.3453419208526611, "step": 3952 }, { "epoch": 0.7197597160280331, "grad_norm": 11.375, "learning_rate": 4.4744755929284714e-06, "loss": 1.4030954837799072, "step": 3954 }, { "epoch": 0.7201237826522253, "grad_norm": 12.8125, "learning_rate": 4.473957849152207e-06, "loss": 1.8009711503982544, "step": 3956 }, { "epoch": 0.7204878492764176, "grad_norm": 6.5625, "learning_rate": 4.473439889078602e-06, "loss": 1.1683478355407715, "step": 3958 }, { "epoch": 0.7208519159006098, "grad_norm": 11.3125, "learning_rate": 4.472921712783665e-06, "loss": 1.19291353225708, "step": 3960 }, { "epoch": 0.721215982524802, "grad_norm": 17.125, "learning_rate": 4.4724033203434345e-06, "loss": 1.2174474000930786, "step": 3962 }, { "epoch": 0.7215800491489943, "grad_norm": 6.96875, "learning_rate": 4.471884711833985e-06, "loss": 1.6224758625030518, "step": 3964 }, { "epoch": 0.7219441157731865, "grad_norm": 16.125, "learning_rate": 4.471365887331418e-06, "loss": 1.1545883417129517, "step": 3966 }, { "epoch": 0.7223081823973787, "grad_norm": 15.875, "learning_rate": 4.47084684691187e-06, "loss": 1.8949850797653198, "step": 3968 }, { "epoch": 0.7226722490215709, "grad_norm": 9.875, "learning_rate": 4.4703275906515095e-06, "loss": 1.6063568592071533, "step": 3970 }, { "epoch": 0.7230363156457632, "grad_norm": 19.0, "learning_rate": 4.469808118626534e-06, "loss": 1.2111185789108276, "step": 3972 }, { "epoch": 0.7234003822699554, "grad_norm": 22.875, "learning_rate": 4.469288430913172e-06, "loss": 0.995964527130127, "step": 3974 }, { "epoch": 0.7237644488941476, "grad_norm": 4.65625, "learning_rate": 4.468768527587688e-06, "loss": 1.2622394561767578, "step": 3976 }, { "epoch": 0.7241285155183399, "grad_norm": 6.46875, "learning_rate": 4.468248408726376e-06, "loss": 1.2863210439682007, "step": 3978 }, { "epoch": 0.7244925821425321, "grad_norm": 15.5625, "learning_rate": 4.467728074405558e-06, "loss": 1.3262053728103638, "step": 3980 }, { "epoch": 0.7248566487667243, "grad_norm": 7.28125, "learning_rate": 4.467207524701595e-06, "loss": 1.5426204204559326, "step": 3982 }, { "epoch": 0.7252207153909166, "grad_norm": 5.8125, "learning_rate": 4.466686759690874e-06, "loss": 1.0809754133224487, "step": 3984 }, { "epoch": 0.7255847820151088, "grad_norm": 14.9375, "learning_rate": 4.466165779449814e-06, "loss": 1.4829933643341064, "step": 3986 }, { "epoch": 0.725948848639301, "grad_norm": 118.5, "learning_rate": 4.465644584054868e-06, "loss": 1.3769590854644775, "step": 3988 }, { "epoch": 0.7263129152634932, "grad_norm": 61.75, "learning_rate": 4.465123173582519e-06, "loss": 1.9018545150756836, "step": 3990 }, { "epoch": 0.7266769818876855, "grad_norm": 10.5, "learning_rate": 4.4646015481092805e-06, "loss": 1.611771821975708, "step": 3992 }, { "epoch": 0.7270410485118777, "grad_norm": 11.875, "learning_rate": 4.464079707711703e-06, "loss": 1.387900471687317, "step": 3994 }, { "epoch": 0.7274051151360699, "grad_norm": 12.0, "learning_rate": 4.46355765246636e-06, "loss": 1.3997489213943481, "step": 3996 }, { "epoch": 0.7277691817602622, "grad_norm": 18.375, "learning_rate": 4.463035382449864e-06, "loss": 1.6485671997070312, "step": 3998 }, { "epoch": 0.7281332483844544, "grad_norm": 13.625, "learning_rate": 4.462512897738855e-06, "loss": 1.6315934658050537, "step": 4000 }, { "epoch": 0.7284973150086466, "grad_norm": 13.125, "learning_rate": 4.461990198410003e-06, "loss": 1.6964771747589111, "step": 4002 }, { "epoch": 0.7288613816328388, "grad_norm": 15.0, "learning_rate": 4.461467284540016e-06, "loss": 1.8010307550430298, "step": 4004 }, { "epoch": 0.7292254482570311, "grad_norm": 6.5, "learning_rate": 4.460944156205628e-06, "loss": 1.5327054262161255, "step": 4006 }, { "epoch": 0.7295895148812233, "grad_norm": 10.4375, "learning_rate": 4.460420813483605e-06, "loss": 1.2161662578582764, "step": 4008 }, { "epoch": 0.7299535815054154, "grad_norm": 7.71875, "learning_rate": 4.459897256450747e-06, "loss": 1.3260434865951538, "step": 4010 }, { "epoch": 0.7303176481296078, "grad_norm": 33.5, "learning_rate": 4.459373485183882e-06, "loss": 1.3870368003845215, "step": 4012 }, { "epoch": 0.7306817147538, "grad_norm": 10.0, "learning_rate": 4.458849499759873e-06, "loss": 1.7670800685882568, "step": 4014 }, { "epoch": 0.7310457813779921, "grad_norm": 16.5, "learning_rate": 4.4583253002556126e-06, "loss": 1.6878101825714111, "step": 4016 }, { "epoch": 0.7314098480021844, "grad_norm": 23.75, "learning_rate": 4.457800886748024e-06, "loss": 0.6901309490203857, "step": 4018 }, { "epoch": 0.7317739146263766, "grad_norm": 5.1875, "learning_rate": 4.457276259314063e-06, "loss": 1.0010206699371338, "step": 4020 }, { "epoch": 0.7321379812505688, "grad_norm": 29.0, "learning_rate": 4.456751418030717e-06, "loss": 1.601664423942566, "step": 4022 }, { "epoch": 0.732502047874761, "grad_norm": 14.125, "learning_rate": 4.456226362975004e-06, "loss": 2.0186665058135986, "step": 4024 }, { "epoch": 0.7328661144989533, "grad_norm": 5.15625, "learning_rate": 4.455701094223973e-06, "loss": 1.2741717100143433, "step": 4026 }, { "epoch": 0.7332301811231455, "grad_norm": 12.25, "learning_rate": 4.455175611854708e-06, "loss": 1.0950629711151123, "step": 4028 }, { "epoch": 0.7335942477473377, "grad_norm": 9.6875, "learning_rate": 4.454649915944317e-06, "loss": 1.4390480518341064, "step": 4030 }, { "epoch": 0.73395831437153, "grad_norm": 8.5, "learning_rate": 4.454124006569948e-06, "loss": 0.883948802947998, "step": 4032 }, { "epoch": 0.7343223809957222, "grad_norm": 19.0, "learning_rate": 4.4535978838087725e-06, "loss": 1.3211839199066162, "step": 4034 }, { "epoch": 0.7346864476199144, "grad_norm": 5.25, "learning_rate": 4.4530715477379995e-06, "loss": 0.5684911012649536, "step": 4036 }, { "epoch": 0.7350505142441067, "grad_norm": 7.5, "learning_rate": 4.452544998434864e-06, "loss": 1.5237979888916016, "step": 4038 }, { "epoch": 0.7354145808682989, "grad_norm": 12.5625, "learning_rate": 4.452018235976638e-06, "loss": 1.486379623413086, "step": 4040 }, { "epoch": 0.7357786474924911, "grad_norm": 9.5625, "learning_rate": 4.45149126044062e-06, "loss": 1.3715254068374634, "step": 4042 }, { "epoch": 0.7361427141166833, "grad_norm": 8.8125, "learning_rate": 4.4509640719041425e-06, "loss": 1.466097116470337, "step": 4044 }, { "epoch": 0.7365067807408756, "grad_norm": 12.5, "learning_rate": 4.450436670444568e-06, "loss": 1.5135843753814697, "step": 4046 }, { "epoch": 0.7368708473650678, "grad_norm": 27.75, "learning_rate": 4.449909056139289e-06, "loss": 1.5392903089523315, "step": 4048 }, { "epoch": 0.73723491398926, "grad_norm": 16.875, "learning_rate": 4.449381229065734e-06, "loss": 1.4597059488296509, "step": 4050 }, { "epoch": 0.7375989806134523, "grad_norm": 13.875, "learning_rate": 4.448853189301354e-06, "loss": 1.262041449546814, "step": 4052 }, { "epoch": 0.7379630472376445, "grad_norm": 11.625, "learning_rate": 4.448324936923643e-06, "loss": 1.4917747974395752, "step": 4054 }, { "epoch": 0.7383271138618367, "grad_norm": 29.5, "learning_rate": 4.447796472010116e-06, "loss": 2.0125930309295654, "step": 4056 }, { "epoch": 0.738691180486029, "grad_norm": 6.0, "learning_rate": 4.4472677946383245e-06, "loss": 1.0335878133773804, "step": 4058 }, { "epoch": 0.7390552471102212, "grad_norm": 10.25, "learning_rate": 4.446738904885849e-06, "loss": 1.2267245054244995, "step": 4060 }, { "epoch": 0.7394193137344134, "grad_norm": 14.625, "learning_rate": 4.446209802830303e-06, "loss": 2.0021884441375732, "step": 4062 }, { "epoch": 0.7397833803586056, "grad_norm": 11.3125, "learning_rate": 4.445680488549329e-06, "loss": 1.563736915588379, "step": 4064 }, { "epoch": 0.7401474469827979, "grad_norm": 6.625, "learning_rate": 4.445150962120601e-06, "loss": 1.5454363822937012, "step": 4066 }, { "epoch": 0.7405115136069901, "grad_norm": 13.1875, "learning_rate": 4.444621223621827e-06, "loss": 1.5436625480651855, "step": 4068 }, { "epoch": 0.7408755802311823, "grad_norm": 7.25, "learning_rate": 4.444091273130744e-06, "loss": 1.4621388912200928, "step": 4070 }, { "epoch": 0.7412396468553746, "grad_norm": 30.25, "learning_rate": 4.443561110725118e-06, "loss": 1.1844981908798218, "step": 4072 }, { "epoch": 0.7416037134795668, "grad_norm": 42.75, "learning_rate": 4.44303073648275e-06, "loss": 1.3885607719421387, "step": 4074 }, { "epoch": 0.741967780103759, "grad_norm": 17.75, "learning_rate": 4.442500150481468e-06, "loss": 1.5434989929199219, "step": 4076 }, { "epoch": 0.7423318467279512, "grad_norm": 9.3125, "learning_rate": 4.4419693527991356e-06, "loss": 1.6302589178085327, "step": 4078 }, { "epoch": 0.7426959133521435, "grad_norm": 9.4375, "learning_rate": 4.441438343513644e-06, "loss": 1.5927282571792603, "step": 4080 }, { "epoch": 0.7430599799763357, "grad_norm": 18.75, "learning_rate": 4.440907122702919e-06, "loss": 1.510801911354065, "step": 4082 }, { "epoch": 0.7434240466005279, "grad_norm": 7.375, "learning_rate": 4.440375690444911e-06, "loss": 0.5664615035057068, "step": 4084 }, { "epoch": 0.7437881132247202, "grad_norm": 12.0, "learning_rate": 4.439844046817609e-06, "loss": 1.4451801776885986, "step": 4086 }, { "epoch": 0.7441521798489124, "grad_norm": 10.9375, "learning_rate": 4.439312191899028e-06, "loss": 1.762737512588501, "step": 4088 }, { "epoch": 0.7445162464731045, "grad_norm": 5.0, "learning_rate": 4.438780125767216e-06, "loss": 1.3794842958450317, "step": 4090 }, { "epoch": 0.7448803130972969, "grad_norm": 11.625, "learning_rate": 4.438247848500251e-06, "loss": 1.5104601383209229, "step": 4092 }, { "epoch": 0.745244379721489, "grad_norm": 13.5625, "learning_rate": 4.4377153601762435e-06, "loss": 2.0323023796081543, "step": 4094 }, { "epoch": 0.7456084463456812, "grad_norm": 12.0, "learning_rate": 4.437182660873334e-06, "loss": 1.3515594005584717, "step": 4096 }, { "epoch": 0.7459725129698734, "grad_norm": 7.75, "learning_rate": 4.436649750669692e-06, "loss": 1.6212824583053589, "step": 4098 }, { "epoch": 0.7463365795940657, "grad_norm": 8.1875, "learning_rate": 4.436116629643522e-06, "loss": 1.426540732383728, "step": 4100 }, { "epoch": 0.7467006462182579, "grad_norm": 18.125, "learning_rate": 4.4355832978730566e-06, "loss": 1.4811890125274658, "step": 4102 }, { "epoch": 0.7470647128424501, "grad_norm": 11.5, "learning_rate": 4.43504975543656e-06, "loss": 1.505039930343628, "step": 4104 }, { "epoch": 0.7474287794666424, "grad_norm": 10.25, "learning_rate": 4.434516002412328e-06, "loss": 1.4368852376937866, "step": 4106 }, { "epoch": 0.7477928460908346, "grad_norm": 7.625, "learning_rate": 4.433982038878686e-06, "loss": 1.3733965158462524, "step": 4108 }, { "epoch": 0.7481569127150268, "grad_norm": 3.734375, "learning_rate": 4.4334478649139915e-06, "loss": 0.9276962280273438, "step": 4110 }, { "epoch": 0.7485209793392191, "grad_norm": 51.25, "learning_rate": 4.4329134805966315e-06, "loss": 1.2068966627120972, "step": 4112 }, { "epoch": 0.7488850459634113, "grad_norm": 6.75, "learning_rate": 4.432378886005025e-06, "loss": 0.8691614866256714, "step": 4114 }, { "epoch": 0.7492491125876035, "grad_norm": 13.25, "learning_rate": 4.431844081217622e-06, "loss": 1.483361005783081, "step": 4116 }, { "epoch": 0.7496131792117957, "grad_norm": 27.25, "learning_rate": 4.431309066312903e-06, "loss": 1.6403098106384277, "step": 4118 }, { "epoch": 0.749977245835988, "grad_norm": 26.25, "learning_rate": 4.43077384136938e-06, "loss": 1.5191655158996582, "step": 4120 }, { "epoch": 0.7503413124601802, "grad_norm": 8.9375, "learning_rate": 4.430238406465594e-06, "loss": 1.4765812158584595, "step": 4122 }, { "epoch": 0.7507053790843724, "grad_norm": 7.5625, "learning_rate": 4.429702761680117e-06, "loss": 1.2506628036499023, "step": 4124 }, { "epoch": 0.7510694457085647, "grad_norm": 11.875, "learning_rate": 4.429166907091554e-06, "loss": 1.8009881973266602, "step": 4126 }, { "epoch": 0.7514335123327569, "grad_norm": 14.125, "learning_rate": 4.4286308427785394e-06, "loss": 1.8766233921051025, "step": 4128 }, { "epoch": 0.7517975789569491, "grad_norm": 7.03125, "learning_rate": 4.42809456881974e-06, "loss": 1.3045039176940918, "step": 4130 }, { "epoch": 0.7521616455811414, "grad_norm": 5.15625, "learning_rate": 4.4275580852938485e-06, "loss": 0.890677273273468, "step": 4132 }, { "epoch": 0.7525257122053336, "grad_norm": 9.1875, "learning_rate": 4.4270213922795935e-06, "loss": 1.0816421508789062, "step": 4134 }, { "epoch": 0.7528897788295258, "grad_norm": 12.0, "learning_rate": 4.426484489855733e-06, "loss": 1.4418411254882812, "step": 4136 }, { "epoch": 0.753253845453718, "grad_norm": 7.28125, "learning_rate": 4.425947378101054e-06, "loss": 1.4546067714691162, "step": 4138 }, { "epoch": 0.7536179120779103, "grad_norm": 9.125, "learning_rate": 4.425410057094377e-06, "loss": 1.187925934791565, "step": 4140 }, { "epoch": 0.7539819787021025, "grad_norm": 19.25, "learning_rate": 4.42487252691455e-06, "loss": 1.7679461240768433, "step": 4142 }, { "epoch": 0.7543460453262947, "grad_norm": 23.25, "learning_rate": 4.424334787640454e-06, "loss": 1.4857308864593506, "step": 4144 }, { "epoch": 0.754710111950487, "grad_norm": 10.125, "learning_rate": 4.423796839351001e-06, "loss": 1.3260689973831177, "step": 4146 }, { "epoch": 0.7550741785746792, "grad_norm": 13.6875, "learning_rate": 4.4232586821251325e-06, "loss": 1.6781939268112183, "step": 4148 }, { "epoch": 0.7554382451988714, "grad_norm": 6.875, "learning_rate": 4.4227203160418185e-06, "loss": 0.9126325845718384, "step": 4150 }, { "epoch": 0.7558023118230636, "grad_norm": 25.75, "learning_rate": 4.422181741180065e-06, "loss": 1.2500287294387817, "step": 4152 }, { "epoch": 0.7561663784472559, "grad_norm": 12.9375, "learning_rate": 4.421642957618905e-06, "loss": 1.8193004131317139, "step": 4154 }, { "epoch": 0.7565304450714481, "grad_norm": 13.5, "learning_rate": 4.421103965437401e-06, "loss": 1.2327810525894165, "step": 4156 }, { "epoch": 0.7568945116956403, "grad_norm": 9.5, "learning_rate": 4.4205647647146495e-06, "loss": 1.4102716445922852, "step": 4158 }, { "epoch": 0.7572585783198326, "grad_norm": 29.625, "learning_rate": 4.420025355529778e-06, "loss": 1.4116885662078857, "step": 4160 }, { "epoch": 0.7576226449440248, "grad_norm": 12.8125, "learning_rate": 4.419485737961938e-06, "loss": 1.8389171361923218, "step": 4162 }, { "epoch": 0.757986711568217, "grad_norm": 13.8125, "learning_rate": 4.41894591209032e-06, "loss": 1.4878777265548706, "step": 4164 }, { "epoch": 0.7583507781924093, "grad_norm": 14.0, "learning_rate": 4.41840587799414e-06, "loss": 1.5728201866149902, "step": 4166 }, { "epoch": 0.7587148448166015, "grad_norm": 21.5, "learning_rate": 4.417865635752644e-06, "loss": 1.3733251094818115, "step": 4168 }, { "epoch": 0.7590789114407936, "grad_norm": 6.0, "learning_rate": 4.4173251854451135e-06, "loss": 1.3655896186828613, "step": 4170 }, { "epoch": 0.7594429780649858, "grad_norm": 8.375, "learning_rate": 4.416784527150856e-06, "loss": 1.2453241348266602, "step": 4172 }, { "epoch": 0.7598070446891781, "grad_norm": 9.6875, "learning_rate": 4.41624366094921e-06, "loss": 1.616510272026062, "step": 4174 }, { "epoch": 0.7601711113133703, "grad_norm": 14.5, "learning_rate": 4.415702586919547e-06, "loss": 1.441003680229187, "step": 4176 }, { "epoch": 0.7605351779375625, "grad_norm": 7.84375, "learning_rate": 4.415161305141267e-06, "loss": 1.2992274761199951, "step": 4178 }, { "epoch": 0.7608992445617548, "grad_norm": 16.5, "learning_rate": 4.414619815693799e-06, "loss": 1.259737253189087, "step": 4180 }, { "epoch": 0.761263311185947, "grad_norm": 11.0625, "learning_rate": 4.4140781186566075e-06, "loss": 1.9988055229187012, "step": 4182 }, { "epoch": 0.7616273778101392, "grad_norm": 15.75, "learning_rate": 4.413536214109183e-06, "loss": 1.6156141757965088, "step": 4184 }, { "epoch": 0.7619914444343315, "grad_norm": 24.5, "learning_rate": 4.412994102131046e-06, "loss": 1.521872878074646, "step": 4186 }, { "epoch": 0.7623555110585237, "grad_norm": 7.78125, "learning_rate": 4.4124517828017534e-06, "loss": 1.257689118385315, "step": 4188 }, { "epoch": 0.7627195776827159, "grad_norm": 24.75, "learning_rate": 4.411909256200884e-06, "loss": 1.377212405204773, "step": 4190 }, { "epoch": 0.7630836443069081, "grad_norm": 7.34375, "learning_rate": 4.411366522408054e-06, "loss": 1.0804977416992188, "step": 4192 }, { "epoch": 0.7634477109311004, "grad_norm": 9.5625, "learning_rate": 4.4108235815029056e-06, "loss": 1.5017775297164917, "step": 4194 }, { "epoch": 0.7638117775552926, "grad_norm": 69.0, "learning_rate": 4.410280433565115e-06, "loss": 1.1005418300628662, "step": 4196 }, { "epoch": 0.7641758441794848, "grad_norm": 12.0625, "learning_rate": 4.409737078674387e-06, "loss": 1.6864604949951172, "step": 4198 }, { "epoch": 0.7645399108036771, "grad_norm": 10.1875, "learning_rate": 4.4091935169104535e-06, "loss": 1.6279773712158203, "step": 4200 }, { "epoch": 0.7649039774278693, "grad_norm": 19.875, "learning_rate": 4.408649748353083e-06, "loss": 1.8135032653808594, "step": 4202 }, { "epoch": 0.7652680440520615, "grad_norm": 12.0625, "learning_rate": 4.408105773082072e-06, "loss": 1.5221168994903564, "step": 4204 }, { "epoch": 0.7656321106762538, "grad_norm": 7.3125, "learning_rate": 4.407561591177245e-06, "loss": 1.4761295318603516, "step": 4206 }, { "epoch": 0.765996177300446, "grad_norm": 8.25, "learning_rate": 4.407017202718459e-06, "loss": 1.5649135112762451, "step": 4208 }, { "epoch": 0.7663602439246382, "grad_norm": 9.5625, "learning_rate": 4.406472607785599e-06, "loss": 1.192299485206604, "step": 4210 }, { "epoch": 0.7667243105488304, "grad_norm": 11.0, "learning_rate": 4.405927806458586e-06, "loss": 1.4417957067489624, "step": 4212 }, { "epoch": 0.7670883771730227, "grad_norm": 9.0625, "learning_rate": 4.405382798817364e-06, "loss": 1.2320743799209595, "step": 4214 }, { "epoch": 0.7674524437972149, "grad_norm": 27.25, "learning_rate": 4.404837584941911e-06, "loss": 1.0693466663360596, "step": 4216 }, { "epoch": 0.7678165104214071, "grad_norm": 5.1875, "learning_rate": 4.404292164912237e-06, "loss": 1.280682921409607, "step": 4218 }, { "epoch": 0.7681805770455994, "grad_norm": 8.4375, "learning_rate": 4.4037465388083785e-06, "loss": 1.1720728874206543, "step": 4220 }, { "epoch": 0.7685446436697916, "grad_norm": 8.9375, "learning_rate": 4.403200706710404e-06, "loss": 1.6981515884399414, "step": 4222 }, { "epoch": 0.7689087102939838, "grad_norm": 31.75, "learning_rate": 4.402654668698413e-06, "loss": 1.984631061553955, "step": 4224 }, { "epoch": 0.769272776918176, "grad_norm": 30.125, "learning_rate": 4.402108424852533e-06, "loss": 1.3658406734466553, "step": 4226 }, { "epoch": 0.7696368435423683, "grad_norm": 19.0, "learning_rate": 4.401561975252926e-06, "loss": 1.6068122386932373, "step": 4228 }, { "epoch": 0.7700009101665605, "grad_norm": 21.75, "learning_rate": 4.401015319979777e-06, "loss": 1.462298035621643, "step": 4230 }, { "epoch": 0.7703649767907527, "grad_norm": 14.875, "learning_rate": 4.400468459113308e-06, "loss": 1.1048028469085693, "step": 4232 }, { "epoch": 0.770729043414945, "grad_norm": 28.25, "learning_rate": 4.399921392733769e-06, "loss": 0.6294914484024048, "step": 4234 }, { "epoch": 0.7710931100391372, "grad_norm": 17.0, "learning_rate": 4.399374120921439e-06, "loss": 1.5844395160675049, "step": 4236 }, { "epoch": 0.7714571766633294, "grad_norm": 9.5625, "learning_rate": 4.398826643756628e-06, "loss": 1.5203673839569092, "step": 4238 }, { "epoch": 0.7718212432875217, "grad_norm": 10.0625, "learning_rate": 4.398278961319674e-06, "loss": 1.6357944011688232, "step": 4240 }, { "epoch": 0.7721853099117139, "grad_norm": 8.3125, "learning_rate": 4.397731073690951e-06, "loss": 1.2385048866271973, "step": 4242 }, { "epoch": 0.772549376535906, "grad_norm": 7.8125, "learning_rate": 4.397182980950857e-06, "loss": 1.3403857946395874, "step": 4244 }, { "epoch": 0.7729134431600982, "grad_norm": 9.6875, "learning_rate": 4.396634683179823e-06, "loss": 1.2890541553497314, "step": 4246 }, { "epoch": 0.7732775097842906, "grad_norm": 17.25, "learning_rate": 4.396086180458309e-06, "loss": 1.4887521266937256, "step": 4248 }, { "epoch": 0.7736415764084827, "grad_norm": 18.75, "learning_rate": 4.395537472866805e-06, "loss": 1.7618627548217773, "step": 4250 }, { "epoch": 0.7740056430326749, "grad_norm": 10.25, "learning_rate": 4.394988560485835e-06, "loss": 1.5332757234573364, "step": 4252 }, { "epoch": 0.7743697096568672, "grad_norm": 33.5, "learning_rate": 4.3944394433959445e-06, "loss": 1.649156093597412, "step": 4254 }, { "epoch": 0.7747337762810594, "grad_norm": 10.5625, "learning_rate": 4.393890121677718e-06, "loss": 1.6425827741622925, "step": 4256 }, { "epoch": 0.7750978429052516, "grad_norm": 12.6875, "learning_rate": 4.3933405954117655e-06, "loss": 1.2292001247406006, "step": 4258 }, { "epoch": 0.7754619095294439, "grad_norm": 16.5, "learning_rate": 4.392790864678728e-06, "loss": 1.3680897951126099, "step": 4260 }, { "epoch": 0.7758259761536361, "grad_norm": 20.125, "learning_rate": 4.392240929559274e-06, "loss": 1.5855176448822021, "step": 4262 }, { "epoch": 0.7761900427778283, "grad_norm": 12.625, "learning_rate": 4.391690790134105e-06, "loss": 1.9958208799362183, "step": 4264 }, { "epoch": 0.7765541094020205, "grad_norm": 5.15625, "learning_rate": 4.3911404464839546e-06, "loss": 1.373746395111084, "step": 4266 }, { "epoch": 0.7769181760262128, "grad_norm": 8.0, "learning_rate": 4.390589898689581e-06, "loss": 1.199842929840088, "step": 4268 }, { "epoch": 0.777282242650405, "grad_norm": 8.5625, "learning_rate": 4.390039146831775e-06, "loss": 1.4002423286437988, "step": 4270 }, { "epoch": 0.7776463092745972, "grad_norm": 10.875, "learning_rate": 4.389488190991358e-06, "loss": 1.4751213788986206, "step": 4272 }, { "epoch": 0.7780103758987895, "grad_norm": 17.125, "learning_rate": 4.38893703124918e-06, "loss": 1.281068205833435, "step": 4274 }, { "epoch": 0.7783744425229817, "grad_norm": 14.1875, "learning_rate": 4.38838566768612e-06, "loss": 0.930081844329834, "step": 4276 }, { "epoch": 0.7787385091471739, "grad_norm": 31.0, "learning_rate": 4.387834100383093e-06, "loss": 1.372563362121582, "step": 4278 }, { "epoch": 0.7791025757713662, "grad_norm": 9.375, "learning_rate": 4.387282329421033e-06, "loss": 1.3405052423477173, "step": 4280 }, { "epoch": 0.7794666423955584, "grad_norm": 22.0, "learning_rate": 4.386730354880916e-06, "loss": 1.2313324213027954, "step": 4282 }, { "epoch": 0.7798307090197506, "grad_norm": 145.0, "learning_rate": 4.386178176843737e-06, "loss": 1.7453323602676392, "step": 4284 }, { "epoch": 0.7801947756439428, "grad_norm": 8.6875, "learning_rate": 4.385625795390529e-06, "loss": 1.3262126445770264, "step": 4286 }, { "epoch": 0.7805588422681351, "grad_norm": 8.125, "learning_rate": 4.385073210602352e-06, "loss": 1.0706943273544312, "step": 4288 }, { "epoch": 0.7809229088923273, "grad_norm": 8.5625, "learning_rate": 4.3845204225602935e-06, "loss": 1.2356127500534058, "step": 4290 }, { "epoch": 0.7812869755165195, "grad_norm": 12.375, "learning_rate": 4.383967431345474e-06, "loss": 1.2644524574279785, "step": 4292 }, { "epoch": 0.7816510421407118, "grad_norm": 11.875, "learning_rate": 4.383414237039043e-06, "loss": 1.8036032915115356, "step": 4294 }, { "epoch": 0.782015108764904, "grad_norm": 11.0625, "learning_rate": 4.382860839722179e-06, "loss": 1.3682570457458496, "step": 4296 }, { "epoch": 0.7823791753890962, "grad_norm": 4.0625, "learning_rate": 4.382307239476093e-06, "loss": 1.3352452516555786, "step": 4298 }, { "epoch": 0.7827432420132885, "grad_norm": 22.375, "learning_rate": 4.381753436382018e-06, "loss": 1.594243049621582, "step": 4300 }, { "epoch": 0.7831073086374807, "grad_norm": 8.3125, "learning_rate": 4.381199430521228e-06, "loss": 1.4326969385147095, "step": 4302 }, { "epoch": 0.7834713752616729, "grad_norm": 17.375, "learning_rate": 4.3806452219750184e-06, "loss": 1.5666792392730713, "step": 4304 }, { "epoch": 0.7838354418858651, "grad_norm": 7.3125, "learning_rate": 4.380090810824719e-06, "loss": 1.482906460762024, "step": 4306 }, { "epoch": 0.7841995085100574, "grad_norm": 15.3125, "learning_rate": 4.379536197151685e-06, "loss": 1.2503796815872192, "step": 4308 }, { "epoch": 0.7845635751342496, "grad_norm": 9.375, "learning_rate": 4.378981381037305e-06, "loss": 1.3743815422058105, "step": 4310 }, { "epoch": 0.7849276417584418, "grad_norm": 8.0625, "learning_rate": 4.3784263625629965e-06, "loss": 1.182907223701477, "step": 4312 }, { "epoch": 0.7852917083826341, "grad_norm": 27.875, "learning_rate": 4.377871141810205e-06, "loss": 1.4062925577163696, "step": 4314 }, { "epoch": 0.7856557750068263, "grad_norm": 3.90625, "learning_rate": 4.377315718860407e-06, "loss": 1.1224201917648315, "step": 4316 }, { "epoch": 0.7860198416310185, "grad_norm": 11.0, "learning_rate": 4.376760093795111e-06, "loss": 1.3343844413757324, "step": 4318 }, { "epoch": 0.7863839082552107, "grad_norm": 8.4375, "learning_rate": 4.376204266695848e-06, "loss": 1.4972634315490723, "step": 4320 }, { "epoch": 0.786747974879403, "grad_norm": 12.625, "learning_rate": 4.375648237644188e-06, "loss": 1.687469482421875, "step": 4322 }, { "epoch": 0.7871120415035952, "grad_norm": 68.0, "learning_rate": 4.375092006721723e-06, "loss": 1.7964059114456177, "step": 4324 }, { "epoch": 0.7874761081277873, "grad_norm": 4.46875, "learning_rate": 4.3745355740100796e-06, "loss": 0.9894921779632568, "step": 4326 }, { "epoch": 0.7878401747519796, "grad_norm": 43.5, "learning_rate": 4.37397893959091e-06, "loss": 1.2102699279785156, "step": 4328 }, { "epoch": 0.7882042413761718, "grad_norm": 9.3125, "learning_rate": 4.3734221035459e-06, "loss": 1.2620714902877808, "step": 4330 }, { "epoch": 0.788568308000364, "grad_norm": 8.5625, "learning_rate": 4.3728650659567626e-06, "loss": 0.9282611608505249, "step": 4332 }, { "epoch": 0.7889323746245563, "grad_norm": 10.25, "learning_rate": 4.372307826905241e-06, "loss": 1.0102256536483765, "step": 4334 }, { "epoch": 0.7892964412487485, "grad_norm": 5.78125, "learning_rate": 4.371750386473107e-06, "loss": 1.2553620338439941, "step": 4336 }, { "epoch": 0.7896605078729407, "grad_norm": 15.1875, "learning_rate": 4.371192744742162e-06, "loss": 1.548854112625122, "step": 4338 }, { "epoch": 0.7900245744971329, "grad_norm": 7.3125, "learning_rate": 4.3706349017942395e-06, "loss": 0.9886182546615601, "step": 4340 }, { "epoch": 0.7903886411213252, "grad_norm": 13.8125, "learning_rate": 4.3700768577112e-06, "loss": 1.607574462890625, "step": 4342 }, { "epoch": 0.7907527077455174, "grad_norm": 108.5, "learning_rate": 4.369518612574933e-06, "loss": 1.316232442855835, "step": 4344 }, { "epoch": 0.7911167743697096, "grad_norm": 13.875, "learning_rate": 4.368960166467362e-06, "loss": 1.0501240491867065, "step": 4346 }, { "epoch": 0.7914808409939019, "grad_norm": 9.125, "learning_rate": 4.368401519470433e-06, "loss": 1.7694333791732788, "step": 4348 }, { "epoch": 0.7918449076180941, "grad_norm": 2.53125, "learning_rate": 4.367842671666126e-06, "loss": 0.9769310355186462, "step": 4350 }, { "epoch": 0.7922089742422863, "grad_norm": 8.6875, "learning_rate": 4.367283623136451e-06, "loss": 1.5035035610198975, "step": 4352 }, { "epoch": 0.7925730408664786, "grad_norm": 7.5625, "learning_rate": 4.366724373963446e-06, "loss": 1.4821988344192505, "step": 4354 }, { "epoch": 0.7929371074906708, "grad_norm": 8.0, "learning_rate": 4.366164924229178e-06, "loss": 1.3605796098709106, "step": 4356 }, { "epoch": 0.793301174114863, "grad_norm": 8.1875, "learning_rate": 4.3656052740157426e-06, "loss": 1.541952133178711, "step": 4358 }, { "epoch": 0.7936652407390552, "grad_norm": 6.75, "learning_rate": 4.365045423405269e-06, "loss": 1.5068315267562866, "step": 4360 }, { "epoch": 0.7940293073632475, "grad_norm": 8.9375, "learning_rate": 4.36448537247991e-06, "loss": 1.310437798500061, "step": 4362 }, { "epoch": 0.7943933739874397, "grad_norm": 11.4375, "learning_rate": 4.363925121321854e-06, "loss": 1.3380366563796997, "step": 4364 }, { "epoch": 0.7947574406116319, "grad_norm": 23.5, "learning_rate": 4.363364670013312e-06, "loss": 1.4515764713287354, "step": 4366 }, { "epoch": 0.7951215072358242, "grad_norm": 5.90625, "learning_rate": 4.362804018636532e-06, "loss": 1.442319393157959, "step": 4368 }, { "epoch": 0.7954855738600164, "grad_norm": 7.1875, "learning_rate": 4.362243167273784e-06, "loss": 1.3407368659973145, "step": 4370 }, { "epoch": 0.7958496404842086, "grad_norm": 10.625, "learning_rate": 4.361682116007372e-06, "loss": 1.3934518098831177, "step": 4372 }, { "epoch": 0.7962137071084009, "grad_norm": 4.90625, "learning_rate": 4.361120864919629e-06, "loss": 1.281641960144043, "step": 4374 }, { "epoch": 0.7965777737325931, "grad_norm": 13.9375, "learning_rate": 4.360559414092914e-06, "loss": 1.5547196865081787, "step": 4376 }, { "epoch": 0.7969418403567853, "grad_norm": 18.875, "learning_rate": 4.359997763609619e-06, "loss": 1.333369255065918, "step": 4378 }, { "epoch": 0.7973059069809775, "grad_norm": 31.5, "learning_rate": 4.359435913552163e-06, "loss": 1.5652134418487549, "step": 4380 }, { "epoch": 0.7976699736051698, "grad_norm": 10.0625, "learning_rate": 4.3588738640029984e-06, "loss": 1.4560751914978027, "step": 4382 }, { "epoch": 0.798034040229362, "grad_norm": 9.25, "learning_rate": 4.358311615044599e-06, "loss": 1.3550523519515991, "step": 4384 }, { "epoch": 0.7983981068535542, "grad_norm": 9.5625, "learning_rate": 4.3577491667594765e-06, "loss": 1.4192368984222412, "step": 4386 }, { "epoch": 0.7987621734777465, "grad_norm": 11.25, "learning_rate": 4.357186519230165e-06, "loss": 1.5846765041351318, "step": 4388 }, { "epoch": 0.7991262401019387, "grad_norm": 8.9375, "learning_rate": 4.356623672539233e-06, "loss": 1.237911343574524, "step": 4390 }, { "epoch": 0.7994903067261309, "grad_norm": 15.3125, "learning_rate": 4.356060626769274e-06, "loss": 0.7316661477088928, "step": 4392 }, { "epoch": 0.7998543733503231, "grad_norm": 19.625, "learning_rate": 4.355497382002915e-06, "loss": 1.2689944505691528, "step": 4394 }, { "epoch": 0.8002184399745154, "grad_norm": 12.3125, "learning_rate": 4.3549339383228065e-06, "loss": 1.4916542768478394, "step": 4396 }, { "epoch": 0.8005825065987076, "grad_norm": 8.75, "learning_rate": 4.354370295811635e-06, "loss": 1.2607371807098389, "step": 4398 }, { "epoch": 0.8009465732228997, "grad_norm": 16.375, "learning_rate": 4.353806454552111e-06, "loss": 1.2823785543441772, "step": 4400 }, { "epoch": 0.801310639847092, "grad_norm": 11.875, "learning_rate": 4.353242414626977e-06, "loss": 1.5608652830123901, "step": 4402 }, { "epoch": 0.8016747064712842, "grad_norm": 11.125, "learning_rate": 4.352678176119002e-06, "loss": 1.3546953201293945, "step": 4404 }, { "epoch": 0.8020387730954764, "grad_norm": 25.75, "learning_rate": 4.352113739110987e-06, "loss": 1.2929246425628662, "step": 4406 }, { "epoch": 0.8024028397196687, "grad_norm": 9.625, "learning_rate": 4.35154910368576e-06, "loss": 0.9250297546386719, "step": 4408 }, { "epoch": 0.8027669063438609, "grad_norm": 7.75, "learning_rate": 4.35098426992618e-06, "loss": 1.7485544681549072, "step": 4410 }, { "epoch": 0.8031309729680531, "grad_norm": 9.625, "learning_rate": 4.350419237915134e-06, "loss": 1.0787283182144165, "step": 4412 }, { "epoch": 0.8034950395922453, "grad_norm": 8.75, "learning_rate": 4.349854007735536e-06, "loss": 0.7617124319076538, "step": 4414 }, { "epoch": 0.8038591062164376, "grad_norm": 6.90625, "learning_rate": 4.349288579470333e-06, "loss": 1.2137619256973267, "step": 4416 }, { "epoch": 0.8042231728406298, "grad_norm": 17.0, "learning_rate": 4.3487229532025e-06, "loss": 1.2146120071411133, "step": 4418 }, { "epoch": 0.804587239464822, "grad_norm": 8.25, "learning_rate": 4.348157129015039e-06, "loss": 1.1762677431106567, "step": 4420 }, { "epoch": 0.8049513060890143, "grad_norm": 9.25, "learning_rate": 4.347591106990984e-06, "loss": 1.1438363790512085, "step": 4422 }, { "epoch": 0.8053153727132065, "grad_norm": 10.1875, "learning_rate": 4.347024887213393e-06, "loss": 1.514618992805481, "step": 4424 }, { "epoch": 0.8056794393373987, "grad_norm": 12.3125, "learning_rate": 4.346458469765361e-06, "loss": 1.3525161743164062, "step": 4426 }, { "epoch": 0.806043505961591, "grad_norm": 6.25, "learning_rate": 4.345891854730005e-06, "loss": 1.247894048690796, "step": 4428 }, { "epoch": 0.8064075725857832, "grad_norm": 12.875, "learning_rate": 4.345325042190473e-06, "loss": 1.8402509689331055, "step": 4430 }, { "epoch": 0.8067716392099754, "grad_norm": 29.125, "learning_rate": 4.344758032229943e-06, "loss": 1.3306828737258911, "step": 4432 }, { "epoch": 0.8071357058341676, "grad_norm": 24.75, "learning_rate": 4.344190824931622e-06, "loss": 1.2305538654327393, "step": 4434 }, { "epoch": 0.8074997724583599, "grad_norm": 10.875, "learning_rate": 4.343623420378745e-06, "loss": 1.4118926525115967, "step": 4436 }, { "epoch": 0.8078638390825521, "grad_norm": 5.71875, "learning_rate": 4.3430558186545765e-06, "loss": 1.3336848020553589, "step": 4438 }, { "epoch": 0.8082279057067443, "grad_norm": 3.5, "learning_rate": 4.34248801984241e-06, "loss": 1.0588898658752441, "step": 4440 }, { "epoch": 0.8085919723309366, "grad_norm": 15.25, "learning_rate": 4.3419200240255665e-06, "loss": 1.6459929943084717, "step": 4442 }, { "epoch": 0.8089560389551288, "grad_norm": 45.0, "learning_rate": 4.341351831287398e-06, "loss": 1.7264268398284912, "step": 4444 }, { "epoch": 0.809320105579321, "grad_norm": 7.25, "learning_rate": 4.340783441711284e-06, "loss": 1.2586559057235718, "step": 4446 }, { "epoch": 0.8096841722035133, "grad_norm": 6.5625, "learning_rate": 4.340214855380634e-06, "loss": 1.5741991996765137, "step": 4448 }, { "epoch": 0.8100482388277055, "grad_norm": 5.96875, "learning_rate": 4.339646072378886e-06, "loss": 0.9454053640365601, "step": 4450 }, { "epoch": 0.8104123054518977, "grad_norm": 15.875, "learning_rate": 4.339077092789505e-06, "loss": 1.0356849431991577, "step": 4452 }, { "epoch": 0.8107763720760899, "grad_norm": 7.125, "learning_rate": 4.338507916695988e-06, "loss": 0.7811887264251709, "step": 4454 }, { "epoch": 0.8111404387002822, "grad_norm": 16.625, "learning_rate": 4.337938544181858e-06, "loss": 1.0020370483398438, "step": 4456 }, { "epoch": 0.8115045053244744, "grad_norm": 13.125, "learning_rate": 4.337368975330669e-06, "loss": 1.3336344957351685, "step": 4458 }, { "epoch": 0.8118685719486666, "grad_norm": 36.0, "learning_rate": 4.336799210226003e-06, "loss": 2.022167205810547, "step": 4460 }, { "epoch": 0.8122326385728589, "grad_norm": 16.125, "learning_rate": 4.3362292489514716e-06, "loss": 2.00360107421875, "step": 4462 }, { "epoch": 0.8125967051970511, "grad_norm": 19.5, "learning_rate": 4.335659091590711e-06, "loss": 2.0605931282043457, "step": 4464 }, { "epoch": 0.8129607718212433, "grad_norm": 7.5625, "learning_rate": 4.3350887382273934e-06, "loss": 1.5292812585830688, "step": 4466 }, { "epoch": 0.8133248384454355, "grad_norm": 7.90625, "learning_rate": 4.334518188945213e-06, "loss": 1.4856352806091309, "step": 4468 }, { "epoch": 0.8136889050696278, "grad_norm": 8.9375, "learning_rate": 4.333947443827897e-06, "loss": 1.4383578300476074, "step": 4470 }, { "epoch": 0.81405297169382, "grad_norm": 16.125, "learning_rate": 4.3333765029592e-06, "loss": 1.5232003927230835, "step": 4472 }, { "epoch": 0.8144170383180122, "grad_norm": 9.75, "learning_rate": 4.3328053664229045e-06, "loss": 1.5454494953155518, "step": 4474 }, { "epoch": 0.8147811049422045, "grad_norm": 12.6875, "learning_rate": 4.332234034302824e-06, "loss": 1.1203497648239136, "step": 4476 }, { "epoch": 0.8151451715663967, "grad_norm": 12.0625, "learning_rate": 4.3316625066827955e-06, "loss": 1.3930609226226807, "step": 4478 }, { "epoch": 0.8155092381905888, "grad_norm": 7.5625, "learning_rate": 4.331090783646693e-06, "loss": 1.8819421529769897, "step": 4480 }, { "epoch": 0.8158733048147812, "grad_norm": 7.96875, "learning_rate": 4.330518865278412e-06, "loss": 1.3567163944244385, "step": 4482 }, { "epoch": 0.8162373714389733, "grad_norm": 11.125, "learning_rate": 4.32994675166188e-06, "loss": 1.6875742673873901, "step": 4484 }, { "epoch": 0.8166014380631655, "grad_norm": 12.9375, "learning_rate": 4.329374442881051e-06, "loss": 1.3831372261047363, "step": 4486 }, { "epoch": 0.8169655046873577, "grad_norm": 15.8125, "learning_rate": 4.32880193901991e-06, "loss": 1.1459221839904785, "step": 4488 }, { "epoch": 0.81732957131155, "grad_norm": 14.3125, "learning_rate": 4.328229240162471e-06, "loss": 0.67169189453125, "step": 4490 }, { "epoch": 0.8176936379357422, "grad_norm": 100.0, "learning_rate": 4.3276563463927725e-06, "loss": 1.5136171579360962, "step": 4492 }, { "epoch": 0.8180577045599344, "grad_norm": 16.25, "learning_rate": 4.327083257794886e-06, "loss": 1.5977423191070557, "step": 4494 }, { "epoch": 0.8184217711841267, "grad_norm": 11.5, "learning_rate": 4.326509974452909e-06, "loss": 1.636972427368164, "step": 4496 }, { "epoch": 0.8187858378083189, "grad_norm": 25.375, "learning_rate": 4.325936496450971e-06, "loss": 1.7512624263763428, "step": 4498 }, { "epoch": 0.8191499044325111, "grad_norm": 12.0, "learning_rate": 4.3253628238732245e-06, "loss": 1.5959237813949585, "step": 4500 }, { "epoch": 0.8195139710567034, "grad_norm": 10.9375, "learning_rate": 4.3247889568038544e-06, "loss": 1.3929579257965088, "step": 4502 }, { "epoch": 0.8198780376808956, "grad_norm": 19.75, "learning_rate": 4.3242148953270745e-06, "loss": 1.0622797012329102, "step": 4504 }, { "epoch": 0.8202421043050878, "grad_norm": 20.125, "learning_rate": 4.323640639527126e-06, "loss": 1.4992486238479614, "step": 4506 }, { "epoch": 0.82060617092928, "grad_norm": 16.125, "learning_rate": 4.323066189488277e-06, "loss": 1.6865487098693848, "step": 4508 }, { "epoch": 0.8209702375534723, "grad_norm": 13.1875, "learning_rate": 4.322491545294826e-06, "loss": 0.5326933860778809, "step": 4510 }, { "epoch": 0.8213343041776645, "grad_norm": 9.5625, "learning_rate": 4.321916707031101e-06, "loss": 1.5173028707504272, "step": 4512 }, { "epoch": 0.8216983708018567, "grad_norm": 22.5, "learning_rate": 4.321341674781456e-06, "loss": 1.3065694570541382, "step": 4514 }, { "epoch": 0.822062437426049, "grad_norm": 18.25, "learning_rate": 4.320766448630276e-06, "loss": 1.378330111503601, "step": 4516 }, { "epoch": 0.8224265040502412, "grad_norm": 13.625, "learning_rate": 4.320191028661972e-06, "loss": 1.4198566675186157, "step": 4518 }, { "epoch": 0.8227905706744334, "grad_norm": 84.5, "learning_rate": 4.319615414960984e-06, "loss": 1.341217041015625, "step": 4520 }, { "epoch": 0.8231546372986257, "grad_norm": 9.75, "learning_rate": 4.319039607611782e-06, "loss": 0.5010709762573242, "step": 4522 }, { "epoch": 0.8235187039228179, "grad_norm": 6.09375, "learning_rate": 4.318463606698865e-06, "loss": 1.281144618988037, "step": 4524 }, { "epoch": 0.8238827705470101, "grad_norm": 28.0, "learning_rate": 4.317887412306755e-06, "loss": 1.253221869468689, "step": 4526 }, { "epoch": 0.8242468371712023, "grad_norm": 8.4375, "learning_rate": 4.317311024520009e-06, "loss": 1.6719715595245361, "step": 4528 }, { "epoch": 0.8246109037953946, "grad_norm": 33.75, "learning_rate": 4.316734443423208e-06, "loss": 1.5020778179168701, "step": 4530 }, { "epoch": 0.8249749704195868, "grad_norm": 6.5, "learning_rate": 4.3161576691009646e-06, "loss": 1.1740750074386597, "step": 4532 }, { "epoch": 0.825339037043779, "grad_norm": 11.0, "learning_rate": 4.315580701637917e-06, "loss": 1.3550879955291748, "step": 4534 }, { "epoch": 0.8257031036679713, "grad_norm": 9.0, "learning_rate": 4.315003541118733e-06, "loss": 1.1664139032363892, "step": 4536 }, { "epoch": 0.8260671702921635, "grad_norm": 12.25, "learning_rate": 4.314426187628108e-06, "loss": 1.4160387516021729, "step": 4538 }, { "epoch": 0.8264312369163557, "grad_norm": 13.625, "learning_rate": 4.313848641250767e-06, "loss": 1.502232551574707, "step": 4540 }, { "epoch": 0.8267953035405479, "grad_norm": 6.15625, "learning_rate": 4.313270902071463e-06, "loss": 1.5931636095046997, "step": 4542 }, { "epoch": 0.8271593701647402, "grad_norm": 6.84375, "learning_rate": 4.312692970174977e-06, "loss": 1.3021752834320068, "step": 4544 }, { "epoch": 0.8275234367889324, "grad_norm": 10.75, "learning_rate": 4.312114845646116e-06, "loss": 1.3560556173324585, "step": 4546 }, { "epoch": 0.8278875034131246, "grad_norm": 14.0625, "learning_rate": 4.31153652856972e-06, "loss": 1.1802349090576172, "step": 4548 }, { "epoch": 0.8282515700373169, "grad_norm": 3.90625, "learning_rate": 4.310958019030652e-06, "loss": 0.9591573476791382, "step": 4550 }, { "epoch": 0.8286156366615091, "grad_norm": 33.0, "learning_rate": 4.310379317113809e-06, "loss": 1.2214243412017822, "step": 4552 }, { "epoch": 0.8289797032857013, "grad_norm": 18.5, "learning_rate": 4.309800422904111e-06, "loss": 1.799782633781433, "step": 4554 }, { "epoch": 0.8293437699098936, "grad_norm": 13.875, "learning_rate": 4.30922133648651e-06, "loss": 1.4806492328643799, "step": 4556 }, { "epoch": 0.8297078365340858, "grad_norm": 11.5, "learning_rate": 4.3086420579459835e-06, "loss": 1.4170966148376465, "step": 4558 }, { "epoch": 0.830071903158278, "grad_norm": 9.625, "learning_rate": 4.308062587367537e-06, "loss": 1.4671202898025513, "step": 4560 }, { "epoch": 0.8304359697824701, "grad_norm": 9.5, "learning_rate": 4.307482924836208e-06, "loss": 1.47428560256958, "step": 4562 }, { "epoch": 0.8308000364066624, "grad_norm": 6.90625, "learning_rate": 4.306903070437059e-06, "loss": 0.9847546815872192, "step": 4564 }, { "epoch": 0.8311641030308546, "grad_norm": 17.625, "learning_rate": 4.30632302425518e-06, "loss": 1.356264591217041, "step": 4566 }, { "epoch": 0.8315281696550468, "grad_norm": 8.625, "learning_rate": 4.305742786375693e-06, "loss": 1.7652785778045654, "step": 4568 }, { "epoch": 0.8318922362792391, "grad_norm": 21.75, "learning_rate": 4.305162356883742e-06, "loss": 1.3786014318466187, "step": 4570 }, { "epoch": 0.8322563029034313, "grad_norm": 12.3125, "learning_rate": 4.3045817358645044e-06, "loss": 1.3860353231430054, "step": 4572 }, { "epoch": 0.8326203695276235, "grad_norm": 8.1875, "learning_rate": 4.304000923403186e-06, "loss": 1.4982531070709229, "step": 4574 }, { "epoch": 0.8329844361518158, "grad_norm": 2.59375, "learning_rate": 4.3034199195850144e-06, "loss": 1.2097407579421997, "step": 4576 }, { "epoch": 0.833348502776008, "grad_norm": 10.0625, "learning_rate": 4.302838724495253e-06, "loss": 1.3626539707183838, "step": 4578 }, { "epoch": 0.8337125694002002, "grad_norm": 15.6875, "learning_rate": 4.302257338219189e-06, "loss": 1.9367485046386719, "step": 4580 }, { "epoch": 0.8340766360243924, "grad_norm": 27.125, "learning_rate": 4.301675760842138e-06, "loss": 1.4965035915374756, "step": 4582 }, { "epoch": 0.8344407026485847, "grad_norm": 22.25, "learning_rate": 4.301093992449445e-06, "loss": 1.592379093170166, "step": 4584 }, { "epoch": 0.8348047692727769, "grad_norm": 15.25, "learning_rate": 4.3005120331264795e-06, "loss": 1.2901054620742798, "step": 4586 }, { "epoch": 0.8351688358969691, "grad_norm": 12.0625, "learning_rate": 4.2999298829586455e-06, "loss": 1.8928816318511963, "step": 4588 }, { "epoch": 0.8355329025211614, "grad_norm": 14.75, "learning_rate": 4.299347542031368e-06, "loss": 1.8752672672271729, "step": 4590 }, { "epoch": 0.8358969691453536, "grad_norm": 8.875, "learning_rate": 4.298765010430105e-06, "loss": 1.0792758464813232, "step": 4592 }, { "epoch": 0.8362610357695458, "grad_norm": 5.25, "learning_rate": 4.29818228824034e-06, "loss": 1.0743558406829834, "step": 4594 }, { "epoch": 0.8366251023937381, "grad_norm": 9.6875, "learning_rate": 4.297599375547586e-06, "loss": 1.0034387111663818, "step": 4596 }, { "epoch": 0.8369891690179303, "grad_norm": 10.6875, "learning_rate": 4.297016272437382e-06, "loss": 1.4353010654449463, "step": 4598 }, { "epoch": 0.8373532356421225, "grad_norm": 23.875, "learning_rate": 4.296432978995296e-06, "loss": 1.987682580947876, "step": 4600 }, { "epoch": 0.8377173022663147, "grad_norm": 11.8125, "learning_rate": 4.295849495306924e-06, "loss": 2.0193583965301514, "step": 4602 }, { "epoch": 0.838081368890507, "grad_norm": 7.0625, "learning_rate": 4.295265821457891e-06, "loss": 1.526608943939209, "step": 4604 }, { "epoch": 0.8384454355146992, "grad_norm": 13.375, "learning_rate": 4.294681957533849e-06, "loss": 1.3104884624481201, "step": 4606 }, { "epoch": 0.8388095021388914, "grad_norm": 17.875, "learning_rate": 4.294097903620474e-06, "loss": 1.399722933769226, "step": 4608 }, { "epoch": 0.8391735687630837, "grad_norm": 30.375, "learning_rate": 4.293513659803478e-06, "loss": 1.5541167259216309, "step": 4610 }, { "epoch": 0.8395376353872759, "grad_norm": 10.5, "learning_rate": 4.292929226168594e-06, "loss": 1.7088623046875, "step": 4612 }, { "epoch": 0.8399017020114681, "grad_norm": 3.609375, "learning_rate": 4.292344602801586e-06, "loss": 1.0808995962142944, "step": 4614 }, { "epoch": 0.8402657686356603, "grad_norm": 3.0625, "learning_rate": 4.2917597897882445e-06, "loss": 1.1097280979156494, "step": 4616 }, { "epoch": 0.8406298352598526, "grad_norm": 21.375, "learning_rate": 4.29117478721439e-06, "loss": 1.1709375381469727, "step": 4618 }, { "epoch": 0.8409939018840448, "grad_norm": 24.125, "learning_rate": 4.290589595165867e-06, "loss": 1.7756097316741943, "step": 4620 }, { "epoch": 0.841357968508237, "grad_norm": 8.0, "learning_rate": 4.290004213728551e-06, "loss": 1.5190043449401855, "step": 4622 }, { "epoch": 0.8417220351324293, "grad_norm": 7.6875, "learning_rate": 4.289418642988346e-06, "loss": 1.4005178213119507, "step": 4624 }, { "epoch": 0.8420861017566215, "grad_norm": 12.3125, "learning_rate": 4.28883288303118e-06, "loss": 1.7801865339279175, "step": 4626 }, { "epoch": 0.8424501683808137, "grad_norm": 16.75, "learning_rate": 4.288246933943011e-06, "loss": 1.6216456890106201, "step": 4628 }, { "epoch": 0.842814235005006, "grad_norm": 17.125, "learning_rate": 4.287660795809826e-06, "loss": 1.7046103477478027, "step": 4630 }, { "epoch": 0.8431783016291982, "grad_norm": 13.6875, "learning_rate": 4.287074468717637e-06, "loss": 2.0742743015289307, "step": 4632 }, { "epoch": 0.8435423682533904, "grad_norm": 25.625, "learning_rate": 4.286487952752486e-06, "loss": 1.380745530128479, "step": 4634 }, { "epoch": 0.8439064348775825, "grad_norm": 21.875, "learning_rate": 4.285901248000442e-06, "loss": 0.9399363994598389, "step": 4636 }, { "epoch": 0.8442705015017749, "grad_norm": 12.0, "learning_rate": 4.285314354547601e-06, "loss": 1.4656147956848145, "step": 4638 }, { "epoch": 0.844634568125967, "grad_norm": 6.15625, "learning_rate": 4.284727272480087e-06, "loss": 1.3026562929153442, "step": 4640 }, { "epoch": 0.8449986347501592, "grad_norm": 10.8125, "learning_rate": 4.284140001884053e-06, "loss": 1.3558553457260132, "step": 4642 }, { "epoch": 0.8453627013743515, "grad_norm": 6.21875, "learning_rate": 4.2835525428456785e-06, "loss": 1.4424870014190674, "step": 4644 }, { "epoch": 0.8457267679985437, "grad_norm": 23.25, "learning_rate": 4.2829648954511684e-06, "loss": 1.5834052562713623, "step": 4646 }, { "epoch": 0.8460908346227359, "grad_norm": 29.375, "learning_rate": 4.2823770597867595e-06, "loss": 1.6375362873077393, "step": 4648 }, { "epoch": 0.8464549012469282, "grad_norm": 72.0, "learning_rate": 4.2817890359387145e-06, "loss": 0.601618766784668, "step": 4650 }, { "epoch": 0.8468189678711204, "grad_norm": 20.75, "learning_rate": 4.281200823993323e-06, "loss": 1.486088514328003, "step": 4652 }, { "epoch": 0.8471830344953126, "grad_norm": 8.4375, "learning_rate": 4.280612424036904e-06, "loss": 1.263069987297058, "step": 4654 }, { "epoch": 0.8475471011195048, "grad_norm": 8.6875, "learning_rate": 4.280023836155801e-06, "loss": 1.40183687210083, "step": 4656 }, { "epoch": 0.8479111677436971, "grad_norm": 18.125, "learning_rate": 4.279435060436387e-06, "loss": 1.2984257936477661, "step": 4658 }, { "epoch": 0.8482752343678893, "grad_norm": 12.25, "learning_rate": 4.278846096965063e-06, "loss": 1.5261800289154053, "step": 4660 }, { "epoch": 0.8486393009920815, "grad_norm": 17.625, "learning_rate": 4.278256945828258e-06, "loss": 1.8382480144500732, "step": 4662 }, { "epoch": 0.8490033676162738, "grad_norm": 5.25, "learning_rate": 4.277667607112425e-06, "loss": 0.8402523994445801, "step": 4664 }, { "epoch": 0.849367434240466, "grad_norm": 6.75, "learning_rate": 4.2770780809040495e-06, "loss": 0.997430682182312, "step": 4666 }, { "epoch": 0.8497315008646582, "grad_norm": 14.4375, "learning_rate": 4.276488367289641e-06, "loss": 1.6727664470672607, "step": 4668 }, { "epoch": 0.8500955674888505, "grad_norm": 26.125, "learning_rate": 4.275898466355738e-06, "loss": 1.6993602514266968, "step": 4670 }, { "epoch": 0.8504596341130427, "grad_norm": 13.8125, "learning_rate": 4.2753083781889045e-06, "loss": 1.5225656032562256, "step": 4672 }, { "epoch": 0.8508237007372349, "grad_norm": 81.5, "learning_rate": 4.274718102875737e-06, "loss": 1.5078167915344238, "step": 4674 }, { "epoch": 0.8511877673614271, "grad_norm": 12.25, "learning_rate": 4.274127640502852e-06, "loss": 1.785815954208374, "step": 4676 }, { "epoch": 0.8515518339856194, "grad_norm": 34.0, "learning_rate": 4.2735369911569e-06, "loss": 1.3342301845550537, "step": 4678 }, { "epoch": 0.8519159006098116, "grad_norm": 12.3125, "learning_rate": 4.2729461549245565e-06, "loss": 1.3573226928710938, "step": 4680 }, { "epoch": 0.8522799672340038, "grad_norm": 7.125, "learning_rate": 4.272355131892523e-06, "loss": 1.471217393875122, "step": 4682 }, { "epoch": 0.8526440338581961, "grad_norm": 11.5625, "learning_rate": 4.271763922147531e-06, "loss": 1.3622182607650757, "step": 4684 }, { "epoch": 0.8530081004823883, "grad_norm": 3.609375, "learning_rate": 4.271172525776336e-06, "loss": 1.1282398700714111, "step": 4686 }, { "epoch": 0.8533721671065805, "grad_norm": 9.25, "learning_rate": 4.270580942865725e-06, "loss": 1.0616729259490967, "step": 4688 }, { "epoch": 0.8537362337307728, "grad_norm": 9.0625, "learning_rate": 4.26998917350251e-06, "loss": 1.4764699935913086, "step": 4690 }, { "epoch": 0.854100300354965, "grad_norm": 17.625, "learning_rate": 4.269397217773531e-06, "loss": 1.7590532302856445, "step": 4692 }, { "epoch": 0.8544643669791572, "grad_norm": 44.75, "learning_rate": 4.268805075765654e-06, "loss": 2.0107500553131104, "step": 4694 }, { "epoch": 0.8548284336033494, "grad_norm": 11.75, "learning_rate": 4.268212747565774e-06, "loss": 1.7459686994552612, "step": 4696 }, { "epoch": 0.8551925002275417, "grad_norm": 9.6875, "learning_rate": 4.267620233260814e-06, "loss": 1.4347848892211914, "step": 4698 }, { "epoch": 0.8555565668517339, "grad_norm": 12.25, "learning_rate": 4.267027532937721e-06, "loss": 1.373937964439392, "step": 4700 }, { "epoch": 0.8559206334759261, "grad_norm": 8.6875, "learning_rate": 4.266434646683473e-06, "loss": 1.5379109382629395, "step": 4702 }, { "epoch": 0.8562847001001184, "grad_norm": 13.5, "learning_rate": 4.265841574585072e-06, "loss": 1.384209156036377, "step": 4704 }, { "epoch": 0.8566487667243106, "grad_norm": 8.5, "learning_rate": 4.265248316729551e-06, "loss": 1.0949008464813232, "step": 4706 }, { "epoch": 0.8570128333485028, "grad_norm": 6.96875, "learning_rate": 4.264654873203967e-06, "loss": 1.5007158517837524, "step": 4708 }, { "epoch": 0.857376899972695, "grad_norm": 16.625, "learning_rate": 4.2640612440954055e-06, "loss": 1.3567674160003662, "step": 4710 }, { "epoch": 0.8577409665968873, "grad_norm": 24.375, "learning_rate": 4.263467429490979e-06, "loss": 0.9061417579650879, "step": 4712 }, { "epoch": 0.8581050332210794, "grad_norm": 15.25, "learning_rate": 4.262873429477829e-06, "loss": 0.4820134937763214, "step": 4714 }, { "epoch": 0.8584690998452716, "grad_norm": 10.875, "learning_rate": 4.262279244143119e-06, "loss": 1.7351123094558716, "step": 4716 }, { "epoch": 0.858833166469464, "grad_norm": 15.125, "learning_rate": 4.261684873574047e-06, "loss": 1.369818091392517, "step": 4718 }, { "epoch": 0.8591972330936561, "grad_norm": 14.6875, "learning_rate": 4.261090317857831e-06, "loss": 1.9767913818359375, "step": 4720 }, { "epoch": 0.8595612997178483, "grad_norm": 11.875, "learning_rate": 4.260495577081724e-06, "loss": 1.9178898334503174, "step": 4722 }, { "epoch": 0.8599253663420406, "grad_norm": 12.5, "learning_rate": 4.259900651332998e-06, "loss": 1.4070062637329102, "step": 4724 }, { "epoch": 0.8602894329662328, "grad_norm": 21.625, "learning_rate": 4.259305540698958e-06, "loss": 1.573645830154419, "step": 4726 }, { "epoch": 0.860653499590425, "grad_norm": 22.875, "learning_rate": 4.2587102452669325e-06, "loss": 1.4348583221435547, "step": 4728 }, { "epoch": 0.8610175662146172, "grad_norm": 8.625, "learning_rate": 4.25811476512428e-06, "loss": 1.2445785999298096, "step": 4730 }, { "epoch": 0.8613816328388095, "grad_norm": 15.0, "learning_rate": 4.257519100358385e-06, "loss": 1.4276273250579834, "step": 4732 }, { "epoch": 0.8617456994630017, "grad_norm": 12.5, "learning_rate": 4.2569232510566585e-06, "loss": 1.5543544292449951, "step": 4734 }, { "epoch": 0.8621097660871939, "grad_norm": 10.5625, "learning_rate": 4.256327217306537e-06, "loss": 1.5052167177200317, "step": 4736 }, { "epoch": 0.8624738327113862, "grad_norm": 7.03125, "learning_rate": 4.25573099919549e-06, "loss": 1.2933942079544067, "step": 4738 }, { "epoch": 0.8628378993355784, "grad_norm": 16.375, "learning_rate": 4.255134596811007e-06, "loss": 1.1105557680130005, "step": 4740 }, { "epoch": 0.8632019659597706, "grad_norm": 12.5625, "learning_rate": 4.254538010240608e-06, "loss": 0.692167341709137, "step": 4742 }, { "epoch": 0.8635660325839629, "grad_norm": 12.875, "learning_rate": 4.253941239571841e-06, "loss": 1.613133192062378, "step": 4744 }, { "epoch": 0.8639300992081551, "grad_norm": 11.0, "learning_rate": 4.253344284892279e-06, "loss": 1.5525827407836914, "step": 4746 }, { "epoch": 0.8642941658323473, "grad_norm": 9.625, "learning_rate": 4.252747146289521e-06, "loss": 1.4382461309432983, "step": 4748 }, { "epoch": 0.8646582324565395, "grad_norm": 12.25, "learning_rate": 4.252149823851198e-06, "loss": 1.298912763595581, "step": 4750 }, { "epoch": 0.8650222990807318, "grad_norm": 11.75, "learning_rate": 4.251552317664962e-06, "loss": 1.6574656963348389, "step": 4752 }, { "epoch": 0.865386365704924, "grad_norm": 9.0625, "learning_rate": 4.250954627818495e-06, "loss": 1.4766242504119873, "step": 4754 }, { "epoch": 0.8657504323291162, "grad_norm": 16.5, "learning_rate": 4.250356754399507e-06, "loss": 1.4572079181671143, "step": 4756 }, { "epoch": 0.8661144989533085, "grad_norm": 6.59375, "learning_rate": 4.249758697495733e-06, "loss": 1.1828886270523071, "step": 4758 }, { "epoch": 0.8664785655775007, "grad_norm": 4.1875, "learning_rate": 4.249160457194933e-06, "loss": 1.2524945735931396, "step": 4760 }, { "epoch": 0.8668426322016929, "grad_norm": 13.25, "learning_rate": 4.2485620335849e-06, "loss": 1.5294002294540405, "step": 4762 }, { "epoch": 0.8672066988258852, "grad_norm": 7.65625, "learning_rate": 4.2479634267534484e-06, "loss": 1.5452783107757568, "step": 4764 }, { "epoch": 0.8675707654500774, "grad_norm": 6.15625, "learning_rate": 4.247364636788421e-06, "loss": 1.3477267026901245, "step": 4766 }, { "epoch": 0.8679348320742696, "grad_norm": 17.375, "learning_rate": 4.246765663777689e-06, "loss": 1.3008713722229004, "step": 4768 }, { "epoch": 0.8682988986984618, "grad_norm": 21.0, "learning_rate": 4.2461665078091475e-06, "loss": 1.7729074954986572, "step": 4770 }, { "epoch": 0.8686629653226541, "grad_norm": 7.09375, "learning_rate": 4.245567168970721e-06, "loss": 1.221357822418213, "step": 4772 }, { "epoch": 0.8690270319468463, "grad_norm": 7.75, "learning_rate": 4.244967647350361e-06, "loss": 1.4549627304077148, "step": 4774 }, { "epoch": 0.8693910985710385, "grad_norm": 9.625, "learning_rate": 4.244367943036045e-06, "loss": 1.2176653146743774, "step": 4776 }, { "epoch": 0.8697551651952308, "grad_norm": 17.625, "learning_rate": 4.243768056115774e-06, "loss": 1.3642253875732422, "step": 4778 }, { "epoch": 0.870119231819423, "grad_norm": 24.125, "learning_rate": 4.243167986677584e-06, "loss": 1.2372949123382568, "step": 4780 }, { "epoch": 0.8704832984436152, "grad_norm": 9.75, "learning_rate": 4.242567734809529e-06, "loss": 1.4391237497329712, "step": 4782 }, { "epoch": 0.8708473650678074, "grad_norm": 15.1875, "learning_rate": 4.241967300599696e-06, "loss": 1.5092319250106812, "step": 4784 }, { "epoch": 0.8712114316919997, "grad_norm": 11.9375, "learning_rate": 4.241366684136192e-06, "loss": 1.4717799425125122, "step": 4786 }, { "epoch": 0.8715754983161919, "grad_norm": 8.4375, "learning_rate": 4.24076588550716e-06, "loss": 1.384933590888977, "step": 4788 }, { "epoch": 0.871939564940384, "grad_norm": 8.125, "learning_rate": 4.240164904800761e-06, "loss": 1.3773796558380127, "step": 4790 }, { "epoch": 0.8723036315645764, "grad_norm": 30.5, "learning_rate": 4.23956374210519e-06, "loss": 1.5136041641235352, "step": 4792 }, { "epoch": 0.8726676981887685, "grad_norm": 11.8125, "learning_rate": 4.238962397508662e-06, "loss": 1.400811791419983, "step": 4794 }, { "epoch": 0.8730317648129607, "grad_norm": 16.375, "learning_rate": 4.238360871099424e-06, "loss": 2.15653657913208, "step": 4796 }, { "epoch": 0.873395831437153, "grad_norm": 31.5, "learning_rate": 4.2377591629657465e-06, "loss": 1.1911017894744873, "step": 4798 }, { "epoch": 0.8737598980613452, "grad_norm": 18.0, "learning_rate": 4.237157273195927e-06, "loss": 1.8955740928649902, "step": 4800 }, { "epoch": 0.8741239646855374, "grad_norm": 10.6875, "learning_rate": 4.2365552018782925e-06, "loss": 1.627846360206604, "step": 4802 }, { "epoch": 0.8744880313097296, "grad_norm": 8.125, "learning_rate": 4.235952949101195e-06, "loss": 1.3204150199890137, "step": 4804 }, { "epoch": 0.8748520979339219, "grad_norm": 7.1875, "learning_rate": 4.2353505149530095e-06, "loss": 1.3699387311935425, "step": 4806 }, { "epoch": 0.8752161645581141, "grad_norm": 13.3125, "learning_rate": 4.234747899522142e-06, "loss": 1.353880763053894, "step": 4808 }, { "epoch": 0.8755802311823063, "grad_norm": 22.875, "learning_rate": 4.234145102897025e-06, "loss": 0.9445521235466003, "step": 4810 }, { "epoch": 0.8759442978064986, "grad_norm": 5.625, "learning_rate": 4.2335421251661155e-06, "loss": 1.4221251010894775, "step": 4812 }, { "epoch": 0.8763083644306908, "grad_norm": 10.9375, "learning_rate": 4.232938966417898e-06, "loss": 1.3604538440704346, "step": 4814 }, { "epoch": 0.876672431054883, "grad_norm": 16.0, "learning_rate": 4.232335626740883e-06, "loss": 1.7300820350646973, "step": 4816 }, { "epoch": 0.8770364976790753, "grad_norm": 22.125, "learning_rate": 4.231732106223611e-06, "loss": 1.5328739881515503, "step": 4818 }, { "epoch": 0.8774005643032675, "grad_norm": 17.375, "learning_rate": 4.231128404954643e-06, "loss": 1.9044041633605957, "step": 4820 }, { "epoch": 0.8777646309274597, "grad_norm": 19.875, "learning_rate": 4.230524523022571e-06, "loss": 1.772191047668457, "step": 4822 }, { "epoch": 0.8781286975516519, "grad_norm": 12.0, "learning_rate": 4.2299204605160125e-06, "loss": 1.2660330533981323, "step": 4824 }, { "epoch": 0.8784927641758442, "grad_norm": 94.5, "learning_rate": 4.2293162175236105e-06, "loss": 1.0608837604522705, "step": 4826 }, { "epoch": 0.8788568308000364, "grad_norm": 8.9375, "learning_rate": 4.228711794134035e-06, "loss": 1.3414355516433716, "step": 4828 }, { "epoch": 0.8792208974242286, "grad_norm": 162.0, "learning_rate": 4.228107190435984e-06, "loss": 1.876403570175171, "step": 4830 }, { "epoch": 0.8795849640484209, "grad_norm": 11.25, "learning_rate": 4.22750240651818e-06, "loss": 0.911278486251831, "step": 4832 }, { "epoch": 0.8799490306726131, "grad_norm": 9.5, "learning_rate": 4.226897442469372e-06, "loss": 1.4786566495895386, "step": 4834 }, { "epoch": 0.8803130972968053, "grad_norm": 4.5, "learning_rate": 4.226292298378337e-06, "loss": 0.9341336488723755, "step": 4836 }, { "epoch": 0.8806771639209976, "grad_norm": 6.71875, "learning_rate": 4.225686974333877e-06, "loss": 1.1129498481750488, "step": 4838 }, { "epoch": 0.8810412305451898, "grad_norm": 8.5625, "learning_rate": 4.22508147042482e-06, "loss": 1.2893462181091309, "step": 4840 }, { "epoch": 0.881405297169382, "grad_norm": 18.375, "learning_rate": 4.224475786740022e-06, "loss": 1.4436461925506592, "step": 4842 }, { "epoch": 0.8817693637935742, "grad_norm": 10.8125, "learning_rate": 4.223869923368366e-06, "loss": 0.9314145445823669, "step": 4844 }, { "epoch": 0.8821334304177665, "grad_norm": 18.375, "learning_rate": 4.223263880398757e-06, "loss": 1.4014346599578857, "step": 4846 }, { "epoch": 0.8824974970419587, "grad_norm": 8.4375, "learning_rate": 4.222657657920131e-06, "loss": 0.8709172606468201, "step": 4848 }, { "epoch": 0.8828615636661509, "grad_norm": 6.21875, "learning_rate": 4.22205125602145e-06, "loss": 1.2982878684997559, "step": 4850 }, { "epoch": 0.8832256302903432, "grad_norm": 19.125, "learning_rate": 4.2214446747917e-06, "loss": 1.6648458242416382, "step": 4852 }, { "epoch": 0.8835896969145354, "grad_norm": 14.5, "learning_rate": 4.2208379143198926e-06, "loss": 0.7558045387268066, "step": 4854 }, { "epoch": 0.8839537635387276, "grad_norm": 22.75, "learning_rate": 4.22023097469507e-06, "loss": 1.7609848976135254, "step": 4856 }, { "epoch": 0.8843178301629198, "grad_norm": 8.1875, "learning_rate": 4.219623856006296e-06, "loss": 1.5912220478057861, "step": 4858 }, { "epoch": 0.8846818967871121, "grad_norm": 39.5, "learning_rate": 4.2190165583426645e-06, "loss": 1.973174810409546, "step": 4860 }, { "epoch": 0.8850459634113043, "grad_norm": 9.6875, "learning_rate": 4.218409081793294e-06, "loss": 1.7836987972259521, "step": 4862 }, { "epoch": 0.8854100300354965, "grad_norm": 3.3125, "learning_rate": 4.217801426447328e-06, "loss": 1.269561529159546, "step": 4864 }, { "epoch": 0.8857740966596888, "grad_norm": 3.796875, "learning_rate": 4.217193592393937e-06, "loss": 0.7881308794021606, "step": 4866 }, { "epoch": 0.886138163283881, "grad_norm": 12.125, "learning_rate": 4.21658557972232e-06, "loss": 0.9713007807731628, "step": 4868 }, { "epoch": 0.8865022299080731, "grad_norm": 9.8125, "learning_rate": 4.215977388521699e-06, "loss": 1.0350278615951538, "step": 4870 }, { "epoch": 0.8868662965322655, "grad_norm": 5.0, "learning_rate": 4.2153690188813255e-06, "loss": 1.3495904207229614, "step": 4872 }, { "epoch": 0.8872303631564576, "grad_norm": 20.125, "learning_rate": 4.214760470890473e-06, "loss": 1.4872019290924072, "step": 4874 }, { "epoch": 0.8875944297806498, "grad_norm": 15.0, "learning_rate": 4.214151744638444e-06, "loss": 1.4880549907684326, "step": 4876 }, { "epoch": 0.887958496404842, "grad_norm": 9.5625, "learning_rate": 4.213542840214569e-06, "loss": 1.6322938203811646, "step": 4878 }, { "epoch": 0.8883225630290343, "grad_norm": 5.6875, "learning_rate": 4.212933757708198e-06, "loss": 1.224144458770752, "step": 4880 }, { "epoch": 0.8886866296532265, "grad_norm": 19.875, "learning_rate": 4.212324497208714e-06, "loss": 1.2496262788772583, "step": 4882 }, { "epoch": 0.8890506962774187, "grad_norm": 8.5625, "learning_rate": 4.211715058805523e-06, "loss": 1.1319372653961182, "step": 4884 }, { "epoch": 0.889414762901611, "grad_norm": 6.3125, "learning_rate": 4.21110544258806e-06, "loss": 1.5279579162597656, "step": 4886 }, { "epoch": 0.8897788295258032, "grad_norm": 7.9375, "learning_rate": 4.210495648645778e-06, "loss": 1.1159669160842896, "step": 4888 }, { "epoch": 0.8901428961499954, "grad_norm": 20.125, "learning_rate": 4.209885677068167e-06, "loss": 1.555025339126587, "step": 4890 }, { "epoch": 0.8905069627741877, "grad_norm": 17.75, "learning_rate": 4.209275527944736e-06, "loss": 1.6326406002044678, "step": 4892 }, { "epoch": 0.8908710293983799, "grad_norm": 12.6875, "learning_rate": 4.208665201365023e-06, "loss": 1.3105897903442383, "step": 4894 }, { "epoch": 0.8912350960225721, "grad_norm": 8.75, "learning_rate": 4.208054697418589e-06, "loss": 0.8129794001579285, "step": 4896 }, { "epoch": 0.8915991626467643, "grad_norm": 40.75, "learning_rate": 4.207444016195024e-06, "loss": 0.6662251949310303, "step": 4898 }, { "epoch": 0.8919632292709566, "grad_norm": 16.0, "learning_rate": 4.206833157783944e-06, "loss": 0.923011839389801, "step": 4900 }, { "epoch": 0.8923272958951488, "grad_norm": 28.125, "learning_rate": 4.206222122274988e-06, "loss": 1.3391501903533936, "step": 4902 }, { "epoch": 0.892691362519341, "grad_norm": 18.75, "learning_rate": 4.205610909757823e-06, "loss": 1.3556047677993774, "step": 4904 }, { "epoch": 0.8930554291435333, "grad_norm": 20.75, "learning_rate": 4.204999520322145e-06, "loss": 1.1428438425064087, "step": 4906 }, { "epoch": 0.8934194957677255, "grad_norm": 12.4375, "learning_rate": 4.2043879540576695e-06, "loss": 1.2189842462539673, "step": 4908 }, { "epoch": 0.8937835623919177, "grad_norm": 13.0, "learning_rate": 4.203776211054144e-06, "loss": 1.8105757236480713, "step": 4910 }, { "epoch": 0.89414762901611, "grad_norm": 5.78125, "learning_rate": 4.203164291401336e-06, "loss": 1.2286920547485352, "step": 4912 }, { "epoch": 0.8945116956403022, "grad_norm": 7.46875, "learning_rate": 4.202552195189046e-06, "loss": 1.553588628768921, "step": 4914 }, { "epoch": 0.8948757622644944, "grad_norm": 6.3125, "learning_rate": 4.201939922507093e-06, "loss": 1.4365291595458984, "step": 4916 }, { "epoch": 0.8952398288886866, "grad_norm": 29.375, "learning_rate": 4.201327473445329e-06, "loss": 1.1697344779968262, "step": 4918 }, { "epoch": 0.8956038955128789, "grad_norm": 148.0, "learning_rate": 4.200714848093627e-06, "loss": 1.9819934368133545, "step": 4920 }, { "epoch": 0.8959679621370711, "grad_norm": 20.625, "learning_rate": 4.200102046541887e-06, "loss": 1.492671012878418, "step": 4922 }, { "epoch": 0.8963320287612633, "grad_norm": 17.25, "learning_rate": 4.199489068880034e-06, "loss": 1.1183415651321411, "step": 4924 }, { "epoch": 0.8966960953854556, "grad_norm": 10.3125, "learning_rate": 4.198875915198021e-06, "loss": 1.5800209045410156, "step": 4926 }, { "epoch": 0.8970601620096478, "grad_norm": 9.6875, "learning_rate": 4.198262585585827e-06, "loss": 1.3528177738189697, "step": 4928 }, { "epoch": 0.89742422863384, "grad_norm": 11.9375, "learning_rate": 4.1976490801334555e-06, "loss": 1.722776174545288, "step": 4930 }, { "epoch": 0.8977882952580322, "grad_norm": 11.4375, "learning_rate": 4.197035398930935e-06, "loss": 1.3847436904907227, "step": 4932 }, { "epoch": 0.8981523618822245, "grad_norm": 11.375, "learning_rate": 4.19642154206832e-06, "loss": 1.584481120109558, "step": 4934 }, { "epoch": 0.8985164285064167, "grad_norm": 4.34375, "learning_rate": 4.195807509635692e-06, "loss": 1.181349754333496, "step": 4936 }, { "epoch": 0.8988804951306089, "grad_norm": 12.875, "learning_rate": 4.195193301723158e-06, "loss": 1.2476022243499756, "step": 4938 }, { "epoch": 0.8992445617548012, "grad_norm": 13.8125, "learning_rate": 4.194578918420852e-06, "loss": 1.6769561767578125, "step": 4940 }, { "epoch": 0.8996086283789934, "grad_norm": 10.9375, "learning_rate": 4.193964359818931e-06, "loss": 1.8135871887207031, "step": 4942 }, { "epoch": 0.8999726950031856, "grad_norm": 8.625, "learning_rate": 4.193349626007578e-06, "loss": 1.135275959968567, "step": 4944 }, { "epoch": 0.9003367616273779, "grad_norm": 9.625, "learning_rate": 4.192734717077004e-06, "loss": 1.2510685920715332, "step": 4946 }, { "epoch": 0.90070082825157, "grad_norm": 6.71875, "learning_rate": 4.192119633117443e-06, "loss": 1.337836503982544, "step": 4948 }, { "epoch": 0.9010648948757622, "grad_norm": 11.0625, "learning_rate": 4.191504374219158e-06, "loss": 1.3526248931884766, "step": 4950 }, { "epoch": 0.9014289614999544, "grad_norm": 19.625, "learning_rate": 4.190888940472435e-06, "loss": 1.9208570718765259, "step": 4952 }, { "epoch": 0.9017930281241467, "grad_norm": 7.75, "learning_rate": 4.1902733319675855e-06, "loss": 1.0100975036621094, "step": 4954 }, { "epoch": 0.9021570947483389, "grad_norm": 9.1875, "learning_rate": 4.1896575487949485e-06, "loss": 1.0518213510513306, "step": 4956 }, { "epoch": 0.9025211613725311, "grad_norm": 10.125, "learning_rate": 4.189041591044889e-06, "loss": 1.4388116598129272, "step": 4958 }, { "epoch": 0.9028852279967234, "grad_norm": 57.0, "learning_rate": 4.1884254588077935e-06, "loss": 1.567813515663147, "step": 4960 }, { "epoch": 0.9032492946209156, "grad_norm": 39.5, "learning_rate": 4.187809152174078e-06, "loss": 1.2036789655685425, "step": 4962 }, { "epoch": 0.9036133612451078, "grad_norm": 14.125, "learning_rate": 4.187192671234182e-06, "loss": 1.4062695503234863, "step": 4964 }, { "epoch": 0.9039774278693001, "grad_norm": 6.5625, "learning_rate": 4.186576016078575e-06, "loss": 1.321616291999817, "step": 4966 }, { "epoch": 0.9043414944934923, "grad_norm": 8.8125, "learning_rate": 4.185959186797747e-06, "loss": 0.9876461625099182, "step": 4968 }, { "epoch": 0.9047055611176845, "grad_norm": 7.84375, "learning_rate": 4.185342183482213e-06, "loss": 1.205830693244934, "step": 4970 }, { "epoch": 0.9050696277418767, "grad_norm": 14.375, "learning_rate": 4.184725006222517e-06, "loss": 1.518424391746521, "step": 4972 }, { "epoch": 0.905433694366069, "grad_norm": 9.875, "learning_rate": 4.184107655109227e-06, "loss": 1.6104869842529297, "step": 4974 }, { "epoch": 0.9057977609902612, "grad_norm": 8.875, "learning_rate": 4.18349013023294e-06, "loss": 1.4636800289154053, "step": 4976 }, { "epoch": 0.9061618276144534, "grad_norm": 9.5625, "learning_rate": 4.18287243168427e-06, "loss": 1.4461798667907715, "step": 4978 }, { "epoch": 0.9065258942386457, "grad_norm": 28.875, "learning_rate": 4.182254559553867e-06, "loss": 1.3796623945236206, "step": 4980 }, { "epoch": 0.9068899608628379, "grad_norm": 13.0, "learning_rate": 4.181636513932397e-06, "loss": 1.3355103731155396, "step": 4982 }, { "epoch": 0.9072540274870301, "grad_norm": 25.375, "learning_rate": 4.181018294910557e-06, "loss": 2.0719802379608154, "step": 4984 }, { "epoch": 0.9076180941112224, "grad_norm": 17.75, "learning_rate": 4.1803999025790695e-06, "loss": 1.1612348556518555, "step": 4986 }, { "epoch": 0.9079821607354146, "grad_norm": 13.5, "learning_rate": 4.17978133702868e-06, "loss": 1.8009529113769531, "step": 4988 }, { "epoch": 0.9083462273596068, "grad_norm": 16.0, "learning_rate": 4.179162598350159e-06, "loss": 1.4866795539855957, "step": 4990 }, { "epoch": 0.908710293983799, "grad_norm": 22.0, "learning_rate": 4.178543686634307e-06, "loss": 1.992185354232788, "step": 4992 }, { "epoch": 0.9090743606079913, "grad_norm": 31.75, "learning_rate": 4.177924601971944e-06, "loss": 1.487194299697876, "step": 4994 }, { "epoch": 0.9094384272321835, "grad_norm": 9.8125, "learning_rate": 4.177305344453921e-06, "loss": 1.4130845069885254, "step": 4996 }, { "epoch": 0.9098024938563757, "grad_norm": 13.125, "learning_rate": 4.176685914171109e-06, "loss": 0.9557865858078003, "step": 4998 }, { "epoch": 0.910166560480568, "grad_norm": 10.5625, "learning_rate": 4.176066311214407e-06, "loss": 1.0403658151626587, "step": 5000 }, { "epoch": 0.9105306271047602, "grad_norm": 48.5, "learning_rate": 4.175446535674742e-06, "loss": 0.5787985324859619, "step": 5002 }, { "epoch": 0.9108946937289524, "grad_norm": 8.625, "learning_rate": 4.174826587643061e-06, "loss": 1.4757091999053955, "step": 5004 }, { "epoch": 0.9112587603531446, "grad_norm": 17.0, "learning_rate": 4.174206467210337e-06, "loss": 1.932018756866455, "step": 5006 }, { "epoch": 0.9116228269773369, "grad_norm": 6.46875, "learning_rate": 4.173586174467575e-06, "loss": 1.2241672277450562, "step": 5008 }, { "epoch": 0.9119868936015291, "grad_norm": 9.0625, "learning_rate": 4.172965709505797e-06, "loss": 1.5072886943817139, "step": 5010 }, { "epoch": 0.9123509602257213, "grad_norm": 6.3125, "learning_rate": 4.1723450724160565e-06, "loss": 1.0905475616455078, "step": 5012 }, { "epoch": 0.9127150268499136, "grad_norm": 15.125, "learning_rate": 4.171724263289426e-06, "loss": 1.4574275016784668, "step": 5014 }, { "epoch": 0.9130790934741058, "grad_norm": 6.5, "learning_rate": 4.171103282217009e-06, "loss": 1.2100286483764648, "step": 5016 }, { "epoch": 0.913443160098298, "grad_norm": 5.84375, "learning_rate": 4.170482129289931e-06, "loss": 1.3488829135894775, "step": 5018 }, { "epoch": 0.9138072267224903, "grad_norm": 10.0625, "learning_rate": 4.169860804599344e-06, "loss": 1.435385823249817, "step": 5020 }, { "epoch": 0.9141712933466825, "grad_norm": 11.625, "learning_rate": 4.169239308236424e-06, "loss": 1.5078396797180176, "step": 5022 }, { "epoch": 0.9145353599708747, "grad_norm": 12.6875, "learning_rate": 4.168617640292376e-06, "loss": 1.3219835758209229, "step": 5024 }, { "epoch": 0.9148994265950668, "grad_norm": 6.4375, "learning_rate": 4.167995800858425e-06, "loss": 1.2192504405975342, "step": 5026 }, { "epoch": 0.9152634932192591, "grad_norm": 32.75, "learning_rate": 4.167373790025824e-06, "loss": 1.5529321432113647, "step": 5028 }, { "epoch": 0.9156275598434513, "grad_norm": 21.0, "learning_rate": 4.166751607885848e-06, "loss": 1.614398717880249, "step": 5030 }, { "epoch": 0.9159916264676435, "grad_norm": 14.125, "learning_rate": 4.166129254529804e-06, "loss": 1.584756851196289, "step": 5032 }, { "epoch": 0.9163556930918358, "grad_norm": 21.875, "learning_rate": 4.165506730049017e-06, "loss": 1.5878582000732422, "step": 5034 }, { "epoch": 0.916719759716028, "grad_norm": 29.625, "learning_rate": 4.164884034534842e-06, "loss": 1.42337965965271, "step": 5036 }, { "epoch": 0.9170838263402202, "grad_norm": 14.8125, "learning_rate": 4.1642611680786545e-06, "loss": 1.5671693086624146, "step": 5038 }, { "epoch": 0.9174478929644125, "grad_norm": 7.1875, "learning_rate": 4.16363813077186e-06, "loss": 1.4436068534851074, "step": 5040 }, { "epoch": 0.9178119595886047, "grad_norm": 5.84375, "learning_rate": 4.1630149227058846e-06, "loss": 1.289434552192688, "step": 5042 }, { "epoch": 0.9181760262127969, "grad_norm": 8.125, "learning_rate": 4.1623915439721826e-06, "loss": 0.9608882069587708, "step": 5044 }, { "epoch": 0.9185400928369891, "grad_norm": 19.625, "learning_rate": 4.161767994662233e-06, "loss": 1.9189702272415161, "step": 5046 }, { "epoch": 0.9189041594611814, "grad_norm": 7.84375, "learning_rate": 4.161144274867538e-06, "loss": 1.2211374044418335, "step": 5048 }, { "epoch": 0.9192682260853736, "grad_norm": 10.0, "learning_rate": 4.160520384679626e-06, "loss": 0.8679113984107971, "step": 5050 }, { "epoch": 0.9196322927095658, "grad_norm": 9.5, "learning_rate": 4.15989632419005e-06, "loss": 1.583971381187439, "step": 5052 }, { "epoch": 0.9199963593337581, "grad_norm": 6.125, "learning_rate": 4.159272093490391e-06, "loss": 0.958564043045044, "step": 5054 }, { "epoch": 0.9203604259579503, "grad_norm": 19.875, "learning_rate": 4.1586476926722495e-06, "loss": 1.3093253374099731, "step": 5056 }, { "epoch": 0.9207244925821425, "grad_norm": 6.78125, "learning_rate": 4.158023121827255e-06, "loss": 1.24656081199646, "step": 5058 }, { "epoch": 0.9210885592063348, "grad_norm": 8.25, "learning_rate": 4.15739838104706e-06, "loss": 1.2656464576721191, "step": 5060 }, { "epoch": 0.921452625830527, "grad_norm": 21.5, "learning_rate": 4.156773470423343e-06, "loss": 1.220057725906372, "step": 5062 }, { "epoch": 0.9218166924547192, "grad_norm": 4.875, "learning_rate": 4.1561483900478085e-06, "loss": 1.1945585012435913, "step": 5064 }, { "epoch": 0.9221807590789114, "grad_norm": 10.8125, "learning_rate": 4.155523140012182e-06, "loss": 1.4139323234558105, "step": 5066 }, { "epoch": 0.9225448257031037, "grad_norm": 20.625, "learning_rate": 4.154897720408217e-06, "loss": 1.480215311050415, "step": 5068 }, { "epoch": 0.9229088923272959, "grad_norm": 26.5, "learning_rate": 4.154272131327693e-06, "loss": 1.612576961517334, "step": 5070 }, { "epoch": 0.9232729589514881, "grad_norm": 12.875, "learning_rate": 4.153646372862411e-06, "loss": 0.518166184425354, "step": 5072 }, { "epoch": 0.9236370255756804, "grad_norm": 32.25, "learning_rate": 4.1530204451042e-06, "loss": 1.3966941833496094, "step": 5074 }, { "epoch": 0.9240010921998726, "grad_norm": 6.34375, "learning_rate": 4.152394348144912e-06, "loss": 1.0344822406768799, "step": 5076 }, { "epoch": 0.9243651588240648, "grad_norm": 5.03125, "learning_rate": 4.151768082076422e-06, "loss": 0.9916671514511108, "step": 5078 }, { "epoch": 0.9247292254482571, "grad_norm": 10.375, "learning_rate": 4.151141646990633e-06, "loss": 1.3234797716140747, "step": 5080 }, { "epoch": 0.9250932920724493, "grad_norm": 12.75, "learning_rate": 4.150515042979474e-06, "loss": 1.350399136543274, "step": 5082 }, { "epoch": 0.9254573586966415, "grad_norm": 5.65625, "learning_rate": 4.149888270134895e-06, "loss": 1.0546815395355225, "step": 5084 }, { "epoch": 0.9258214253208337, "grad_norm": 20.125, "learning_rate": 4.149261328548873e-06, "loss": 1.148550271987915, "step": 5086 }, { "epoch": 0.926185491945026, "grad_norm": 23.75, "learning_rate": 4.148634218313406e-06, "loss": 1.4253292083740234, "step": 5088 }, { "epoch": 0.9265495585692182, "grad_norm": 21.75, "learning_rate": 4.148006939520524e-06, "loss": 1.2504509687423706, "step": 5090 }, { "epoch": 0.9269136251934104, "grad_norm": 17.25, "learning_rate": 4.147379492262278e-06, "loss": 1.775317907333374, "step": 5092 }, { "epoch": 0.9272776918176027, "grad_norm": 10.375, "learning_rate": 4.146751876630739e-06, "loss": 1.9931669235229492, "step": 5094 }, { "epoch": 0.9276417584417949, "grad_norm": 5.59375, "learning_rate": 4.14612409271801e-06, "loss": 1.3912854194641113, "step": 5096 }, { "epoch": 0.9280058250659871, "grad_norm": 6.78125, "learning_rate": 4.145496140616217e-06, "loss": 1.1869392395019531, "step": 5098 }, { "epoch": 0.9283698916901792, "grad_norm": 5.5, "learning_rate": 4.1448680204175054e-06, "loss": 1.099517583847046, "step": 5100 }, { "epoch": 0.9287339583143716, "grad_norm": 10.0, "learning_rate": 4.144239732214052e-06, "loss": 1.2340768575668335, "step": 5102 }, { "epoch": 0.9290980249385637, "grad_norm": 16.625, "learning_rate": 4.143611276098055e-06, "loss": 1.3761390447616577, "step": 5104 }, { "epoch": 0.9294620915627559, "grad_norm": 15.1875, "learning_rate": 4.1429826521617385e-06, "loss": 1.8559261560440063, "step": 5106 }, { "epoch": 0.9298261581869482, "grad_norm": 5.65625, "learning_rate": 4.14235386049735e-06, "loss": 1.3722867965698242, "step": 5108 }, { "epoch": 0.9301902248111404, "grad_norm": 12.75, "learning_rate": 4.141724901197161e-06, "loss": 1.351995825767517, "step": 5110 }, { "epoch": 0.9305542914353326, "grad_norm": 6.875, "learning_rate": 4.14109577435347e-06, "loss": 1.1992316246032715, "step": 5112 }, { "epoch": 0.9309183580595249, "grad_norm": 9.0625, "learning_rate": 4.1404664800586e-06, "loss": 0.958271861076355, "step": 5114 }, { "epoch": 0.9312824246837171, "grad_norm": 16.75, "learning_rate": 4.139837018404895e-06, "loss": 1.400178074836731, "step": 5116 }, { "epoch": 0.9316464913079093, "grad_norm": 15.9375, "learning_rate": 4.139207389484727e-06, "loss": 2.0021300315856934, "step": 5118 }, { "epoch": 0.9320105579321015, "grad_norm": 6.40625, "learning_rate": 4.1385775933904915e-06, "loss": 1.1845452785491943, "step": 5120 }, { "epoch": 0.9323746245562938, "grad_norm": 12.1875, "learning_rate": 4.1379476302146085e-06, "loss": 1.2682902812957764, "step": 5122 }, { "epoch": 0.932738691180486, "grad_norm": 8.4375, "learning_rate": 4.1373175000495215e-06, "loss": 1.6483125686645508, "step": 5124 }, { "epoch": 0.9331027578046782, "grad_norm": 13.3125, "learning_rate": 4.136687202987701e-06, "loss": 1.129159927368164, "step": 5126 }, { "epoch": 0.9334668244288705, "grad_norm": 3.546875, "learning_rate": 4.136056739121641e-06, "loss": 1.296934723854065, "step": 5128 }, { "epoch": 0.9338308910530627, "grad_norm": 6.53125, "learning_rate": 4.1354261085438575e-06, "loss": 0.9634895920753479, "step": 5130 }, { "epoch": 0.9341949576772549, "grad_norm": 10.4375, "learning_rate": 4.134795311346894e-06, "loss": 1.4433799982070923, "step": 5132 }, { "epoch": 0.9345590243014472, "grad_norm": 11.875, "learning_rate": 4.1341643476233185e-06, "loss": 1.0714704990386963, "step": 5134 }, { "epoch": 0.9349230909256394, "grad_norm": 11.8125, "learning_rate": 4.133533217465721e-06, "loss": 1.5339690446853638, "step": 5136 }, { "epoch": 0.9352871575498316, "grad_norm": 27.375, "learning_rate": 4.132901920966716e-06, "loss": 1.901155948638916, "step": 5138 }, { "epoch": 0.9356512241740238, "grad_norm": 18.0, "learning_rate": 4.132270458218947e-06, "loss": 1.8158046007156372, "step": 5140 }, { "epoch": 0.9360152907982161, "grad_norm": 7.75, "learning_rate": 4.1316388293150765e-06, "loss": 1.4996728897094727, "step": 5142 }, { "epoch": 0.9363793574224083, "grad_norm": 9.5, "learning_rate": 4.131007034347795e-06, "loss": 1.6671572923660278, "step": 5144 }, { "epoch": 0.9367434240466005, "grad_norm": 18.75, "learning_rate": 4.130375073409814e-06, "loss": 1.4469289779663086, "step": 5146 }, { "epoch": 0.9371074906707928, "grad_norm": 6.40625, "learning_rate": 4.129742946593872e-06, "loss": 0.9581305384635925, "step": 5148 }, { "epoch": 0.937471557294985, "grad_norm": 3.15625, "learning_rate": 4.12911065399273e-06, "loss": 0.9949471354484558, "step": 5150 }, { "epoch": 0.9378356239191772, "grad_norm": 6.0625, "learning_rate": 4.128478195699176e-06, "loss": 1.3300024271011353, "step": 5152 }, { "epoch": 0.9381996905433695, "grad_norm": 12.4375, "learning_rate": 4.12784557180602e-06, "loss": 1.3249174356460571, "step": 5154 }, { "epoch": 0.9385637571675617, "grad_norm": 10.25, "learning_rate": 4.127212782406098e-06, "loss": 1.8071355819702148, "step": 5156 }, { "epoch": 0.9389278237917539, "grad_norm": 7.6875, "learning_rate": 4.1265798275922685e-06, "loss": 1.3531473875045776, "step": 5158 }, { "epoch": 0.9392918904159461, "grad_norm": 13.125, "learning_rate": 4.125946707457415e-06, "loss": 1.3463969230651855, "step": 5160 }, { "epoch": 0.9396559570401384, "grad_norm": 13.8125, "learning_rate": 4.125313422094443e-06, "loss": 1.3990118503570557, "step": 5162 }, { "epoch": 0.9400200236643306, "grad_norm": 12.1875, "learning_rate": 4.124679971596289e-06, "loss": 1.2347328662872314, "step": 5164 }, { "epoch": 0.9403840902885228, "grad_norm": 11.25, "learning_rate": 4.1240463560559066e-06, "loss": 1.0382413864135742, "step": 5166 }, { "epoch": 0.9407481569127151, "grad_norm": 14.625, "learning_rate": 4.123412575566276e-06, "loss": 1.5316096544265747, "step": 5168 }, { "epoch": 0.9411122235369073, "grad_norm": 19.625, "learning_rate": 4.122778630220403e-06, "loss": 1.7390072345733643, "step": 5170 }, { "epoch": 0.9414762901610995, "grad_norm": 24.625, "learning_rate": 4.122144520111317e-06, "loss": 1.4744058847427368, "step": 5172 }, { "epoch": 0.9418403567852917, "grad_norm": 9.5, "learning_rate": 4.12151024533207e-06, "loss": 1.5495997667312622, "step": 5174 }, { "epoch": 0.942204423409484, "grad_norm": 6.65625, "learning_rate": 4.1208758059757405e-06, "loss": 1.188522219657898, "step": 5176 }, { "epoch": 0.9425684900336762, "grad_norm": 7.4375, "learning_rate": 4.120241202135428e-06, "loss": 1.4370396137237549, "step": 5178 }, { "epoch": 0.9429325566578683, "grad_norm": 20.625, "learning_rate": 4.119606433904259e-06, "loss": 1.4181649684906006, "step": 5180 }, { "epoch": 0.9432966232820607, "grad_norm": 9.0, "learning_rate": 4.118971501375383e-06, "loss": 1.4289231300354004, "step": 5182 }, { "epoch": 0.9436606899062528, "grad_norm": 13.4375, "learning_rate": 4.1183364046419726e-06, "loss": 1.099740743637085, "step": 5184 }, { "epoch": 0.944024756530445, "grad_norm": 14.875, "learning_rate": 4.117701143797229e-06, "loss": 0.9242735505104065, "step": 5186 }, { "epoch": 0.9443888231546373, "grad_norm": 13.5625, "learning_rate": 4.1170657189343725e-06, "loss": 1.3909621238708496, "step": 5188 }, { "epoch": 0.9447528897788295, "grad_norm": 6.71875, "learning_rate": 4.116430130146648e-06, "loss": 1.3420443534851074, "step": 5190 }, { "epoch": 0.9451169564030217, "grad_norm": 5.1875, "learning_rate": 4.115794377527327e-06, "loss": 1.2318744659423828, "step": 5192 }, { "epoch": 0.9454810230272139, "grad_norm": 9.125, "learning_rate": 4.115158461169703e-06, "loss": 1.52884840965271, "step": 5194 }, { "epoch": 0.9458450896514062, "grad_norm": 11.75, "learning_rate": 4.114522381167093e-06, "loss": 1.4363899230957031, "step": 5196 }, { "epoch": 0.9462091562755984, "grad_norm": 13.375, "learning_rate": 4.11388613761284e-06, "loss": 1.6438429355621338, "step": 5198 }, { "epoch": 0.9465732228997906, "grad_norm": 12.5, "learning_rate": 4.113249730600311e-06, "loss": 1.9754016399383545, "step": 5200 }, { "epoch": 0.9469372895239829, "grad_norm": 10.625, "learning_rate": 4.112613160222897e-06, "loss": 1.2382174730300903, "step": 5202 }, { "epoch": 0.9473013561481751, "grad_norm": 16.875, "learning_rate": 4.11197642657401e-06, "loss": 1.3318297863006592, "step": 5204 }, { "epoch": 0.9476654227723673, "grad_norm": 11.6875, "learning_rate": 4.1113395297470895e-06, "loss": 1.870293378829956, "step": 5206 }, { "epoch": 0.9480294893965596, "grad_norm": 26.0, "learning_rate": 4.110702469835596e-06, "loss": 1.3904330730438232, "step": 5208 }, { "epoch": 0.9483935560207518, "grad_norm": 3.125, "learning_rate": 4.110065246933016e-06, "loss": 0.9703547358512878, "step": 5210 }, { "epoch": 0.948757622644944, "grad_norm": 12.125, "learning_rate": 4.109427861132861e-06, "loss": 1.6436619758605957, "step": 5212 }, { "epoch": 0.9491216892691362, "grad_norm": 8.125, "learning_rate": 4.108790312528662e-06, "loss": 1.5922930240631104, "step": 5214 }, { "epoch": 0.9494857558933285, "grad_norm": 13.5625, "learning_rate": 4.108152601213979e-06, "loss": 1.4536755084991455, "step": 5216 }, { "epoch": 0.9498498225175207, "grad_norm": 38.5, "learning_rate": 4.107514727282394e-06, "loss": 1.1911377906799316, "step": 5218 }, { "epoch": 0.9502138891417129, "grad_norm": 17.5, "learning_rate": 4.106876690827508e-06, "loss": 0.9176419973373413, "step": 5220 }, { "epoch": 0.9505779557659052, "grad_norm": 15.4375, "learning_rate": 4.106238491942956e-06, "loss": 1.0802327394485474, "step": 5222 }, { "epoch": 0.9509420223900974, "grad_norm": 5.4375, "learning_rate": 4.105600130722387e-06, "loss": 0.9345170259475708, "step": 5224 }, { "epoch": 0.9513060890142896, "grad_norm": 6.75, "learning_rate": 4.104961607259481e-06, "loss": 1.1717047691345215, "step": 5226 }, { "epoch": 0.9516701556384819, "grad_norm": 9.4375, "learning_rate": 4.1043229216479364e-06, "loss": 1.4178457260131836, "step": 5228 }, { "epoch": 0.9520342222626741, "grad_norm": 9.1875, "learning_rate": 4.103684073981478e-06, "loss": 1.4470610618591309, "step": 5230 }, { "epoch": 0.9523982888868663, "grad_norm": 5.4375, "learning_rate": 4.103045064353854e-06, "loss": 1.306809425354004, "step": 5232 }, { "epoch": 0.9527623555110585, "grad_norm": 22.25, "learning_rate": 4.1024058928588386e-06, "loss": 1.5226964950561523, "step": 5234 }, { "epoch": 0.9531264221352508, "grad_norm": 8.625, "learning_rate": 4.101766559590226e-06, "loss": 2.154179334640503, "step": 5236 }, { "epoch": 0.953490488759443, "grad_norm": 11.0, "learning_rate": 4.1011270646418345e-06, "loss": 1.469681978225708, "step": 5238 }, { "epoch": 0.9538545553836352, "grad_norm": 25.0, "learning_rate": 4.10048740810751e-06, "loss": 1.5788908004760742, "step": 5240 }, { "epoch": 0.9542186220078275, "grad_norm": 18.375, "learning_rate": 4.099847590081117e-06, "loss": 1.4505411386489868, "step": 5242 }, { "epoch": 0.9545826886320197, "grad_norm": 8.75, "learning_rate": 4.099207610656548e-06, "loss": 1.5523974895477295, "step": 5244 }, { "epoch": 0.9549467552562119, "grad_norm": 8.375, "learning_rate": 4.0985674699277176e-06, "loss": 1.5758659839630127, "step": 5246 }, { "epoch": 0.9553108218804041, "grad_norm": 10.4375, "learning_rate": 4.097927167988562e-06, "loss": 1.305679202079773, "step": 5248 }, { "epoch": 0.9556748885045964, "grad_norm": 7.4375, "learning_rate": 4.097286704933045e-06, "loss": 0.9770964980125427, "step": 5250 }, { "epoch": 0.9560389551287886, "grad_norm": 13.5, "learning_rate": 4.09664608085515e-06, "loss": 1.7222895622253418, "step": 5252 }, { "epoch": 0.9564030217529808, "grad_norm": 18.125, "learning_rate": 4.0960052958488885e-06, "loss": 1.7340359687805176, "step": 5254 }, { "epoch": 0.9567670883771731, "grad_norm": 11.5, "learning_rate": 4.095364350008289e-06, "loss": 1.6007449626922607, "step": 5256 }, { "epoch": 0.9571311550013653, "grad_norm": 19.75, "learning_rate": 4.094723243427413e-06, "loss": 1.782277226448059, "step": 5258 }, { "epoch": 0.9574952216255574, "grad_norm": 12.625, "learning_rate": 4.094081976200336e-06, "loss": 1.4803569316864014, "step": 5260 }, { "epoch": 0.9578592882497498, "grad_norm": 6.78125, "learning_rate": 4.093440548421162e-06, "loss": 1.0331615209579468, "step": 5262 }, { "epoch": 0.958223354873942, "grad_norm": 10.5625, "learning_rate": 4.092798960184021e-06, "loss": 1.596687912940979, "step": 5264 }, { "epoch": 0.9585874214981341, "grad_norm": 16.5, "learning_rate": 4.092157211583061e-06, "loss": 1.4851155281066895, "step": 5266 }, { "epoch": 0.9589514881223263, "grad_norm": 7.71875, "learning_rate": 4.091515302712456e-06, "loss": 1.4892480373382568, "step": 5268 }, { "epoch": 0.9593155547465186, "grad_norm": 3.546875, "learning_rate": 4.090873233666402e-06, "loss": 1.0636554956436157, "step": 5270 }, { "epoch": 0.9596796213707108, "grad_norm": 10.75, "learning_rate": 4.090231004539125e-06, "loss": 1.035035252571106, "step": 5272 }, { "epoch": 0.960043687994903, "grad_norm": 23.125, "learning_rate": 4.089588615424865e-06, "loss": 1.5316381454467773, "step": 5274 }, { "epoch": 0.9604077546190953, "grad_norm": 13.5625, "learning_rate": 4.0889460664178904e-06, "loss": 1.423702359199524, "step": 5276 }, { "epoch": 0.9607718212432875, "grad_norm": 35.75, "learning_rate": 4.088303357612494e-06, "loss": 1.3729106187820435, "step": 5278 }, { "epoch": 0.9611358878674797, "grad_norm": 9.125, "learning_rate": 4.0876604891029916e-06, "loss": 1.5042821168899536, "step": 5280 }, { "epoch": 0.961499954491672, "grad_norm": 12.1875, "learning_rate": 4.08701746098372e-06, "loss": 1.5164340734481812, "step": 5282 }, { "epoch": 0.9618640211158642, "grad_norm": 7.75, "learning_rate": 4.086374273349041e-06, "loss": 1.0936529636383057, "step": 5284 }, { "epoch": 0.9622280877400564, "grad_norm": 9.625, "learning_rate": 4.08573092629334e-06, "loss": 1.1869004964828491, "step": 5286 }, { "epoch": 0.9625921543642486, "grad_norm": 8.25, "learning_rate": 4.085087419911026e-06, "loss": 1.4046446084976196, "step": 5288 }, { "epoch": 0.9629562209884409, "grad_norm": 36.75, "learning_rate": 4.084443754296529e-06, "loss": 0.8610131144523621, "step": 5290 }, { "epoch": 0.9633202876126331, "grad_norm": 9.125, "learning_rate": 4.0837999295443074e-06, "loss": 1.0124895572662354, "step": 5292 }, { "epoch": 0.9636843542368253, "grad_norm": 21.0, "learning_rate": 4.083155945748839e-06, "loss": 1.593213677406311, "step": 5294 }, { "epoch": 0.9640484208610176, "grad_norm": 16.75, "learning_rate": 4.082511803004624e-06, "loss": 1.8866251707077026, "step": 5296 }, { "epoch": 0.9644124874852098, "grad_norm": 28.0, "learning_rate": 4.081867501406189e-06, "loss": 0.991114616394043, "step": 5298 }, { "epoch": 0.964776554109402, "grad_norm": 20.75, "learning_rate": 4.0812230410480836e-06, "loss": 1.0621370077133179, "step": 5300 }, { "epoch": 0.9651406207335943, "grad_norm": 10.3125, "learning_rate": 4.080578422024878e-06, "loss": 1.4864797592163086, "step": 5302 }, { "epoch": 0.9655046873577865, "grad_norm": 27.875, "learning_rate": 4.079933644431168e-06, "loss": 1.1908533573150635, "step": 5304 }, { "epoch": 0.9658687539819787, "grad_norm": 10.875, "learning_rate": 4.0792887083615714e-06, "loss": 1.3227975368499756, "step": 5306 }, { "epoch": 0.9662328206061709, "grad_norm": 31.0, "learning_rate": 4.078643613910733e-06, "loss": 1.6085257530212402, "step": 5308 }, { "epoch": 0.9665968872303632, "grad_norm": 10.625, "learning_rate": 4.077998361173314e-06, "loss": 1.7413854598999023, "step": 5310 }, { "epoch": 0.9669609538545554, "grad_norm": 7.375, "learning_rate": 4.0773529502440055e-06, "loss": 1.1168752908706665, "step": 5312 }, { "epoch": 0.9673250204787476, "grad_norm": 44.25, "learning_rate": 4.076707381217516e-06, "loss": 1.4369099140167236, "step": 5314 }, { "epoch": 0.9676890871029399, "grad_norm": 18.875, "learning_rate": 4.076061654188583e-06, "loss": 1.3208189010620117, "step": 5316 }, { "epoch": 0.9680531537271321, "grad_norm": 11.8125, "learning_rate": 4.075415769251963e-06, "loss": 1.54694664478302, "step": 5318 }, { "epoch": 0.9684172203513243, "grad_norm": 23.75, "learning_rate": 4.074769726502438e-06, "loss": 1.453613042831421, "step": 5320 }, { "epoch": 0.9687812869755165, "grad_norm": 13.375, "learning_rate": 4.07412352603481e-06, "loss": 1.1985145807266235, "step": 5322 }, { "epoch": 0.9691453535997088, "grad_norm": 16.375, "learning_rate": 4.073477167943908e-06, "loss": 1.7540076971054077, "step": 5324 }, { "epoch": 0.969509420223901, "grad_norm": 9.875, "learning_rate": 4.072830652324582e-06, "loss": 1.4244577884674072, "step": 5326 }, { "epoch": 0.9698734868480932, "grad_norm": 10.3125, "learning_rate": 4.0721839792717055e-06, "loss": 1.3076109886169434, "step": 5328 }, { "epoch": 0.9702375534722855, "grad_norm": 6.375, "learning_rate": 4.071537148880174e-06, "loss": 1.3746185302734375, "step": 5330 }, { "epoch": 0.9706016200964777, "grad_norm": 18.5, "learning_rate": 4.070890161244911e-06, "loss": 1.543274998664856, "step": 5332 }, { "epoch": 0.9709656867206699, "grad_norm": 114.0, "learning_rate": 4.070243016460855e-06, "loss": 1.909379005432129, "step": 5334 }, { "epoch": 0.9713297533448622, "grad_norm": 8.6875, "learning_rate": 4.069595714622974e-06, "loss": 1.3400285243988037, "step": 5336 }, { "epoch": 0.9716938199690544, "grad_norm": 36.25, "learning_rate": 4.068948255826257e-06, "loss": 1.5058397054672241, "step": 5338 }, { "epoch": 0.9720578865932465, "grad_norm": 24.375, "learning_rate": 4.0683006401657155e-06, "loss": 1.848771572113037, "step": 5340 }, { "epoch": 0.9724219532174387, "grad_norm": 12.4375, "learning_rate": 4.0676528677363845e-06, "loss": 1.7847723960876465, "step": 5342 }, { "epoch": 0.972786019841631, "grad_norm": 14.375, "learning_rate": 4.067004938633322e-06, "loss": 1.0416680574417114, "step": 5344 }, { "epoch": 0.9731500864658232, "grad_norm": 28.125, "learning_rate": 4.066356852951609e-06, "loss": 1.158281922340393, "step": 5346 }, { "epoch": 0.9735141530900154, "grad_norm": 9.5625, "learning_rate": 4.0657086107863485e-06, "loss": 1.5224478244781494, "step": 5348 }, { "epoch": 0.9738782197142077, "grad_norm": 19.25, "learning_rate": 4.0650602122326684e-06, "loss": 1.4053987264633179, "step": 5350 }, { "epoch": 0.9742422863383999, "grad_norm": 5.34375, "learning_rate": 4.064411657385719e-06, "loss": 1.400122046470642, "step": 5352 }, { "epoch": 0.9746063529625921, "grad_norm": 2.765625, "learning_rate": 4.063762946340673e-06, "loss": 1.2220582962036133, "step": 5354 }, { "epoch": 0.9749704195867844, "grad_norm": 5.0625, "learning_rate": 4.063114079192726e-06, "loss": 1.0634700059890747, "step": 5356 }, { "epoch": 0.9753344862109766, "grad_norm": 11.6875, "learning_rate": 4.062465056037095e-06, "loss": 1.2520416975021362, "step": 5358 }, { "epoch": 0.9756985528351688, "grad_norm": 37.5, "learning_rate": 4.061815876969023e-06, "loss": 1.2351628541946411, "step": 5360 }, { "epoch": 0.976062619459361, "grad_norm": 14.9375, "learning_rate": 4.061166542083775e-06, "loss": 1.0903244018554688, "step": 5362 }, { "epoch": 0.9764266860835533, "grad_norm": 14.8125, "learning_rate": 4.060517051476637e-06, "loss": 1.4582080841064453, "step": 5364 }, { "epoch": 0.9767907527077455, "grad_norm": 15.8125, "learning_rate": 4.05986740524292e-06, "loss": 1.5534783601760864, "step": 5366 }, { "epoch": 0.9771548193319377, "grad_norm": 11.75, "learning_rate": 4.059217603477955e-06, "loss": 1.6136937141418457, "step": 5368 }, { "epoch": 0.97751888595613, "grad_norm": 6.21875, "learning_rate": 4.058567646277101e-06, "loss": 1.4576661586761475, "step": 5370 }, { "epoch": 0.9778829525803222, "grad_norm": 12.8125, "learning_rate": 4.057917533735734e-06, "loss": 1.1450663805007935, "step": 5372 }, { "epoch": 0.9782470192045144, "grad_norm": 20.25, "learning_rate": 4.057267265949257e-06, "loss": 2.101703643798828, "step": 5374 }, { "epoch": 0.9786110858287067, "grad_norm": 16.75, "learning_rate": 4.056616843013094e-06, "loss": 1.921669602394104, "step": 5376 }, { "epoch": 0.9789751524528989, "grad_norm": 17.25, "learning_rate": 4.055966265022689e-06, "loss": 1.2655748128890991, "step": 5378 }, { "epoch": 0.9793392190770911, "grad_norm": 54.5, "learning_rate": 4.055315532073517e-06, "loss": 1.0070053339004517, "step": 5380 }, { "epoch": 0.9797032857012833, "grad_norm": 16.5, "learning_rate": 4.054664644261065e-06, "loss": 1.4612586498260498, "step": 5382 }, { "epoch": 0.9800673523254756, "grad_norm": 12.125, "learning_rate": 4.054013601680852e-06, "loss": 1.4487459659576416, "step": 5384 }, { "epoch": 0.9804314189496678, "grad_norm": 12.25, "learning_rate": 4.0533624044284145e-06, "loss": 1.3196929693222046, "step": 5386 }, { "epoch": 0.98079548557386, "grad_norm": 12.875, "learning_rate": 4.052711052599313e-06, "loss": 0.611315131187439, "step": 5388 }, { "epoch": 0.9811595521980523, "grad_norm": 10.5625, "learning_rate": 4.05205954628913e-06, "loss": 0.8820526599884033, "step": 5390 }, { "epoch": 0.9815236188222445, "grad_norm": 5.3125, "learning_rate": 4.051407885593473e-06, "loss": 1.5229648351669312, "step": 5392 }, { "epoch": 0.9818876854464367, "grad_norm": 4.34375, "learning_rate": 4.050756070607969e-06, "loss": 1.0040631294250488, "step": 5394 }, { "epoch": 0.9822517520706289, "grad_norm": 11.125, "learning_rate": 4.050104101428271e-06, "loss": 1.1378288269042969, "step": 5396 }, { "epoch": 0.9826158186948212, "grad_norm": 7.03125, "learning_rate": 4.049451978150052e-06, "loss": 1.4940667152404785, "step": 5398 }, { "epoch": 0.9829798853190134, "grad_norm": 8.4375, "learning_rate": 4.048799700869007e-06, "loss": 1.600928544998169, "step": 5400 }, { "epoch": 0.9833439519432056, "grad_norm": 5.375, "learning_rate": 4.048147269680857e-06, "loss": 1.032188057899475, "step": 5402 }, { "epoch": 0.9837080185673979, "grad_norm": 5.40625, "learning_rate": 4.047494684681343e-06, "loss": 1.52291738986969, "step": 5404 }, { "epoch": 0.9840720851915901, "grad_norm": 10.4375, "learning_rate": 4.046841945966229e-06, "loss": 1.5357755422592163, "step": 5406 }, { "epoch": 0.9844361518157823, "grad_norm": 11.0, "learning_rate": 4.046189053631302e-06, "loss": 1.7469202280044556, "step": 5408 }, { "epoch": 0.9848002184399746, "grad_norm": 10.0, "learning_rate": 4.0455360077723716e-06, "loss": 1.3371202945709229, "step": 5410 }, { "epoch": 0.9851642850641668, "grad_norm": 10.9375, "learning_rate": 4.044882808485267e-06, "loss": 1.1834675073623657, "step": 5412 }, { "epoch": 0.985528351688359, "grad_norm": 9.875, "learning_rate": 4.044229455865848e-06, "loss": 1.4310107231140137, "step": 5414 }, { "epoch": 0.9858924183125511, "grad_norm": 22.625, "learning_rate": 4.043575950009987e-06, "loss": 1.296782374382019, "step": 5416 }, { "epoch": 0.9862564849367434, "grad_norm": 13.1875, "learning_rate": 4.042922291013584e-06, "loss": 0.7016429901123047, "step": 5418 }, { "epoch": 0.9866205515609356, "grad_norm": 17.75, "learning_rate": 4.042268478972562e-06, "loss": 0.45298367738723755, "step": 5420 }, { "epoch": 0.9869846181851278, "grad_norm": 12.4375, "learning_rate": 4.041614513982864e-06, "loss": 1.768958568572998, "step": 5422 }, { "epoch": 0.9873486848093201, "grad_norm": 14.0, "learning_rate": 4.040960396140457e-06, "loss": 1.13344407081604, "step": 5424 }, { "epoch": 0.9877127514335123, "grad_norm": 9.3125, "learning_rate": 4.040306125541332e-06, "loss": 1.4268752336502075, "step": 5426 }, { "epoch": 0.9880768180577045, "grad_norm": 6.875, "learning_rate": 4.039651702281499e-06, "loss": 1.3852629661560059, "step": 5428 }, { "epoch": 0.9884408846818968, "grad_norm": 68.5, "learning_rate": 4.038997126456992e-06, "loss": 1.208700180053711, "step": 5430 }, { "epoch": 0.988804951306089, "grad_norm": 18.25, "learning_rate": 4.038342398163866e-06, "loss": 1.5933505296707153, "step": 5432 }, { "epoch": 0.9891690179302812, "grad_norm": 17.875, "learning_rate": 4.037687517498203e-06, "loss": 1.0996559858322144, "step": 5434 }, { "epoch": 0.9895330845544734, "grad_norm": 9.5, "learning_rate": 4.037032484556099e-06, "loss": 1.4849562644958496, "step": 5436 }, { "epoch": 0.9898971511786657, "grad_norm": 9.5, "learning_rate": 4.036377299433683e-06, "loss": 1.2864630222320557, "step": 5438 }, { "epoch": 0.9902612178028579, "grad_norm": 16.625, "learning_rate": 4.035721962227098e-06, "loss": 1.2677327394485474, "step": 5440 }, { "epoch": 0.9906252844270501, "grad_norm": 23.125, "learning_rate": 4.035066473032513e-06, "loss": 0.8988248109817505, "step": 5442 }, { "epoch": 0.9909893510512424, "grad_norm": 9.625, "learning_rate": 4.034410831946117e-06, "loss": 1.4658727645874023, "step": 5444 }, { "epoch": 0.9913534176754346, "grad_norm": 8.9375, "learning_rate": 4.033755039064124e-06, "loss": 1.4299671649932861, "step": 5446 }, { "epoch": 0.9917174842996268, "grad_norm": 5.96875, "learning_rate": 4.033099094482769e-06, "loss": 1.3738458156585693, "step": 5448 }, { "epoch": 0.9920815509238191, "grad_norm": 11.25, "learning_rate": 4.032442998298308e-06, "loss": 1.3911213874816895, "step": 5450 }, { "epoch": 0.9924456175480113, "grad_norm": 8.3125, "learning_rate": 4.031786750607021e-06, "loss": 1.5168060064315796, "step": 5452 }, { "epoch": 0.9928096841722035, "grad_norm": 11.9375, "learning_rate": 4.03113035150521e-06, "loss": 1.37184476852417, "step": 5454 }, { "epoch": 0.9931737507963957, "grad_norm": 12.6875, "learning_rate": 4.0304738010891984e-06, "loss": 1.438374638557434, "step": 5456 }, { "epoch": 0.993537817420588, "grad_norm": 17.0, "learning_rate": 4.029817099455333e-06, "loss": 1.4127064943313599, "step": 5458 }, { "epoch": 0.9939018840447802, "grad_norm": 44.0, "learning_rate": 4.029160246699982e-06, "loss": 2.000234603881836, "step": 5460 }, { "epoch": 0.9942659506689724, "grad_norm": 14.25, "learning_rate": 4.028503242919536e-06, "loss": 1.509704828262329, "step": 5462 }, { "epoch": 0.9946300172931647, "grad_norm": 18.25, "learning_rate": 4.0278460882104065e-06, "loss": 1.3360744714736938, "step": 5464 }, { "epoch": 0.9949940839173569, "grad_norm": 13.125, "learning_rate": 4.0271887826690285e-06, "loss": 1.2061679363250732, "step": 5466 }, { "epoch": 0.9953581505415491, "grad_norm": 3.90625, "learning_rate": 4.02653132639186e-06, "loss": 1.2287050485610962, "step": 5468 }, { "epoch": 0.9957222171657414, "grad_norm": 13.375, "learning_rate": 4.025873719475379e-06, "loss": 1.4585390090942383, "step": 5470 }, { "epoch": 0.9960862837899336, "grad_norm": 14.0, "learning_rate": 4.025215962016088e-06, "loss": 1.3630969524383545, "step": 5472 }, { "epoch": 0.9964503504141258, "grad_norm": 18.5, "learning_rate": 4.024558054110509e-06, "loss": 0.8193284273147583, "step": 5474 }, { "epoch": 0.996814417038318, "grad_norm": 25.875, "learning_rate": 4.023899995855188e-06, "loss": 1.4585412740707397, "step": 5476 }, { "epoch": 0.9971784836625103, "grad_norm": 4.25, "learning_rate": 4.023241787346692e-06, "loss": 1.2779680490493774, "step": 5478 }, { "epoch": 0.9975425502867025, "grad_norm": 15.125, "learning_rate": 4.0225834286816115e-06, "loss": 1.2345284223556519, "step": 5480 }, { "epoch": 0.9979066169108947, "grad_norm": 8.5625, "learning_rate": 4.021924919956556e-06, "loss": 1.4153228998184204, "step": 5482 }, { "epoch": 0.998270683535087, "grad_norm": 14.4375, "learning_rate": 4.02126626126816e-06, "loss": 1.317355751991272, "step": 5484 }, { "epoch": 0.9986347501592792, "grad_norm": 11.125, "learning_rate": 4.020607452713078e-06, "loss": 1.4046615362167358, "step": 5486 }, { "epoch": 0.9989988167834714, "grad_norm": 7.46875, "learning_rate": 4.0199484943879896e-06, "loss": 1.197911262512207, "step": 5488 }, { "epoch": 0.9993628834076635, "grad_norm": 15.375, "learning_rate": 4.019289386389593e-06, "loss": 1.910951018333435, "step": 5490 }, { "epoch": 0.9997269500318559, "grad_norm": 17.0, "learning_rate": 4.01863012881461e-06, "loss": 1.6107884645462036, "step": 5492 }, { "epoch": 1.0, "grad_norm": 17.5, "learning_rate": 4.017970721759784e-06, "loss": 1.5087711811065674, "step": 5494 }, { "epoch": 1.0003640666241922, "grad_norm": 3.484375, "learning_rate": 4.0173111653218795e-06, "loss": 1.4003055095672607, "step": 5496 }, { "epoch": 1.0007281332483844, "grad_norm": 32.75, "learning_rate": 4.0166514595976845e-06, "loss": 0.9484976530075073, "step": 5498 }, { "epoch": 1.0010921998725766, "grad_norm": 9.4375, "learning_rate": 4.015991604684008e-06, "loss": 1.5096923112869263, "step": 5500 }, { "epoch": 1.001456266496769, "grad_norm": 19.375, "learning_rate": 4.0153316006776795e-06, "loss": 1.0394673347473145, "step": 5502 }, { "epoch": 1.0018203331209612, "grad_norm": 132.0, "learning_rate": 4.0146714476755555e-06, "loss": 1.490920066833496, "step": 5504 }, { "epoch": 1.0021843997451534, "grad_norm": 27.125, "learning_rate": 4.0140111457745076e-06, "loss": 0.35383227467536926, "step": 5506 }, { "epoch": 1.0025484663693456, "grad_norm": 11.75, "learning_rate": 4.013350695071434e-06, "loss": 1.3834779262542725, "step": 5508 }, { "epoch": 1.0029125329935378, "grad_norm": 4.78125, "learning_rate": 4.012690095663253e-06, "loss": 1.3156682252883911, "step": 5510 }, { "epoch": 1.00327659961773, "grad_norm": 15.5625, "learning_rate": 4.012029347646903e-06, "loss": 1.3250792026519775, "step": 5512 }, { "epoch": 1.0036406662419222, "grad_norm": 20.0, "learning_rate": 4.01136845111935e-06, "loss": 1.9037601947784424, "step": 5514 }, { "epoch": 1.0040047328661146, "grad_norm": 71.5, "learning_rate": 4.010707406177573e-06, "loss": 1.2338621616363525, "step": 5516 }, { "epoch": 1.0043687994903068, "grad_norm": 19.625, "learning_rate": 4.010046212918581e-06, "loss": 1.43930983543396, "step": 5518 }, { "epoch": 1.004732866114499, "grad_norm": 6.0, "learning_rate": 4.009384871439401e-06, "loss": 1.3999929428100586, "step": 5520 }, { "epoch": 1.0050969327386912, "grad_norm": 12.5625, "learning_rate": 4.008723381837082e-06, "loss": 1.4799768924713135, "step": 5522 }, { "epoch": 1.0054609993628834, "grad_norm": 13.0, "learning_rate": 4.0080617442086945e-06, "loss": 1.5514425039291382, "step": 5524 }, { "epoch": 1.0058250659870756, "grad_norm": 9.25, "learning_rate": 4.007399958651331e-06, "loss": 1.4073677062988281, "step": 5526 }, { "epoch": 1.006189132611268, "grad_norm": 17.25, "learning_rate": 4.006738025262106e-06, "loss": 1.86566162109375, "step": 5528 }, { "epoch": 1.0065531992354602, "grad_norm": 11.3125, "learning_rate": 4.006075944138157e-06, "loss": 1.0989599227905273, "step": 5530 }, { "epoch": 1.0069172658596524, "grad_norm": 14.875, "learning_rate": 4.00541371537664e-06, "loss": 1.3692044019699097, "step": 5532 }, { "epoch": 1.0072813324838445, "grad_norm": 242.0, "learning_rate": 4.004751339074734e-06, "loss": 1.4603779315948486, "step": 5534 }, { "epoch": 1.0076453991080367, "grad_norm": 25.625, "learning_rate": 4.004088815329641e-06, "loss": 1.5434331893920898, "step": 5536 }, { "epoch": 1.008009465732229, "grad_norm": 7.4375, "learning_rate": 4.003426144238583e-06, "loss": 1.4738571643829346, "step": 5538 }, { "epoch": 1.0083735323564211, "grad_norm": 18.875, "learning_rate": 4.002763325898808e-06, "loss": 1.3381311893463135, "step": 5540 }, { "epoch": 1.0087375989806135, "grad_norm": 9.75, "learning_rate": 4.002100360407576e-06, "loss": 1.3541488647460938, "step": 5542 }, { "epoch": 1.0091016656048057, "grad_norm": 30.875, "learning_rate": 4.0014372478621775e-06, "loss": 1.6272519826889038, "step": 5544 }, { "epoch": 1.009465732228998, "grad_norm": 6.03125, "learning_rate": 4.000773988359922e-06, "loss": 1.1985750198364258, "step": 5546 }, { "epoch": 1.0098297988531901, "grad_norm": 2.203125, "learning_rate": 4.000110581998139e-06, "loss": 1.1730234622955322, "step": 5548 }, { "epoch": 1.0101938654773823, "grad_norm": 25.875, "learning_rate": 3.9994470288741805e-06, "loss": 0.9922491312026978, "step": 5550 }, { "epoch": 1.0105579321015745, "grad_norm": 18.25, "learning_rate": 3.998783329085421e-06, "loss": 1.6758556365966797, "step": 5552 }, { "epoch": 1.0109219987257667, "grad_norm": 5.59375, "learning_rate": 3.998119482729258e-06, "loss": 1.390196442604065, "step": 5554 }, { "epoch": 1.0112860653499591, "grad_norm": 18.375, "learning_rate": 3.997455489903104e-06, "loss": 1.6273266077041626, "step": 5556 }, { "epoch": 1.0116501319741513, "grad_norm": 5.25, "learning_rate": 3.9967913507044e-06, "loss": 1.3647640943527222, "step": 5558 }, { "epoch": 1.0120141985983435, "grad_norm": 9.1875, "learning_rate": 3.996127065230604e-06, "loss": 1.4864213466644287, "step": 5560 }, { "epoch": 1.0123782652225357, "grad_norm": 17.375, "learning_rate": 3.995462633579199e-06, "loss": 1.1231017112731934, "step": 5562 }, { "epoch": 1.012742331846728, "grad_norm": 4.9375, "learning_rate": 3.9947980558476865e-06, "loss": 0.7884931564331055, "step": 5564 }, { "epoch": 1.01310639847092, "grad_norm": 20.0, "learning_rate": 3.9941333321335904e-06, "loss": 1.5433924198150635, "step": 5566 }, { "epoch": 1.0134704650951125, "grad_norm": 13.5625, "learning_rate": 3.993468462534457e-06, "loss": 1.4204564094543457, "step": 5568 }, { "epoch": 1.0138345317193047, "grad_norm": 30.25, "learning_rate": 3.992803447147853e-06, "loss": 1.3641741275787354, "step": 5570 }, { "epoch": 1.014198598343497, "grad_norm": 13.5, "learning_rate": 3.992138286071366e-06, "loss": 1.415997862815857, "step": 5572 }, { "epoch": 1.014562664967689, "grad_norm": 8.0, "learning_rate": 3.991472979402608e-06, "loss": 1.17242431640625, "step": 5574 }, { "epoch": 1.0149267315918813, "grad_norm": 19.125, "learning_rate": 3.990807527239206e-06, "loss": 1.5586891174316406, "step": 5576 }, { "epoch": 1.0152907982160735, "grad_norm": 4.71875, "learning_rate": 3.9901419296788165e-06, "loss": 1.1179163455963135, "step": 5578 }, { "epoch": 1.0156548648402657, "grad_norm": 12.625, "learning_rate": 3.989476186819111e-06, "loss": 1.4072048664093018, "step": 5580 }, { "epoch": 1.016018931464458, "grad_norm": 12.75, "learning_rate": 3.988810298757785e-06, "loss": 1.5453095436096191, "step": 5582 }, { "epoch": 1.0163829980886503, "grad_norm": 4.375, "learning_rate": 3.988144265592556e-06, "loss": 1.5294172763824463, "step": 5584 }, { "epoch": 1.0167470647128425, "grad_norm": 7.375, "learning_rate": 3.987478087421159e-06, "loss": 1.491885781288147, "step": 5586 }, { "epoch": 1.0171111313370347, "grad_norm": 13.5625, "learning_rate": 3.986811764341355e-06, "loss": 1.6509900093078613, "step": 5588 }, { "epoch": 1.0174751979612269, "grad_norm": 3.21875, "learning_rate": 3.986145296450924e-06, "loss": 0.958034336566925, "step": 5590 }, { "epoch": 1.017839264585419, "grad_norm": 15.5625, "learning_rate": 3.9854786838476674e-06, "loss": 2.092007875442505, "step": 5592 }, { "epoch": 1.0182033312096113, "grad_norm": 99.0, "learning_rate": 3.984811926629408e-06, "loss": 1.3249096870422363, "step": 5594 }, { "epoch": 1.0185673978338037, "grad_norm": 14.3125, "learning_rate": 3.9841450248939894e-06, "loss": 1.4061754941940308, "step": 5596 }, { "epoch": 1.0189314644579959, "grad_norm": 11.4375, "learning_rate": 3.983477978739276e-06, "loss": 1.425875186920166, "step": 5598 }, { "epoch": 1.019295531082188, "grad_norm": 26.875, "learning_rate": 3.982810788263155e-06, "loss": 1.3803876638412476, "step": 5600 }, { "epoch": 1.0196595977063803, "grad_norm": 22.375, "learning_rate": 3.982143453563535e-06, "loss": 1.3369160890579224, "step": 5602 }, { "epoch": 1.0200236643305725, "grad_norm": 133.0, "learning_rate": 3.981475974738343e-06, "loss": 1.377591609954834, "step": 5604 }, { "epoch": 1.0203877309547646, "grad_norm": 9.875, "learning_rate": 3.980808351885528e-06, "loss": 1.2054471969604492, "step": 5606 }, { "epoch": 1.0207517975789568, "grad_norm": 44.0, "learning_rate": 3.980140585103064e-06, "loss": 1.5488436222076416, "step": 5608 }, { "epoch": 1.0211158642031493, "grad_norm": 97.0, "learning_rate": 3.97947267448894e-06, "loss": 1.4750125408172607, "step": 5610 }, { "epoch": 1.0214799308273415, "grad_norm": 19.875, "learning_rate": 3.978804620141171e-06, "loss": 1.8069579601287842, "step": 5612 }, { "epoch": 1.0218439974515336, "grad_norm": 3.671875, "learning_rate": 3.97813642215779e-06, "loss": 1.0305663347244263, "step": 5614 }, { "epoch": 1.0222080640757258, "grad_norm": 9.1875, "learning_rate": 3.9774680806368534e-06, "loss": 1.0835367441177368, "step": 5616 }, { "epoch": 1.022572130699918, "grad_norm": 8.625, "learning_rate": 3.976799595676438e-06, "loss": 1.466511607170105, "step": 5618 }, { "epoch": 1.0229361973241102, "grad_norm": 3.9375, "learning_rate": 3.97613096737464e-06, "loss": 1.1156589984893799, "step": 5620 }, { "epoch": 1.0233002639483026, "grad_norm": 10.875, "learning_rate": 3.9754621958295795e-06, "loss": 1.5707851648330688, "step": 5622 }, { "epoch": 1.0236643305724948, "grad_norm": 13.875, "learning_rate": 3.974793281139394e-06, "loss": 1.498622179031372, "step": 5624 }, { "epoch": 1.024028397196687, "grad_norm": 23.375, "learning_rate": 3.974124223402246e-06, "loss": 1.5071301460266113, "step": 5626 }, { "epoch": 1.0243924638208792, "grad_norm": 18.25, "learning_rate": 3.973455022716314e-06, "loss": 1.4509986639022827, "step": 5628 }, { "epoch": 1.0247565304450714, "grad_norm": 20.0, "learning_rate": 3.972785679179804e-06, "loss": 1.3919399976730347, "step": 5630 }, { "epoch": 1.0251205970692636, "grad_norm": 12.5, "learning_rate": 3.972116192890937e-06, "loss": 1.5151044130325317, "step": 5632 }, { "epoch": 1.0254846636934558, "grad_norm": 9.0625, "learning_rate": 3.97144656394796e-06, "loss": 1.400679111480713, "step": 5634 }, { "epoch": 1.0258487303176482, "grad_norm": 13.25, "learning_rate": 3.970776792449135e-06, "loss": 0.9440341591835022, "step": 5636 }, { "epoch": 1.0262127969418404, "grad_norm": 10.25, "learning_rate": 3.970106878492751e-06, "loss": 1.6199828386306763, "step": 5638 }, { "epoch": 1.0265768635660326, "grad_norm": 18.0, "learning_rate": 3.9694368221771125e-06, "loss": 0.8532905578613281, "step": 5640 }, { "epoch": 1.0269409301902248, "grad_norm": 3.34375, "learning_rate": 3.96876662360055e-06, "loss": 0.8992357850074768, "step": 5642 }, { "epoch": 1.027304996814417, "grad_norm": 4.5625, "learning_rate": 3.968096282861412e-06, "loss": 0.9135205745697021, "step": 5644 }, { "epoch": 1.0276690634386092, "grad_norm": 18.75, "learning_rate": 3.967425800058068e-06, "loss": 1.770528793334961, "step": 5646 }, { "epoch": 1.0280331300628014, "grad_norm": 9.0625, "learning_rate": 3.966755175288908e-06, "loss": 1.3742282390594482, "step": 5648 }, { "epoch": 1.0283971966869938, "grad_norm": 17.125, "learning_rate": 3.966084408652344e-06, "loss": 1.5820229053497314, "step": 5650 }, { "epoch": 1.028761263311186, "grad_norm": 9.8125, "learning_rate": 3.965413500246807e-06, "loss": 1.4844284057617188, "step": 5652 }, { "epoch": 1.0291253299353782, "grad_norm": 8.8125, "learning_rate": 3.964742450170753e-06, "loss": 1.593349575996399, "step": 5654 }, { "epoch": 1.0294893965595704, "grad_norm": 17.625, "learning_rate": 3.964071258522654e-06, "loss": 1.4519903659820557, "step": 5656 }, { "epoch": 1.0298534631837626, "grad_norm": 6.21875, "learning_rate": 3.9633999254010045e-06, "loss": 1.1237472295761108, "step": 5658 }, { "epoch": 1.0302175298079548, "grad_norm": 17.625, "learning_rate": 3.962728450904321e-06, "loss": 1.5210973024368286, "step": 5660 }, { "epoch": 1.030581596432147, "grad_norm": 7.875, "learning_rate": 3.9620568351311384e-06, "loss": 0.661182165145874, "step": 5662 }, { "epoch": 1.0309456630563394, "grad_norm": 7.9375, "learning_rate": 3.961385078180013e-06, "loss": 1.507887363433838, "step": 5664 }, { "epoch": 1.0313097296805316, "grad_norm": 15.125, "learning_rate": 3.960713180149526e-06, "loss": 1.4062614440917969, "step": 5666 }, { "epoch": 1.0316737963047238, "grad_norm": 8.75, "learning_rate": 3.960041141138271e-06, "loss": 1.1013820171356201, "step": 5668 }, { "epoch": 1.032037862928916, "grad_norm": 11.25, "learning_rate": 3.9593689612448706e-06, "loss": 1.5551948547363281, "step": 5670 }, { "epoch": 1.0324019295531082, "grad_norm": 18.625, "learning_rate": 3.958696640567961e-06, "loss": 1.3923044204711914, "step": 5672 }, { "epoch": 1.0327659961773004, "grad_norm": 7.4375, "learning_rate": 3.9580241792062066e-06, "loss": 1.240286946296692, "step": 5674 }, { "epoch": 1.0331300628014928, "grad_norm": 40.5, "learning_rate": 3.957351577258286e-06, "loss": 1.9395053386688232, "step": 5676 }, { "epoch": 1.033494129425685, "grad_norm": 2.40625, "learning_rate": 3.956678834822902e-06, "loss": 1.061018705368042, "step": 5678 }, { "epoch": 1.0338581960498772, "grad_norm": 14.6875, "learning_rate": 3.956005951998775e-06, "loss": 1.4600074291229248, "step": 5680 }, { "epoch": 1.0342222626740694, "grad_norm": 3.234375, "learning_rate": 3.955332928884649e-06, "loss": 1.0646876096725464, "step": 5682 }, { "epoch": 1.0345863292982616, "grad_norm": 10.4375, "learning_rate": 3.9546597655792884e-06, "loss": 1.699087381362915, "step": 5684 }, { "epoch": 1.0349503959224537, "grad_norm": 13.0625, "learning_rate": 3.953986462181475e-06, "loss": 1.546065092086792, "step": 5686 }, { "epoch": 1.035314462546646, "grad_norm": 11.1875, "learning_rate": 3.9533130187900136e-06, "loss": 0.25471073389053345, "step": 5688 }, { "epoch": 1.0356785291708384, "grad_norm": 12.875, "learning_rate": 3.952639435503732e-06, "loss": 1.4835538864135742, "step": 5690 }, { "epoch": 1.0360425957950306, "grad_norm": 4.9375, "learning_rate": 3.951965712421473e-06, "loss": 1.3616836071014404, "step": 5692 }, { "epoch": 1.0364066624192227, "grad_norm": 45.5, "learning_rate": 3.951291849642104e-06, "loss": 1.5426254272460938, "step": 5694 }, { "epoch": 1.036770729043415, "grad_norm": 41.5, "learning_rate": 3.9506178472645106e-06, "loss": 0.7989203929901123, "step": 5696 }, { "epoch": 1.0371347956676071, "grad_norm": 29.0, "learning_rate": 3.949943705387601e-06, "loss": 0.9459846019744873, "step": 5698 }, { "epoch": 1.0374988622917993, "grad_norm": 9.125, "learning_rate": 3.949269424110304e-06, "loss": 1.3828026056289673, "step": 5700 }, { "epoch": 1.0378629289159915, "grad_norm": 14.5, "learning_rate": 3.948595003531564e-06, "loss": 1.2757847309112549, "step": 5702 }, { "epoch": 1.038226995540184, "grad_norm": 7.625, "learning_rate": 3.947920443750351e-06, "loss": 1.0518667697906494, "step": 5704 }, { "epoch": 1.0385910621643761, "grad_norm": 27.0, "learning_rate": 3.947245744865657e-06, "loss": 1.598201036453247, "step": 5706 }, { "epoch": 1.0389551287885683, "grad_norm": 11.9375, "learning_rate": 3.9465709069764864e-06, "loss": 0.9058932065963745, "step": 5708 }, { "epoch": 1.0393191954127605, "grad_norm": 7.21875, "learning_rate": 3.945895930181873e-06, "loss": 1.2411967515945435, "step": 5710 }, { "epoch": 1.0396832620369527, "grad_norm": 77.0, "learning_rate": 3.945220814580865e-06, "loss": 1.7014228105545044, "step": 5712 }, { "epoch": 1.040047328661145, "grad_norm": 7.90625, "learning_rate": 3.944545560272532e-06, "loss": 1.5376970767974854, "step": 5714 }, { "epoch": 1.040411395285337, "grad_norm": 3.4375, "learning_rate": 3.943870167355968e-06, "loss": 1.106825351715088, "step": 5716 }, { "epoch": 1.0407754619095295, "grad_norm": 13.375, "learning_rate": 3.94319463593028e-06, "loss": 1.4561913013458252, "step": 5718 }, { "epoch": 1.0411395285337217, "grad_norm": 15.1875, "learning_rate": 3.942518966094603e-06, "loss": 1.4524739980697632, "step": 5720 }, { "epoch": 1.041503595157914, "grad_norm": 9.0, "learning_rate": 3.941843157948086e-06, "loss": 1.5416932106018066, "step": 5722 }, { "epoch": 1.041867661782106, "grad_norm": 5.84375, "learning_rate": 3.941167211589904e-06, "loss": 1.21150541305542, "step": 5724 }, { "epoch": 1.0422317284062983, "grad_norm": 8.125, "learning_rate": 3.940491127119247e-06, "loss": 1.4245576858520508, "step": 5726 }, { "epoch": 1.0425957950304905, "grad_norm": 13.1875, "learning_rate": 3.939814904635329e-06, "loss": 1.2398494482040405, "step": 5728 }, { "epoch": 1.042959861654683, "grad_norm": 16.875, "learning_rate": 3.939138544237382e-06, "loss": 1.990612506866455, "step": 5730 }, { "epoch": 1.043323928278875, "grad_norm": 8.4375, "learning_rate": 3.93846204602466e-06, "loss": 0.8355557918548584, "step": 5732 }, { "epoch": 1.0436879949030673, "grad_norm": 10.0, "learning_rate": 3.9377854100964364e-06, "loss": 1.5349178314208984, "step": 5734 }, { "epoch": 1.0440520615272595, "grad_norm": 6.4375, "learning_rate": 3.937108636552004e-06, "loss": 0.882619321346283, "step": 5736 }, { "epoch": 1.0444161281514517, "grad_norm": 19.875, "learning_rate": 3.936431725490676e-06, "loss": 1.2917356491088867, "step": 5738 }, { "epoch": 1.0447801947756439, "grad_norm": 11.6875, "learning_rate": 3.9357546770117885e-06, "loss": 1.7410742044448853, "step": 5740 }, { "epoch": 1.045144261399836, "grad_norm": 11.1875, "learning_rate": 3.935077491214694e-06, "loss": 1.5905922651290894, "step": 5742 }, { "epoch": 1.0455083280240285, "grad_norm": 11.6875, "learning_rate": 3.934400168198768e-06, "loss": 1.0633156299591064, "step": 5744 }, { "epoch": 1.0458723946482207, "grad_norm": 6.5625, "learning_rate": 3.933722708063402e-06, "loss": 1.1479876041412354, "step": 5746 }, { "epoch": 1.0462364612724129, "grad_norm": 52.25, "learning_rate": 3.933045110908015e-06, "loss": 2.0883524417877197, "step": 5748 }, { "epoch": 1.046600527896605, "grad_norm": 14.3125, "learning_rate": 3.932367376832038e-06, "loss": 1.606029748916626, "step": 5750 }, { "epoch": 1.0469645945207973, "grad_norm": 13.3125, "learning_rate": 3.931689505934928e-06, "loss": 0.7816003561019897, "step": 5752 }, { "epoch": 1.0473286611449895, "grad_norm": 8.1875, "learning_rate": 3.931011498316158e-06, "loss": 1.4243874549865723, "step": 5754 }, { "epoch": 1.0476927277691817, "grad_norm": 30.5, "learning_rate": 3.930333354075223e-06, "loss": 2.042520761489868, "step": 5756 }, { "epoch": 1.048056794393374, "grad_norm": 6.09375, "learning_rate": 3.92965507331164e-06, "loss": 1.138911485671997, "step": 5758 }, { "epoch": 1.0484208610175663, "grad_norm": 13.75, "learning_rate": 3.9289766561249425e-06, "loss": 0.16640490293502808, "step": 5760 }, { "epoch": 1.0487849276417585, "grad_norm": 11.3125, "learning_rate": 3.928298102614685e-06, "loss": 1.4840346574783325, "step": 5762 }, { "epoch": 1.0491489942659507, "grad_norm": 5.15625, "learning_rate": 3.9276194128804425e-06, "loss": 0.917036771774292, "step": 5764 }, { "epoch": 1.0495130608901428, "grad_norm": 7.15625, "learning_rate": 3.9269405870218115e-06, "loss": 1.4044708013534546, "step": 5766 }, { "epoch": 1.049877127514335, "grad_norm": 132.0, "learning_rate": 3.926261625138404e-06, "loss": 0.7152976393699646, "step": 5768 }, { "epoch": 1.0502411941385275, "grad_norm": 7.3125, "learning_rate": 3.92558252732986e-06, "loss": 0.7644884586334229, "step": 5770 }, { "epoch": 1.0506052607627197, "grad_norm": 3.9375, "learning_rate": 3.9249032936958285e-06, "loss": 1.206880807876587, "step": 5772 }, { "epoch": 1.0509693273869118, "grad_norm": 21.125, "learning_rate": 3.924223924335988e-06, "loss": 0.6048542857170105, "step": 5774 }, { "epoch": 1.051333394011104, "grad_norm": 28.125, "learning_rate": 3.923544419350033e-06, "loss": 1.5641148090362549, "step": 5776 }, { "epoch": 1.0516974606352962, "grad_norm": 13.8125, "learning_rate": 3.922864778837675e-06, "loss": 1.6029363870620728, "step": 5778 }, { "epoch": 1.0520615272594884, "grad_norm": 21.125, "learning_rate": 3.922185002898652e-06, "loss": 1.8753116130828857, "step": 5780 }, { "epoch": 1.0524255938836806, "grad_norm": 4.78125, "learning_rate": 3.921505091632718e-06, "loss": 1.0220108032226562, "step": 5782 }, { "epoch": 1.052789660507873, "grad_norm": 21.25, "learning_rate": 3.920825045139646e-06, "loss": 2.271599054336548, "step": 5784 }, { "epoch": 1.0531537271320652, "grad_norm": 44.0, "learning_rate": 3.920144863519228e-06, "loss": 2.0198800563812256, "step": 5786 }, { "epoch": 1.0535177937562574, "grad_norm": 27.75, "learning_rate": 3.919464546871283e-06, "loss": 1.8812106847763062, "step": 5788 }, { "epoch": 1.0538818603804496, "grad_norm": 29.0, "learning_rate": 3.918784095295642e-06, "loss": 1.1237467527389526, "step": 5790 }, { "epoch": 1.0542459270046418, "grad_norm": 10.8125, "learning_rate": 3.918103508892157e-06, "loss": 1.5218358039855957, "step": 5792 }, { "epoch": 1.054609993628834, "grad_norm": 12.5, "learning_rate": 3.917422787760704e-06, "loss": 1.8341076374053955, "step": 5794 }, { "epoch": 1.0549740602530262, "grad_norm": 4.1875, "learning_rate": 3.916741932001173e-06, "loss": 1.0572822093963623, "step": 5796 }, { "epoch": 1.0553381268772186, "grad_norm": 7.6875, "learning_rate": 3.916060941713481e-06, "loss": 1.036707878112793, "step": 5798 }, { "epoch": 1.0557021935014108, "grad_norm": 23.5, "learning_rate": 3.915379816997558e-06, "loss": 1.1157270669937134, "step": 5800 }, { "epoch": 1.056066260125603, "grad_norm": 10.625, "learning_rate": 3.914698557953355e-06, "loss": 1.7082679271697998, "step": 5802 }, { "epoch": 1.0564303267497952, "grad_norm": 10.0, "learning_rate": 3.914017164680847e-06, "loss": 1.4446851015090942, "step": 5804 }, { "epoch": 1.0567943933739874, "grad_norm": 37.5, "learning_rate": 3.913335637280024e-06, "loss": 0.7253285050392151, "step": 5806 }, { "epoch": 1.0571584599981796, "grad_norm": 11.875, "learning_rate": 3.912653975850897e-06, "loss": 1.3953938484191895, "step": 5808 }, { "epoch": 1.057522526622372, "grad_norm": 13.625, "learning_rate": 3.911972180493499e-06, "loss": 1.1639409065246582, "step": 5810 }, { "epoch": 1.0578865932465642, "grad_norm": 17.25, "learning_rate": 3.9112902513078775e-06, "loss": 2.2260875701904297, "step": 5812 }, { "epoch": 1.0582506598707564, "grad_norm": 13.625, "learning_rate": 3.910608188394106e-06, "loss": 1.5188775062561035, "step": 5814 }, { "epoch": 1.0586147264949486, "grad_norm": 19.0, "learning_rate": 3.909925991852274e-06, "loss": 0.23688673973083496, "step": 5816 }, { "epoch": 1.0589787931191408, "grad_norm": 14.125, "learning_rate": 3.909243661782488e-06, "loss": 1.7974152565002441, "step": 5818 }, { "epoch": 1.059342859743333, "grad_norm": 28.5, "learning_rate": 3.908561198284881e-06, "loss": 1.3932104110717773, "step": 5820 }, { "epoch": 1.0597069263675252, "grad_norm": 22.5, "learning_rate": 3.9078786014596e-06, "loss": 1.7903791666030884, "step": 5822 }, { "epoch": 1.0600709929917176, "grad_norm": 22.25, "learning_rate": 3.907195871406813e-06, "loss": 1.9884388446807861, "step": 5824 }, { "epoch": 1.0604350596159098, "grad_norm": 11.5625, "learning_rate": 3.90651300822671e-06, "loss": 1.666649580001831, "step": 5826 }, { "epoch": 1.060799126240102, "grad_norm": 8.8125, "learning_rate": 3.905830012019496e-06, "loss": 1.3741055727005005, "step": 5828 }, { "epoch": 1.0611631928642942, "grad_norm": 24.75, "learning_rate": 3.905146882885399e-06, "loss": 1.3806006908416748, "step": 5830 }, { "epoch": 1.0615272594884864, "grad_norm": 26.625, "learning_rate": 3.904463620924665e-06, "loss": 1.460968017578125, "step": 5832 }, { "epoch": 1.0618913261126786, "grad_norm": 33.5, "learning_rate": 3.903780226237559e-06, "loss": 1.4285956621170044, "step": 5834 }, { "epoch": 1.0622553927368708, "grad_norm": 4.34375, "learning_rate": 3.9030966989243675e-06, "loss": 1.3044016361236572, "step": 5836 }, { "epoch": 1.0626194593610632, "grad_norm": 20.25, "learning_rate": 3.9024130390853975e-06, "loss": 1.442693829536438, "step": 5838 }, { "epoch": 1.0629835259852554, "grad_norm": 13.5, "learning_rate": 3.90172924682097e-06, "loss": 1.4817867279052734, "step": 5840 }, { "epoch": 1.0633475926094476, "grad_norm": 19.125, "learning_rate": 3.901045322231429e-06, "loss": 1.949110746383667, "step": 5842 }, { "epoch": 1.0637116592336398, "grad_norm": 10.5625, "learning_rate": 3.9003612654171395e-06, "loss": 1.528233289718628, "step": 5844 }, { "epoch": 1.064075725857832, "grad_norm": 12.1875, "learning_rate": 3.899677076478483e-06, "loss": 1.6203858852386475, "step": 5846 }, { "epoch": 1.0644397924820241, "grad_norm": 7.875, "learning_rate": 3.898992755515862e-06, "loss": 1.0835732221603394, "step": 5848 }, { "epoch": 1.0648038591062163, "grad_norm": 21.0, "learning_rate": 3.898308302629697e-06, "loss": 1.5007903575897217, "step": 5850 }, { "epoch": 1.0651679257304087, "grad_norm": 14.875, "learning_rate": 3.89762371792043e-06, "loss": 1.658623218536377, "step": 5852 }, { "epoch": 1.065531992354601, "grad_norm": 13.25, "learning_rate": 3.896939001488519e-06, "loss": 1.3804954290390015, "step": 5854 }, { "epoch": 1.0658960589787931, "grad_norm": 11.125, "learning_rate": 3.896254153434444e-06, "loss": 1.5994998216629028, "step": 5856 }, { "epoch": 1.0662601256029853, "grad_norm": 6.125, "learning_rate": 3.895569173858705e-06, "loss": 1.1914063692092896, "step": 5858 }, { "epoch": 1.0666241922271775, "grad_norm": 6.1875, "learning_rate": 3.89488406286182e-06, "loss": 1.1871479749679565, "step": 5860 }, { "epoch": 1.0669882588513697, "grad_norm": 5.5625, "learning_rate": 3.894198820544325e-06, "loss": 1.380682349205017, "step": 5862 }, { "epoch": 1.0673523254755621, "grad_norm": 9.8125, "learning_rate": 3.893513447006776e-06, "loss": 0.861020565032959, "step": 5864 }, { "epoch": 1.0677163920997543, "grad_norm": 33.25, "learning_rate": 3.89282794234975e-06, "loss": 1.5483207702636719, "step": 5866 }, { "epoch": 1.0680804587239465, "grad_norm": 13.0625, "learning_rate": 3.892142306673842e-06, "loss": 1.4005470275878906, "step": 5868 }, { "epoch": 1.0684445253481387, "grad_norm": 17.75, "learning_rate": 3.891456540079667e-06, "loss": 1.4869873523712158, "step": 5870 }, { "epoch": 1.068808591972331, "grad_norm": 10.8125, "learning_rate": 3.890770642667856e-06, "loss": 1.5940215587615967, "step": 5872 }, { "epoch": 1.069172658596523, "grad_norm": 8.9375, "learning_rate": 3.890084614539063e-06, "loss": 1.8276211023330688, "step": 5874 }, { "epoch": 1.0695367252207153, "grad_norm": 12.0625, "learning_rate": 3.889398455793962e-06, "loss": 1.4968147277832031, "step": 5876 }, { "epoch": 1.0699007918449077, "grad_norm": 17.25, "learning_rate": 3.88871216653324e-06, "loss": 1.6963486671447754, "step": 5878 }, { "epoch": 1.0702648584691, "grad_norm": 9.0, "learning_rate": 3.8880257468576114e-06, "loss": 1.1339311599731445, "step": 5880 }, { "epoch": 1.070628925093292, "grad_norm": 20.375, "learning_rate": 3.887339196867801e-06, "loss": 0.6718922257423401, "step": 5882 }, { "epoch": 1.0709929917174843, "grad_norm": 6.125, "learning_rate": 3.8866525166645606e-06, "loss": 1.4228242635726929, "step": 5884 }, { "epoch": 1.0713570583416765, "grad_norm": 9.375, "learning_rate": 3.885965706348657e-06, "loss": 1.4468133449554443, "step": 5886 }, { "epoch": 1.0717211249658687, "grad_norm": 9.9375, "learning_rate": 3.885278766020876e-06, "loss": 1.1995999813079834, "step": 5888 }, { "epoch": 1.0720851915900609, "grad_norm": 9.6875, "learning_rate": 3.884591695782023e-06, "loss": 1.6023592948913574, "step": 5890 }, { "epoch": 1.0724492582142533, "grad_norm": 15.0, "learning_rate": 3.883904495732925e-06, "loss": 1.5041823387145996, "step": 5892 }, { "epoch": 1.0728133248384455, "grad_norm": 4.71875, "learning_rate": 3.883217165974423e-06, "loss": 0.845273494720459, "step": 5894 }, { "epoch": 1.0731773914626377, "grad_norm": 46.5, "learning_rate": 3.882529706607383e-06, "loss": 1.9611445665359497, "step": 5896 }, { "epoch": 1.0735414580868299, "grad_norm": 11.4375, "learning_rate": 3.8818421177326835e-06, "loss": 1.5844045877456665, "step": 5898 }, { "epoch": 1.073905524711022, "grad_norm": 8.0625, "learning_rate": 3.881154399451228e-06, "loss": 1.4252173900604248, "step": 5900 }, { "epoch": 1.0742695913352143, "grad_norm": 8.1875, "learning_rate": 3.880466551863935e-06, "loss": 1.6375072002410889, "step": 5902 }, { "epoch": 1.0746336579594065, "grad_norm": 11.875, "learning_rate": 3.879778575071744e-06, "loss": 1.4291961193084717, "step": 5904 }, { "epoch": 1.0749977245835989, "grad_norm": 12.5, "learning_rate": 3.879090469175613e-06, "loss": 1.6476150751113892, "step": 5906 }, { "epoch": 1.075361791207791, "grad_norm": 9.0625, "learning_rate": 3.878402234276517e-06, "loss": 1.1103986501693726, "step": 5908 }, { "epoch": 1.0757258578319833, "grad_norm": 22.75, "learning_rate": 3.877713870475454e-06, "loss": 1.3158318996429443, "step": 5910 }, { "epoch": 1.0760899244561755, "grad_norm": 19.875, "learning_rate": 3.877025377873437e-06, "loss": 2.143655776977539, "step": 5912 }, { "epoch": 1.0764539910803677, "grad_norm": 12.625, "learning_rate": 3.8763367565715e-06, "loss": 1.40102219581604, "step": 5914 }, { "epoch": 1.0768180577045599, "grad_norm": 6.40625, "learning_rate": 3.875648006670696e-06, "loss": 1.3365706205368042, "step": 5916 }, { "epoch": 1.0771821243287523, "grad_norm": 9.0, "learning_rate": 3.874959128272096e-06, "loss": 1.4652103185653687, "step": 5918 }, { "epoch": 1.0775461909529445, "grad_norm": 6.4375, "learning_rate": 3.874270121476789e-06, "loss": 1.1194751262664795, "step": 5920 }, { "epoch": 1.0779102575771367, "grad_norm": 7.34375, "learning_rate": 3.873580986385884e-06, "loss": 1.4235799312591553, "step": 5922 }, { "epoch": 1.0782743242013288, "grad_norm": 11.25, "learning_rate": 3.872891723100512e-06, "loss": 1.8807854652404785, "step": 5924 }, { "epoch": 1.078638390825521, "grad_norm": 15.3125, "learning_rate": 3.872202331721815e-06, "loss": 1.4013569355010986, "step": 5926 }, { "epoch": 1.0790024574497132, "grad_norm": 9.25, "learning_rate": 3.871512812350962e-06, "loss": 1.485183835029602, "step": 5928 }, { "epoch": 1.0793665240739054, "grad_norm": 16.375, "learning_rate": 3.8708231650891345e-06, "loss": 1.5554332733154297, "step": 5930 }, { "epoch": 1.0797305906980978, "grad_norm": 12.5625, "learning_rate": 3.870133390037537e-06, "loss": 1.461188554763794, "step": 5932 }, { "epoch": 1.08009465732229, "grad_norm": 19.625, "learning_rate": 3.869443487297392e-06, "loss": 1.625997543334961, "step": 5934 }, { "epoch": 1.0804587239464822, "grad_norm": 2.609375, "learning_rate": 3.868753456969937e-06, "loss": 1.266181230545044, "step": 5936 }, { "epoch": 1.0808227905706744, "grad_norm": 43.25, "learning_rate": 3.868063299156434e-06, "loss": 2.1645095348358154, "step": 5938 }, { "epoch": 1.0811868571948666, "grad_norm": 82.5, "learning_rate": 3.867373013958159e-06, "loss": 1.5211347341537476, "step": 5940 }, { "epoch": 1.0815509238190588, "grad_norm": 4.25, "learning_rate": 3.86668260147641e-06, "loss": 0.9621850252151489, "step": 5942 }, { "epoch": 1.081914990443251, "grad_norm": 15.5, "learning_rate": 3.865992061812501e-06, "loss": 1.035224199295044, "step": 5944 }, { "epoch": 1.0822790570674434, "grad_norm": 17.125, "learning_rate": 3.865301395067768e-06, "loss": 1.3911495208740234, "step": 5946 }, { "epoch": 1.0826431236916356, "grad_norm": 31.625, "learning_rate": 3.864610601343562e-06, "loss": 1.8952548503875732, "step": 5948 }, { "epoch": 1.0830071903158278, "grad_norm": 12.0, "learning_rate": 3.863919680741253e-06, "loss": 1.6181720495224, "step": 5950 }, { "epoch": 1.08337125694002, "grad_norm": 14.5, "learning_rate": 3.863228633362232e-06, "loss": 0.6717743873596191, "step": 5952 }, { "epoch": 1.0837353235642122, "grad_norm": 19.75, "learning_rate": 3.862537459307908e-06, "loss": 1.3409478664398193, "step": 5954 }, { "epoch": 1.0840993901884044, "grad_norm": 24.375, "learning_rate": 3.861846158679707e-06, "loss": 2.1179358959198, "step": 5956 }, { "epoch": 1.0844634568125966, "grad_norm": 8.0, "learning_rate": 3.861154731579075e-06, "loss": 1.4280810356140137, "step": 5958 }, { "epoch": 1.084827523436789, "grad_norm": 37.25, "learning_rate": 3.8604631781074755e-06, "loss": 1.9865317344665527, "step": 5960 }, { "epoch": 1.0851915900609812, "grad_norm": 10.4375, "learning_rate": 3.859771498366392e-06, "loss": 1.5203263759613037, "step": 5962 }, { "epoch": 1.0855556566851734, "grad_norm": 24.375, "learning_rate": 3.859079692457327e-06, "loss": 1.3228542804718018, "step": 5964 }, { "epoch": 1.0859197233093656, "grad_norm": 13.5, "learning_rate": 3.858387760481797e-06, "loss": 0.8345619440078735, "step": 5966 }, { "epoch": 1.0862837899335578, "grad_norm": 8.375, "learning_rate": 3.857695702541343e-06, "loss": 1.1953986883163452, "step": 5968 }, { "epoch": 1.08664785655775, "grad_norm": 20.625, "learning_rate": 3.85700351873752e-06, "loss": 0.8473007678985596, "step": 5970 }, { "epoch": 1.0870119231819424, "grad_norm": 7.53125, "learning_rate": 3.856311209171904e-06, "loss": 1.0867971181869507, "step": 5972 }, { "epoch": 1.0873759898061346, "grad_norm": 24.875, "learning_rate": 3.855618773946087e-06, "loss": 1.3933706283569336, "step": 5974 }, { "epoch": 1.0877400564303268, "grad_norm": 9.625, "learning_rate": 3.854926213161684e-06, "loss": 1.458542823791504, "step": 5976 }, { "epoch": 1.088104123054519, "grad_norm": 16.0, "learning_rate": 3.8542335269203235e-06, "loss": 1.3841851949691772, "step": 5978 }, { "epoch": 1.0884681896787112, "grad_norm": 46.25, "learning_rate": 3.853540715323655e-06, "loss": 1.2392947673797607, "step": 5980 }, { "epoch": 1.0888322563029034, "grad_norm": 11.8125, "learning_rate": 3.852847778473345e-06, "loss": 1.506159782409668, "step": 5982 }, { "epoch": 1.0891963229270956, "grad_norm": 9.4375, "learning_rate": 3.8521547164710805e-06, "loss": 1.456261157989502, "step": 5984 }, { "epoch": 1.089560389551288, "grad_norm": 20.75, "learning_rate": 3.8514615294185656e-06, "loss": 2.058417797088623, "step": 5986 }, { "epoch": 1.0899244561754802, "grad_norm": 20.125, "learning_rate": 3.850768217417521e-06, "loss": 1.8634366989135742, "step": 5988 }, { "epoch": 1.0902885227996724, "grad_norm": 21.25, "learning_rate": 3.850074780569688e-06, "loss": 1.9875717163085938, "step": 5990 }, { "epoch": 1.0906525894238646, "grad_norm": 27.25, "learning_rate": 3.8493812189768266e-06, "loss": 0.8094217777252197, "step": 5992 }, { "epoch": 1.0910166560480568, "grad_norm": 18.5, "learning_rate": 3.848687532740713e-06, "loss": 1.0909367799758911, "step": 5994 }, { "epoch": 1.091380722672249, "grad_norm": 25.125, "learning_rate": 3.847993721963143e-06, "loss": 1.292501449584961, "step": 5996 }, { "epoch": 1.0917447892964414, "grad_norm": 3.828125, "learning_rate": 3.847299786745931e-06, "loss": 0.9557967185974121, "step": 5998 }, { "epoch": 1.0921088559206336, "grad_norm": 16.75, "learning_rate": 3.846605727190911e-06, "loss": 1.5790600776672363, "step": 6000 }, { "epoch": 1.0924729225448258, "grad_norm": 15.0, "learning_rate": 3.845911543399931e-06, "loss": 0.7537775039672852, "step": 6002 }, { "epoch": 1.092836989169018, "grad_norm": 15.1875, "learning_rate": 3.8452172354748585e-06, "loss": 1.8361414670944214, "step": 6004 }, { "epoch": 1.0932010557932101, "grad_norm": 24.25, "learning_rate": 3.844522803517583e-06, "loss": 1.5165725946426392, "step": 6006 }, { "epoch": 1.0935651224174023, "grad_norm": 28.375, "learning_rate": 3.84382824763001e-06, "loss": 1.5054811239242554, "step": 6008 }, { "epoch": 1.0939291890415945, "grad_norm": 13.5625, "learning_rate": 3.8431335679140595e-06, "loss": 0.9894658923149109, "step": 6010 }, { "epoch": 1.0942932556657867, "grad_norm": 14.3125, "learning_rate": 3.842438764471674e-06, "loss": 1.6074790954589844, "step": 6012 }, { "epoch": 1.0946573222899791, "grad_norm": 9.5625, "learning_rate": 3.841743837404815e-06, "loss": 1.5319018363952637, "step": 6014 }, { "epoch": 1.0950213889141713, "grad_norm": 27.625, "learning_rate": 3.84104878681546e-06, "loss": 1.6745892763137817, "step": 6016 }, { "epoch": 1.0953854555383635, "grad_norm": 48.0, "learning_rate": 3.840353612805604e-06, "loss": 2.157043695449829, "step": 6018 }, { "epoch": 1.0957495221625557, "grad_norm": 8.25, "learning_rate": 3.83965831547726e-06, "loss": 1.0617197751998901, "step": 6020 }, { "epoch": 1.096113588786748, "grad_norm": 19.0, "learning_rate": 3.838962894932462e-06, "loss": 1.6075338125228882, "step": 6022 }, { "epoch": 1.09647765541094, "grad_norm": 15.5, "learning_rate": 3.838267351273258e-06, "loss": 1.4352613687515259, "step": 6024 }, { "epoch": 1.0968417220351325, "grad_norm": 4.0625, "learning_rate": 3.837571684601718e-06, "loss": 1.0723764896392822, "step": 6026 }, { "epoch": 1.0972057886593247, "grad_norm": 2.890625, "learning_rate": 3.836875895019928e-06, "loss": 1.1767336130142212, "step": 6028 }, { "epoch": 1.097569855283517, "grad_norm": 17.0, "learning_rate": 3.836179982629992e-06, "loss": 1.2921134233474731, "step": 6030 }, { "epoch": 1.097933921907709, "grad_norm": 12.3125, "learning_rate": 3.8354839475340325e-06, "loss": 1.462576985359192, "step": 6032 }, { "epoch": 1.0982979885319013, "grad_norm": 5.09375, "learning_rate": 3.8347877898341875e-06, "loss": 1.1341749429702759, "step": 6034 }, { "epoch": 1.0986620551560935, "grad_norm": 4.9375, "learning_rate": 3.834091509632619e-06, "loss": 1.294270396232605, "step": 6036 }, { "epoch": 1.0990261217802857, "grad_norm": 4.125, "learning_rate": 3.833395107031503e-06, "loss": 1.5328009128570557, "step": 6038 }, { "epoch": 1.099390188404478, "grad_norm": 4.59375, "learning_rate": 3.832698582133031e-06, "loss": 1.438232183456421, "step": 6040 }, { "epoch": 1.0997542550286703, "grad_norm": 2.84375, "learning_rate": 3.8320019350394165e-06, "loss": 1.1747462749481201, "step": 6042 }, { "epoch": 1.1001183216528625, "grad_norm": 25.875, "learning_rate": 3.831305165852891e-06, "loss": 1.3242974281311035, "step": 6044 }, { "epoch": 1.1004823882770547, "grad_norm": 21.75, "learning_rate": 3.8306082746757e-06, "loss": 0.571100115776062, "step": 6046 }, { "epoch": 1.1008464549012469, "grad_norm": 8.5, "learning_rate": 3.829911261610112e-06, "loss": 1.338200569152832, "step": 6048 }, { "epoch": 1.101210521525439, "grad_norm": 10.4375, "learning_rate": 3.829214126758409e-06, "loss": 1.8243277072906494, "step": 6050 }, { "epoch": 1.1015745881496315, "grad_norm": 15.6875, "learning_rate": 3.828516870222894e-06, "loss": 1.8486000299453735, "step": 6052 }, { "epoch": 1.1019386547738237, "grad_norm": 7.8125, "learning_rate": 3.827819492105885e-06, "loss": 1.3602410554885864, "step": 6054 }, { "epoch": 1.1023027213980159, "grad_norm": 37.25, "learning_rate": 3.827121992509721e-06, "loss": 1.759181261062622, "step": 6056 }, { "epoch": 1.102666788022208, "grad_norm": 18.375, "learning_rate": 3.826424371536756e-06, "loss": 1.0174120664596558, "step": 6058 }, { "epoch": 1.1030308546464003, "grad_norm": 12.9375, "learning_rate": 3.8257266292893655e-06, "loss": 1.3128403425216675, "step": 6060 }, { "epoch": 1.1033949212705925, "grad_norm": 9.4375, "learning_rate": 3.8250287658699366e-06, "loss": 1.3344205617904663, "step": 6062 }, { "epoch": 1.1037589878947847, "grad_norm": 39.75, "learning_rate": 3.824330781380882e-06, "loss": 0.82818603515625, "step": 6064 }, { "epoch": 1.104123054518977, "grad_norm": 12.4375, "learning_rate": 3.823632675924623e-06, "loss": 1.4115170240402222, "step": 6066 }, { "epoch": 1.1044871211431693, "grad_norm": 12.4375, "learning_rate": 3.8229344496036094e-06, "loss": 1.4783213138580322, "step": 6068 }, { "epoch": 1.1048511877673615, "grad_norm": 18.125, "learning_rate": 3.822236102520299e-06, "loss": 1.4205418825149536, "step": 6070 }, { "epoch": 1.1052152543915537, "grad_norm": 14.5625, "learning_rate": 3.821537634777173e-06, "loss": 1.6862735748291016, "step": 6072 }, { "epoch": 1.1055793210157459, "grad_norm": 27.25, "learning_rate": 3.820839046476728e-06, "loss": 2.1544601917266846, "step": 6074 }, { "epoch": 1.105943387639938, "grad_norm": 10.5, "learning_rate": 3.820140337721481e-06, "loss": 1.4112235307693481, "step": 6076 }, { "epoch": 1.1063074542641302, "grad_norm": 8.8125, "learning_rate": 3.819441508613962e-06, "loss": 1.6237759590148926, "step": 6078 }, { "epoch": 1.1066715208883227, "grad_norm": 25.125, "learning_rate": 3.818742559256723e-06, "loss": 1.4444926977157593, "step": 6080 }, { "epoch": 1.1070355875125149, "grad_norm": 8.5, "learning_rate": 3.8180434897523315e-06, "loss": 1.264920949935913, "step": 6082 }, { "epoch": 1.107399654136707, "grad_norm": 14.25, "learning_rate": 3.817344300203373e-06, "loss": 1.087512493133545, "step": 6084 }, { "epoch": 1.1077637207608992, "grad_norm": 10.8125, "learning_rate": 3.81664499071245e-06, "loss": 1.535239338874817, "step": 6086 }, { "epoch": 1.1081277873850914, "grad_norm": 2.515625, "learning_rate": 3.815945561382185e-06, "loss": 1.3006737232208252, "step": 6088 }, { "epoch": 1.1084918540092836, "grad_norm": 14.625, "learning_rate": 3.815246012315216e-06, "loss": 1.7061011791229248, "step": 6090 }, { "epoch": 1.1088559206334758, "grad_norm": 122.0, "learning_rate": 3.8145463436141973e-06, "loss": 1.8039379119873047, "step": 6092 }, { "epoch": 1.1092199872576682, "grad_norm": 6.25, "learning_rate": 3.813846555381805e-06, "loss": 1.220511794090271, "step": 6094 }, { "epoch": 1.1095840538818604, "grad_norm": 17.75, "learning_rate": 3.813146647720728e-06, "loss": 1.2950901985168457, "step": 6096 }, { "epoch": 1.1099481205060526, "grad_norm": 10.8125, "learning_rate": 3.8124466207336765e-06, "loss": 1.3862860202789307, "step": 6098 }, { "epoch": 1.1103121871302448, "grad_norm": 8.4375, "learning_rate": 3.811746474523376e-06, "loss": 1.4157158136367798, "step": 6100 }, { "epoch": 1.110676253754437, "grad_norm": 12.5, "learning_rate": 3.8110462091925694e-06, "loss": 1.5394641160964966, "step": 6102 }, { "epoch": 1.1110403203786292, "grad_norm": 16.875, "learning_rate": 3.8103458248440196e-06, "loss": 1.4767036437988281, "step": 6104 }, { "epoch": 1.1114043870028216, "grad_norm": 7.71875, "learning_rate": 3.809645321580503e-06, "loss": 1.1851692199707031, "step": 6106 }, { "epoch": 1.1117684536270138, "grad_norm": 13.0625, "learning_rate": 3.8089446995048174e-06, "loss": 1.2467987537384033, "step": 6108 }, { "epoch": 1.112132520251206, "grad_norm": 8.8125, "learning_rate": 3.808243958719775e-06, "loss": 1.4570598602294922, "step": 6110 }, { "epoch": 1.1124965868753982, "grad_norm": 4.25, "learning_rate": 3.807543099328207e-06, "loss": 1.2031669616699219, "step": 6112 }, { "epoch": 1.1128606534995904, "grad_norm": 8.4375, "learning_rate": 3.806842121432962e-06, "loss": 1.3806991577148438, "step": 6114 }, { "epoch": 1.1132247201237826, "grad_norm": 26.25, "learning_rate": 3.806141025136906e-06, "loss": 1.5639595985412598, "step": 6116 }, { "epoch": 1.1135887867479748, "grad_norm": 9.75, "learning_rate": 3.8054398105429212e-06, "loss": 1.386110782623291, "step": 6118 }, { "epoch": 1.1139528533721672, "grad_norm": 16.75, "learning_rate": 3.8047384777539088e-06, "loss": 1.7756434679031372, "step": 6120 }, { "epoch": 1.1143169199963594, "grad_norm": 16.125, "learning_rate": 3.8040370268727855e-06, "loss": 1.5200403928756714, "step": 6122 }, { "epoch": 1.1146809866205516, "grad_norm": 7.4375, "learning_rate": 3.8033354580024875e-06, "loss": 1.5931994915008545, "step": 6124 }, { "epoch": 1.1150450532447438, "grad_norm": 218.0, "learning_rate": 3.802633771245966e-06, "loss": 1.413737416267395, "step": 6126 }, { "epoch": 1.115409119868936, "grad_norm": 52.5, "learning_rate": 3.8019319667061926e-06, "loss": 1.872003436088562, "step": 6128 }, { "epoch": 1.1157731864931282, "grad_norm": 8.875, "learning_rate": 3.8012300444861514e-06, "loss": 1.4467624425888062, "step": 6130 }, { "epoch": 1.1161372531173204, "grad_norm": 11.5, "learning_rate": 3.8005280046888494e-06, "loss": 1.5086885690689087, "step": 6132 }, { "epoch": 1.1165013197415128, "grad_norm": 15.1875, "learning_rate": 3.7998258474173067e-06, "loss": 1.60709810256958, "step": 6134 }, { "epoch": 1.116865386365705, "grad_norm": 12.5, "learning_rate": 3.7991235727745622e-06, "loss": 1.5082638263702393, "step": 6136 }, { "epoch": 1.1172294529898972, "grad_norm": 9.5625, "learning_rate": 3.798421180863673e-06, "loss": 1.2955968379974365, "step": 6138 }, { "epoch": 1.1175935196140894, "grad_norm": 15.25, "learning_rate": 3.7977186717877103e-06, "loss": 1.392683744430542, "step": 6140 }, { "epoch": 1.1179575862382816, "grad_norm": 20.875, "learning_rate": 3.7970160456497652e-06, "loss": 1.4873758554458618, "step": 6142 }, { "epoch": 1.1183216528624738, "grad_norm": 8.1875, "learning_rate": 3.7963133025529454e-06, "loss": 0.9370136260986328, "step": 6144 }, { "epoch": 1.118685719486666, "grad_norm": 14.25, "learning_rate": 3.795610442600376e-06, "loss": 1.475476622581482, "step": 6146 }, { "epoch": 1.1190497861108584, "grad_norm": 16.875, "learning_rate": 3.794907465895198e-06, "loss": 2.063793420791626, "step": 6148 }, { "epoch": 1.1194138527350506, "grad_norm": 5.6875, "learning_rate": 3.7942043725405707e-06, "loss": 0.985072135925293, "step": 6150 }, { "epoch": 1.1197779193592428, "grad_norm": 8.1875, "learning_rate": 3.793501162639671e-06, "loss": 0.963082492351532, "step": 6152 }, { "epoch": 1.120141985983435, "grad_norm": 7.03125, "learning_rate": 3.792797836295691e-06, "loss": 0.8796142339706421, "step": 6154 }, { "epoch": 1.1205060526076271, "grad_norm": 5.125, "learning_rate": 3.7920943936118415e-06, "loss": 0.9691740274429321, "step": 6156 }, { "epoch": 1.1208701192318193, "grad_norm": 11.1875, "learning_rate": 3.79139083469135e-06, "loss": 1.4237829446792603, "step": 6158 }, { "epoch": 1.1212341858560118, "grad_norm": 14.5625, "learning_rate": 3.790687159637462e-06, "loss": 1.4156314134597778, "step": 6160 }, { "epoch": 1.121598252480204, "grad_norm": 9.3125, "learning_rate": 3.789983368553436e-06, "loss": 1.5332188606262207, "step": 6162 }, { "epoch": 1.1219623191043961, "grad_norm": 7.875, "learning_rate": 3.789279461542552e-06, "loss": 1.516160488128662, "step": 6164 }, { "epoch": 1.1223263857285883, "grad_norm": 38.5, "learning_rate": 3.7885754387081065e-06, "loss": 2.1352405548095703, "step": 6166 }, { "epoch": 1.1226904523527805, "grad_norm": 8.5625, "learning_rate": 3.7878713001534106e-06, "loss": 1.156064748764038, "step": 6168 }, { "epoch": 1.1230545189769727, "grad_norm": 19.375, "learning_rate": 3.7871670459817956e-06, "loss": 1.2078046798706055, "step": 6170 }, { "epoch": 1.123418585601165, "grad_norm": 82.5, "learning_rate": 3.786462676296606e-06, "loss": 1.2897133827209473, "step": 6172 }, { "epoch": 1.1237826522253573, "grad_norm": 8.0, "learning_rate": 3.7857581912012054e-06, "loss": 1.4413726329803467, "step": 6174 }, { "epoch": 1.1241467188495495, "grad_norm": 8.5, "learning_rate": 3.785053590798975e-06, "loss": 1.313905119895935, "step": 6176 }, { "epoch": 1.1245107854737417, "grad_norm": 3.84375, "learning_rate": 3.7843488751933123e-06, "loss": 0.956428587436676, "step": 6178 }, { "epoch": 1.124874852097934, "grad_norm": 10.0625, "learning_rate": 3.78364404448763e-06, "loss": 1.476861834526062, "step": 6180 }, { "epoch": 1.1252389187221261, "grad_norm": 7.5, "learning_rate": 3.7829390987853596e-06, "loss": 1.4707458019256592, "step": 6182 }, { "epoch": 1.1256029853463183, "grad_norm": 7.8125, "learning_rate": 3.78223403818995e-06, "loss": 1.4864747524261475, "step": 6184 }, { "epoch": 1.1259670519705107, "grad_norm": 9.125, "learning_rate": 3.7815288628048664e-06, "loss": 1.188392162322998, "step": 6186 }, { "epoch": 1.126331118594703, "grad_norm": 3.921875, "learning_rate": 3.7808235727335884e-06, "loss": 1.194756031036377, "step": 6188 }, { "epoch": 1.1266951852188951, "grad_norm": 15.875, "learning_rate": 3.780118168079615e-06, "loss": 1.4416488409042358, "step": 6190 }, { "epoch": 1.1270592518430873, "grad_norm": 9.25, "learning_rate": 3.7794126489464635e-06, "loss": 1.4451711177825928, "step": 6192 }, { "epoch": 1.1274233184672795, "grad_norm": 22.875, "learning_rate": 3.7787070154376624e-06, "loss": 2.0071449279785156, "step": 6194 }, { "epoch": 1.1277873850914717, "grad_norm": 12.0, "learning_rate": 3.7780012676567645e-06, "loss": 1.4033293724060059, "step": 6196 }, { "epoch": 1.1281514517156639, "grad_norm": 78.5, "learning_rate": 3.777295405707333e-06, "loss": 1.1076610088348389, "step": 6198 }, { "epoch": 1.128515518339856, "grad_norm": 10.125, "learning_rate": 3.7765894296929505e-06, "loss": 1.2459193468093872, "step": 6200 }, { "epoch": 1.1288795849640485, "grad_norm": 21.25, "learning_rate": 3.7758833397172166e-06, "loss": 1.7686487436294556, "step": 6202 }, { "epoch": 1.1292436515882407, "grad_norm": 3.65625, "learning_rate": 3.7751771358837476e-06, "loss": 0.9775285720825195, "step": 6204 }, { "epoch": 1.1296077182124329, "grad_norm": 12.25, "learning_rate": 3.7744708182961742e-06, "loss": 1.4092994928359985, "step": 6206 }, { "epoch": 1.129971784836625, "grad_norm": 13.375, "learning_rate": 3.7737643870581474e-06, "loss": 0.5858030319213867, "step": 6208 }, { "epoch": 1.1303358514608173, "grad_norm": 35.75, "learning_rate": 3.7730578422733334e-06, "loss": 1.3565285205841064, "step": 6210 }, { "epoch": 1.1306999180850095, "grad_norm": 14.125, "learning_rate": 3.772351184045413e-06, "loss": 0.991074800491333, "step": 6212 }, { "epoch": 1.1310639847092019, "grad_norm": 10.0, "learning_rate": 3.771644412478086e-06, "loss": 1.883345127105713, "step": 6214 }, { "epoch": 1.131428051333394, "grad_norm": 19.75, "learning_rate": 3.770937527675069e-06, "loss": 1.526034951210022, "step": 6216 }, { "epoch": 1.1317921179575863, "grad_norm": 11.125, "learning_rate": 3.7702305297400955e-06, "loss": 1.4189389944076538, "step": 6218 }, { "epoch": 1.1321561845817785, "grad_norm": 12.3125, "learning_rate": 3.7695234187769114e-06, "loss": 1.7195016145706177, "step": 6220 }, { "epoch": 1.1325202512059707, "grad_norm": 15.0, "learning_rate": 3.7688161948892854e-06, "loss": 1.805555820465088, "step": 6222 }, { "epoch": 1.1328843178301629, "grad_norm": 10.6875, "learning_rate": 3.7681088581809975e-06, "loss": 0.8778680562973022, "step": 6224 }, { "epoch": 1.133248384454355, "grad_norm": 3.0, "learning_rate": 3.7674014087558487e-06, "loss": 1.0607777833938599, "step": 6226 }, { "epoch": 1.1336124510785475, "grad_norm": 10.875, "learning_rate": 3.7666938467176527e-06, "loss": 1.3322945833206177, "step": 6228 }, { "epoch": 1.1339765177027397, "grad_norm": 7.0625, "learning_rate": 3.7659861721702416e-06, "loss": 1.4955732822418213, "step": 6230 }, { "epoch": 1.1343405843269319, "grad_norm": 10.0, "learning_rate": 3.7652783852174647e-06, "loss": 1.3816795349121094, "step": 6232 }, { "epoch": 1.134704650951124, "grad_norm": 13.25, "learning_rate": 3.7645704859631848e-06, "loss": 2.000274896621704, "step": 6234 }, { "epoch": 1.1350687175753162, "grad_norm": 8.4375, "learning_rate": 3.763862474511286e-06, "loss": 1.4250493049621582, "step": 6236 }, { "epoch": 1.1354327841995084, "grad_norm": 10.1875, "learning_rate": 3.763154350965664e-06, "loss": 1.4186378717422485, "step": 6238 }, { "epoch": 1.1357968508237009, "grad_norm": 20.75, "learning_rate": 3.762446115430235e-06, "loss": 0.6588457822799683, "step": 6240 }, { "epoch": 1.136160917447893, "grad_norm": 17.625, "learning_rate": 3.7617377680089274e-06, "loss": 1.4386787414550781, "step": 6242 }, { "epoch": 1.1365249840720852, "grad_norm": 6.65625, "learning_rate": 3.7610293088056894e-06, "loss": 1.1824570894241333, "step": 6244 }, { "epoch": 1.1368890506962774, "grad_norm": 11.0, "learning_rate": 3.7603207379244843e-06, "loss": 1.5339453220367432, "step": 6246 }, { "epoch": 1.1372531173204696, "grad_norm": 11.625, "learning_rate": 3.7596120554692916e-06, "loss": 1.4633429050445557, "step": 6248 }, { "epoch": 1.1376171839446618, "grad_norm": 8.75, "learning_rate": 3.7589032615441102e-06, "loss": 1.4041224718093872, "step": 6250 }, { "epoch": 1.137981250568854, "grad_norm": 29.625, "learning_rate": 3.7581943562529487e-06, "loss": 1.233878493309021, "step": 6252 }, { "epoch": 1.1383453171930462, "grad_norm": 8.75, "learning_rate": 3.757485339699839e-06, "loss": 1.4670976400375366, "step": 6254 }, { "epoch": 1.1387093838172386, "grad_norm": 5.5, "learning_rate": 3.7567762119888262e-06, "loss": 1.093485713005066, "step": 6256 }, { "epoch": 1.1390734504414308, "grad_norm": 14.625, "learning_rate": 3.7560669732239698e-06, "loss": 1.4327590465545654, "step": 6258 }, { "epoch": 1.139437517065623, "grad_norm": 9.5625, "learning_rate": 3.7553576235093503e-06, "loss": 1.494154930114746, "step": 6260 }, { "epoch": 1.1398015836898152, "grad_norm": 16.625, "learning_rate": 3.7546481629490606e-06, "loss": 1.3798234462738037, "step": 6262 }, { "epoch": 1.1401656503140074, "grad_norm": 7.875, "learning_rate": 3.7539385916472116e-06, "loss": 1.4378374814987183, "step": 6264 }, { "epoch": 1.1405297169381996, "grad_norm": 11.8125, "learning_rate": 3.753228909707929e-06, "loss": 1.4703234434127808, "step": 6266 }, { "epoch": 1.140893783562392, "grad_norm": 14.0, "learning_rate": 3.752519117235356e-06, "loss": 1.3794260025024414, "step": 6268 }, { "epoch": 1.1412578501865842, "grad_norm": 27.5, "learning_rate": 3.751809214333654e-06, "loss": 1.4715687036514282, "step": 6270 }, { "epoch": 1.1416219168107764, "grad_norm": 50.25, "learning_rate": 3.7510992011069946e-06, "loss": 1.395397424697876, "step": 6272 }, { "epoch": 1.1419859834349686, "grad_norm": 11.875, "learning_rate": 3.750389077659573e-06, "loss": 1.6279557943344116, "step": 6274 }, { "epoch": 1.1423500500591608, "grad_norm": 33.5, "learning_rate": 3.7496788440955946e-06, "loss": 1.9665371179580688, "step": 6276 }, { "epoch": 1.142714116683353, "grad_norm": 27.125, "learning_rate": 3.7489685005192834e-06, "loss": 1.6656520366668701, "step": 6278 }, { "epoch": 1.1430781833075452, "grad_norm": 11.25, "learning_rate": 3.7482580470348805e-06, "loss": 1.1945672035217285, "step": 6280 }, { "epoch": 1.1434422499317376, "grad_norm": 6.6875, "learning_rate": 3.747547483746643e-06, "loss": 1.0218722820281982, "step": 6282 }, { "epoch": 1.1438063165559298, "grad_norm": 9.1875, "learning_rate": 3.7468368107588405e-06, "loss": 1.4882116317749023, "step": 6284 }, { "epoch": 1.144170383180122, "grad_norm": 21.875, "learning_rate": 3.7461260281757627e-06, "loss": 1.4471235275268555, "step": 6286 }, { "epoch": 1.1445344498043142, "grad_norm": 9.125, "learning_rate": 3.7454151361017143e-06, "loss": 1.3163316249847412, "step": 6288 }, { "epoch": 1.1448985164285064, "grad_norm": 6.96875, "learning_rate": 3.744704134641015e-06, "loss": 1.174575686454773, "step": 6290 }, { "epoch": 1.1452625830526986, "grad_norm": 20.0, "learning_rate": 3.7439930238980026e-06, "loss": 2.2616584300994873, "step": 6292 }, { "epoch": 1.145626649676891, "grad_norm": 3.53125, "learning_rate": 3.743281803977029e-06, "loss": 1.545267105102539, "step": 6294 }, { "epoch": 1.1459907163010832, "grad_norm": 6.1875, "learning_rate": 3.742570474982463e-06, "loss": 1.1953632831573486, "step": 6296 }, { "epoch": 1.1463547829252754, "grad_norm": 7.625, "learning_rate": 3.741859037018688e-06, "loss": 1.3737146854400635, "step": 6298 }, { "epoch": 1.1467188495494676, "grad_norm": 53.75, "learning_rate": 3.741147490190108e-06, "loss": 1.0242600440979004, "step": 6300 }, { "epoch": 1.1470829161736598, "grad_norm": 25.25, "learning_rate": 3.740435834601136e-06, "loss": 1.9867749214172363, "step": 6302 }, { "epoch": 1.147446982797852, "grad_norm": 35.25, "learning_rate": 3.7397240703562064e-06, "loss": 2.025033473968506, "step": 6304 }, { "epoch": 1.1478110494220441, "grad_norm": 50.75, "learning_rate": 3.739012197559767e-06, "loss": 0.8330847024917603, "step": 6306 }, { "epoch": 1.1481751160462363, "grad_norm": 15.9375, "learning_rate": 3.738300216316282e-06, "loss": 1.5945284366607666, "step": 6308 }, { "epoch": 1.1485391826704288, "grad_norm": 12.1875, "learning_rate": 3.737588126730233e-06, "loss": 1.3502593040466309, "step": 6310 }, { "epoch": 1.148903249294621, "grad_norm": 17.125, "learning_rate": 3.736875928906116e-06, "loss": 1.414579153060913, "step": 6312 }, { "epoch": 1.1492673159188131, "grad_norm": 17.125, "learning_rate": 3.736163622948442e-06, "loss": 1.363706111907959, "step": 6314 }, { "epoch": 1.1496313825430053, "grad_norm": 14.5625, "learning_rate": 3.7354512089617412e-06, "loss": 1.519248366355896, "step": 6316 }, { "epoch": 1.1499954491671975, "grad_norm": 14.6875, "learning_rate": 3.734738687050554e-06, "loss": 1.5777974128723145, "step": 6318 }, { "epoch": 1.1503595157913897, "grad_norm": 16.25, "learning_rate": 3.734026057319443e-06, "loss": 1.163029432296753, "step": 6320 }, { "epoch": 1.1507235824155821, "grad_norm": 21.125, "learning_rate": 3.733313319872983e-06, "loss": 0.8242368698120117, "step": 6322 }, { "epoch": 1.1510876490397743, "grad_norm": 25.0, "learning_rate": 3.732600474815765e-06, "loss": 1.5769751071929932, "step": 6324 }, { "epoch": 1.1514517156639665, "grad_norm": 6.53125, "learning_rate": 3.7318875222523964e-06, "loss": 1.1115974187850952, "step": 6326 }, { "epoch": 1.1518157822881587, "grad_norm": 8.3125, "learning_rate": 3.7311744622875e-06, "loss": 1.3775323629379272, "step": 6328 }, { "epoch": 1.152179848912351, "grad_norm": 7.0625, "learning_rate": 3.7304612950257134e-06, "loss": 0.9680616855621338, "step": 6330 }, { "epoch": 1.1525439155365431, "grad_norm": 16.875, "learning_rate": 3.729748020571694e-06, "loss": 1.9834116697311401, "step": 6332 }, { "epoch": 1.1529079821607353, "grad_norm": 14.4375, "learning_rate": 3.7290346390301092e-06, "loss": 1.4998986721038818, "step": 6334 }, { "epoch": 1.1532720487849277, "grad_norm": 8.5, "learning_rate": 3.728321150505645e-06, "loss": 1.357297420501709, "step": 6336 }, { "epoch": 1.15363611540912, "grad_norm": 14.25, "learning_rate": 3.727607555103003e-06, "loss": 1.3547457456588745, "step": 6338 }, { "epoch": 1.1540001820333121, "grad_norm": 16.25, "learning_rate": 3.7268938529269026e-06, "loss": 1.536704421043396, "step": 6340 }, { "epoch": 1.1543642486575043, "grad_norm": 9.6875, "learning_rate": 3.726180044082075e-06, "loss": 1.663135290145874, "step": 6342 }, { "epoch": 1.1547283152816965, "grad_norm": 16.25, "learning_rate": 3.7254661286732685e-06, "loss": 2.0617427825927734, "step": 6344 }, { "epoch": 1.1550923819058887, "grad_norm": 9.875, "learning_rate": 3.724752106805247e-06, "loss": 1.4491593837738037, "step": 6346 }, { "epoch": 1.1554564485300811, "grad_norm": 10.5, "learning_rate": 3.724037978582792e-06, "loss": 1.3757569789886475, "step": 6348 }, { "epoch": 1.1558205151542733, "grad_norm": 31.5, "learning_rate": 3.723323744110697e-06, "loss": 1.3985414505004883, "step": 6350 }, { "epoch": 1.1561845817784655, "grad_norm": 31.5, "learning_rate": 3.7226094034937754e-06, "loss": 1.372774362564087, "step": 6352 }, { "epoch": 1.1565486484026577, "grad_norm": 2.8125, "learning_rate": 3.721894956836851e-06, "loss": 1.2280420064926147, "step": 6354 }, { "epoch": 1.15691271502685, "grad_norm": 11.1875, "learning_rate": 3.7211804042447676e-06, "loss": 1.279487133026123, "step": 6356 }, { "epoch": 1.157276781651042, "grad_norm": 13.0, "learning_rate": 3.7204657458223825e-06, "loss": 0.48636630177497864, "step": 6358 }, { "epoch": 1.1576408482752343, "grad_norm": 7.78125, "learning_rate": 3.7197509816745693e-06, "loss": 1.4522373676300049, "step": 6360 }, { "epoch": 1.1580049148994265, "grad_norm": 7.1875, "learning_rate": 3.719036111906217e-06, "loss": 1.3992928266525269, "step": 6362 }, { "epoch": 1.158368981523619, "grad_norm": 22.875, "learning_rate": 3.7183211366222283e-06, "loss": 1.4296058416366577, "step": 6364 }, { "epoch": 1.158733048147811, "grad_norm": 7.75, "learning_rate": 3.717606055927524e-06, "loss": 1.2124700546264648, "step": 6366 }, { "epoch": 1.1590971147720033, "grad_norm": 9.375, "learning_rate": 3.7168908699270388e-06, "loss": 1.5632113218307495, "step": 6368 }, { "epoch": 1.1594611813961955, "grad_norm": 13.125, "learning_rate": 3.7161755787257237e-06, "loss": 1.3403748273849487, "step": 6370 }, { "epoch": 1.1598252480203877, "grad_norm": 20.75, "learning_rate": 3.715460182428546e-06, "loss": 1.4441494941711426, "step": 6372 }, { "epoch": 1.1601893146445799, "grad_norm": 21.125, "learning_rate": 3.7147446811404855e-06, "loss": 1.587996482849121, "step": 6374 }, { "epoch": 1.1605533812687723, "grad_norm": 10.25, "learning_rate": 3.7140290749665397e-06, "loss": 1.7855424880981445, "step": 6376 }, { "epoch": 1.1609174478929645, "grad_norm": 13.3125, "learning_rate": 3.7133133640117203e-06, "loss": 1.5762220621109009, "step": 6378 }, { "epoch": 1.1612815145171567, "grad_norm": 15.5, "learning_rate": 3.7125975483810562e-06, "loss": 2.0644497871398926, "step": 6380 }, { "epoch": 1.1616455811413489, "grad_norm": 8.6875, "learning_rate": 3.71188162817959e-06, "loss": 1.3270652294158936, "step": 6382 }, { "epoch": 1.162009647765541, "grad_norm": 8.875, "learning_rate": 3.7111656035123787e-06, "loss": 1.0801115036010742, "step": 6384 }, { "epoch": 1.1623737143897332, "grad_norm": 9.75, "learning_rate": 3.7104494744844975e-06, "loss": 1.1640353202819824, "step": 6386 }, { "epoch": 1.1627377810139254, "grad_norm": 3.765625, "learning_rate": 3.7097332412010357e-06, "loss": 0.9398530721664429, "step": 6388 }, { "epoch": 1.1631018476381179, "grad_norm": 14.625, "learning_rate": 3.7090169037670963e-06, "loss": 1.0305564403533936, "step": 6390 }, { "epoch": 1.16346591426231, "grad_norm": 8.6875, "learning_rate": 3.708300462287802e-06, "loss": 1.4037699699401855, "step": 6392 }, { "epoch": 1.1638299808865022, "grad_norm": 35.0, "learning_rate": 3.7075839168682824e-06, "loss": 1.5099254846572876, "step": 6394 }, { "epoch": 1.1641940475106944, "grad_norm": 38.0, "learning_rate": 3.706867267613693e-06, "loss": 1.5266382694244385, "step": 6396 }, { "epoch": 1.1645581141348866, "grad_norm": 19.375, "learning_rate": 3.7061505146291943e-06, "loss": 1.4627496004104614, "step": 6398 }, { "epoch": 1.1649221807590788, "grad_norm": 59.75, "learning_rate": 3.7054336580199714e-06, "loss": 1.4535367488861084, "step": 6400 }, { "epoch": 1.1652862473832712, "grad_norm": 4.34375, "learning_rate": 3.7047166978912165e-06, "loss": 1.1168949604034424, "step": 6402 }, { "epoch": 1.1656503140074634, "grad_norm": 7.03125, "learning_rate": 3.7039996343481434e-06, "loss": 1.052788496017456, "step": 6404 }, { "epoch": 1.1660143806316556, "grad_norm": 16.75, "learning_rate": 3.7032824674959765e-06, "loss": 0.997197151184082, "step": 6406 }, { "epoch": 1.1663784472558478, "grad_norm": 5.5625, "learning_rate": 3.7025651974399567e-06, "loss": 1.4710323810577393, "step": 6408 }, { "epoch": 1.16674251388004, "grad_norm": 16.625, "learning_rate": 3.7018478242853427e-06, "loss": 1.5309149026870728, "step": 6410 }, { "epoch": 1.1671065805042322, "grad_norm": 11.6875, "learning_rate": 3.701130348137405e-06, "loss": 1.3008434772491455, "step": 6412 }, { "epoch": 1.1674706471284244, "grad_norm": 19.375, "learning_rate": 3.7004127691014303e-06, "loss": 1.6052995920181274, "step": 6414 }, { "epoch": 1.1678347137526166, "grad_norm": 15.75, "learning_rate": 3.699695087282719e-06, "loss": 1.5112760066986084, "step": 6416 }, { "epoch": 1.168198780376809, "grad_norm": 21.125, "learning_rate": 3.6989773027865892e-06, "loss": 1.1832265853881836, "step": 6418 }, { "epoch": 1.1685628470010012, "grad_norm": 19.25, "learning_rate": 3.698259415718374e-06, "loss": 1.4476916790008545, "step": 6420 }, { "epoch": 1.1689269136251934, "grad_norm": 13.5, "learning_rate": 3.6975414261834185e-06, "loss": 1.552721381187439, "step": 6422 }, { "epoch": 1.1692909802493856, "grad_norm": 14.9375, "learning_rate": 3.696823334287086e-06, "loss": 1.6933646202087402, "step": 6424 }, { "epoch": 1.1696550468735778, "grad_norm": 12.0, "learning_rate": 3.6961051401347537e-06, "loss": 1.9006317853927612, "step": 6426 }, { "epoch": 1.1700191134977702, "grad_norm": 10.25, "learning_rate": 3.695386843831813e-06, "loss": 1.4743210077285767, "step": 6428 }, { "epoch": 1.1703831801219624, "grad_norm": 10.1875, "learning_rate": 3.69466844548367e-06, "loss": 1.2118438482284546, "step": 6430 }, { "epoch": 1.1707472467461546, "grad_norm": 18.0, "learning_rate": 3.6939499451957494e-06, "loss": 1.3957982063293457, "step": 6432 }, { "epoch": 1.1711113133703468, "grad_norm": 8.625, "learning_rate": 3.6932313430734856e-06, "loss": 1.3583909273147583, "step": 6434 }, { "epoch": 1.171475379994539, "grad_norm": 13.6875, "learning_rate": 3.692512639222332e-06, "loss": 1.5858925580978394, "step": 6436 }, { "epoch": 1.1718394466187312, "grad_norm": 24.5, "learning_rate": 3.6917938337477543e-06, "loss": 1.5727481842041016, "step": 6438 }, { "epoch": 1.1722035132429234, "grad_norm": 16.75, "learning_rate": 3.6910749267552358e-06, "loss": 1.4769070148468018, "step": 6440 }, { "epoch": 1.1725675798671156, "grad_norm": 8.6875, "learning_rate": 3.6903559183502734e-06, "loss": 1.0711969137191772, "step": 6442 }, { "epoch": 1.172931646491308, "grad_norm": 10.5625, "learning_rate": 3.6896368086383773e-06, "loss": 1.4186828136444092, "step": 6444 }, { "epoch": 1.1732957131155002, "grad_norm": 8.625, "learning_rate": 3.6889175977250735e-06, "loss": 1.549869179725647, "step": 6446 }, { "epoch": 1.1736597797396924, "grad_norm": 10.5625, "learning_rate": 3.688198285715904e-06, "loss": 1.3686096668243408, "step": 6448 }, { "epoch": 1.1740238463638846, "grad_norm": 10.0, "learning_rate": 3.6874788727164267e-06, "loss": 1.558035969734192, "step": 6450 }, { "epoch": 1.1743879129880768, "grad_norm": 13.0, "learning_rate": 3.686759358832209e-06, "loss": 1.6602004766464233, "step": 6452 }, { "epoch": 1.174751979612269, "grad_norm": 14.4375, "learning_rate": 3.68603974416884e-06, "loss": 1.914360523223877, "step": 6454 }, { "epoch": 1.1751160462364614, "grad_norm": 13.9375, "learning_rate": 3.6853200288319192e-06, "loss": 1.4059867858886719, "step": 6456 }, { "epoch": 1.1754801128606536, "grad_norm": 10.5625, "learning_rate": 3.6846002129270595e-06, "loss": 1.4582817554473877, "step": 6458 }, { "epoch": 1.1758441794848458, "grad_norm": 25.5, "learning_rate": 3.683880296559894e-06, "loss": 1.557992696762085, "step": 6460 }, { "epoch": 1.176208246109038, "grad_norm": 18.75, "learning_rate": 3.683160279836068e-06, "loss": 2.259274482727051, "step": 6462 }, { "epoch": 1.1765723127332302, "grad_norm": 7.65625, "learning_rate": 3.682440162861237e-06, "loss": 1.3134491443634033, "step": 6464 }, { "epoch": 1.1769363793574223, "grad_norm": 4.40625, "learning_rate": 3.6817199457410802e-06, "loss": 1.1946327686309814, "step": 6466 }, { "epoch": 1.1773004459816145, "grad_norm": 8.5625, "learning_rate": 3.680999628581282e-06, "loss": 1.3952946662902832, "step": 6468 }, { "epoch": 1.177664512605807, "grad_norm": 12.1875, "learning_rate": 3.6802792114875494e-06, "loss": 1.6258931159973145, "step": 6470 }, { "epoch": 1.1780285792299992, "grad_norm": 15.0, "learning_rate": 3.6795586945655992e-06, "loss": 1.6228737831115723, "step": 6472 }, { "epoch": 1.1783926458541913, "grad_norm": 9.0, "learning_rate": 3.678838077921165e-06, "loss": 1.9807703495025635, "step": 6474 }, { "epoch": 1.1787567124783835, "grad_norm": 9.0, "learning_rate": 3.678117361659993e-06, "loss": 1.339045524597168, "step": 6476 }, { "epoch": 1.1791207791025757, "grad_norm": 6.78125, "learning_rate": 3.677396545887847e-06, "loss": 1.327804446220398, "step": 6478 }, { "epoch": 1.179484845726768, "grad_norm": 10.0625, "learning_rate": 3.6766756307105025e-06, "loss": 1.1708130836486816, "step": 6480 }, { "epoch": 1.1798489123509603, "grad_norm": 41.75, "learning_rate": 3.6759546162337525e-06, "loss": 1.1664369106292725, "step": 6482 }, { "epoch": 1.1802129789751525, "grad_norm": 19.625, "learning_rate": 3.675233502563401e-06, "loss": 1.2675286531448364, "step": 6484 }, { "epoch": 1.1805770455993447, "grad_norm": 10.0625, "learning_rate": 3.674512289805271e-06, "loss": 1.5202103853225708, "step": 6486 }, { "epoch": 1.180941112223537, "grad_norm": 8.125, "learning_rate": 3.6737909780651936e-06, "loss": 1.4262146949768066, "step": 6488 }, { "epoch": 1.1813051788477291, "grad_norm": 56.0, "learning_rate": 3.6730695674490226e-06, "loss": 1.6234575510025024, "step": 6490 }, { "epoch": 1.1816692454719213, "grad_norm": 28.0, "learning_rate": 3.6723480580626203e-06, "loss": 2.057462453842163, "step": 6492 }, { "epoch": 1.1820333120961135, "grad_norm": 7.71875, "learning_rate": 3.671626450011865e-06, "loss": 1.234393835067749, "step": 6494 }, { "epoch": 1.1823973787203057, "grad_norm": 11.625, "learning_rate": 3.67090474340265e-06, "loss": 1.1700392961502075, "step": 6496 }, { "epoch": 1.1827614453444981, "grad_norm": 13.375, "learning_rate": 3.6701829383408814e-06, "loss": 0.8894488215446472, "step": 6498 }, { "epoch": 1.1831255119686903, "grad_norm": 13.4375, "learning_rate": 3.6694610349324843e-06, "loss": 1.0495619773864746, "step": 6500 }, { "epoch": 1.1834895785928825, "grad_norm": 12.125, "learning_rate": 3.6687390332833937e-06, "loss": 0.1490705907344818, "step": 6502 }, { "epoch": 1.1838536452170747, "grad_norm": 6.5625, "learning_rate": 3.6680169334995595e-06, "loss": 0.5033633708953857, "step": 6504 }, { "epoch": 1.184217711841267, "grad_norm": 11.25, "learning_rate": 3.667294735686947e-06, "loss": 1.5920605659484863, "step": 6506 }, { "epoch": 1.184581778465459, "grad_norm": 26.25, "learning_rate": 3.6665724399515367e-06, "loss": 1.7717106342315674, "step": 6508 }, { "epoch": 1.1849458450896515, "grad_norm": 13.5, "learning_rate": 3.665850046399323e-06, "loss": 1.6689963340759277, "step": 6510 }, { "epoch": 1.1853099117138437, "grad_norm": 14.9375, "learning_rate": 3.665127555136313e-06, "loss": 2.112014055252075, "step": 6512 }, { "epoch": 1.185673978338036, "grad_norm": 10.4375, "learning_rate": 3.66440496626853e-06, "loss": 1.7579913139343262, "step": 6514 }, { "epoch": 1.186038044962228, "grad_norm": 22.625, "learning_rate": 3.66368227990201e-06, "loss": 1.1560434103012085, "step": 6516 }, { "epoch": 1.1864021115864203, "grad_norm": 13.9375, "learning_rate": 3.662959496142805e-06, "loss": 1.8474457263946533, "step": 6518 }, { "epoch": 1.1867661782106125, "grad_norm": 8.75, "learning_rate": 3.6622366150969813e-06, "loss": 1.4579943418502808, "step": 6520 }, { "epoch": 1.1871302448348047, "grad_norm": 27.875, "learning_rate": 3.6615136368706185e-06, "loss": 1.1896178722381592, "step": 6522 }, { "epoch": 1.187494311458997, "grad_norm": 8.3125, "learning_rate": 3.66079056156981e-06, "loss": 1.6693971157073975, "step": 6524 }, { "epoch": 1.1878583780831893, "grad_norm": 14.75, "learning_rate": 3.6600673893006646e-06, "loss": 1.5194038152694702, "step": 6526 }, { "epoch": 1.1882224447073815, "grad_norm": 13.0625, "learning_rate": 3.6593441201693035e-06, "loss": 1.5235917568206787, "step": 6528 }, { "epoch": 1.1885865113315737, "grad_norm": 11.5, "learning_rate": 3.6586207542818664e-06, "loss": 1.4025870561599731, "step": 6530 }, { "epoch": 1.1889505779557659, "grad_norm": 11.5625, "learning_rate": 3.657897291744503e-06, "loss": 1.5682439804077148, "step": 6532 }, { "epoch": 1.189314644579958, "grad_norm": 10.75, "learning_rate": 3.6571737326633783e-06, "loss": 1.4926438331604004, "step": 6534 }, { "epoch": 1.1896787112041505, "grad_norm": 10.875, "learning_rate": 3.65645007714467e-06, "loss": 1.401459813117981, "step": 6536 }, { "epoch": 1.1900427778283427, "grad_norm": 6.90625, "learning_rate": 3.655726325294574e-06, "loss": 1.2278746366500854, "step": 6538 }, { "epoch": 1.1904068444525349, "grad_norm": 9.8125, "learning_rate": 3.655002477219297e-06, "loss": 1.2134888172149658, "step": 6540 }, { "epoch": 1.190770911076727, "grad_norm": 15.0625, "learning_rate": 3.654278533025063e-06, "loss": 1.4659929275512695, "step": 6542 }, { "epoch": 1.1911349777009193, "grad_norm": 10.0625, "learning_rate": 3.653554492818103e-06, "loss": 1.490921139717102, "step": 6544 }, { "epoch": 1.1914990443251114, "grad_norm": 10.1875, "learning_rate": 3.6528303567046717e-06, "loss": 1.394912838935852, "step": 6546 }, { "epoch": 1.1918631109493036, "grad_norm": 16.5, "learning_rate": 3.6521061247910296e-06, "loss": 1.4490734338760376, "step": 6548 }, { "epoch": 1.1922271775734958, "grad_norm": 9.3125, "learning_rate": 3.6513817971834574e-06, "loss": 1.504162073135376, "step": 6550 }, { "epoch": 1.1925912441976882, "grad_norm": 27.0, "learning_rate": 3.6506573739882468e-06, "loss": 1.6482129096984863, "step": 6552 }, { "epoch": 1.1929553108218804, "grad_norm": 15.125, "learning_rate": 3.649932855311703e-06, "loss": 1.8429118394851685, "step": 6554 }, { "epoch": 1.1933193774460726, "grad_norm": 10.25, "learning_rate": 3.649208241260147e-06, "loss": 1.273280143737793, "step": 6556 }, { "epoch": 1.1936834440702648, "grad_norm": 4.0, "learning_rate": 3.6484835319399113e-06, "loss": 0.8494876623153687, "step": 6558 }, { "epoch": 1.194047510694457, "grad_norm": 7.25, "learning_rate": 3.647758727457347e-06, "loss": 1.2064921855926514, "step": 6560 }, { "epoch": 1.1944115773186492, "grad_norm": 17.5, "learning_rate": 3.6470338279188143e-06, "loss": 1.4824676513671875, "step": 6562 }, { "epoch": 1.1947756439428416, "grad_norm": 12.875, "learning_rate": 3.6463088334306886e-06, "loss": 1.432023525238037, "step": 6564 }, { "epoch": 1.1951397105670338, "grad_norm": 52.25, "learning_rate": 3.6455837440993614e-06, "loss": 1.0832397937774658, "step": 6566 }, { "epoch": 1.195503777191226, "grad_norm": 10.375, "learning_rate": 3.6448585600312357e-06, "loss": 1.2010561227798462, "step": 6568 }, { "epoch": 1.1958678438154182, "grad_norm": 7.125, "learning_rate": 3.64413328133273e-06, "loss": 1.109591007232666, "step": 6570 }, { "epoch": 1.1962319104396104, "grad_norm": 12.375, "learning_rate": 3.6434079081102757e-06, "loss": 1.6361263990402222, "step": 6572 }, { "epoch": 1.1965959770638026, "grad_norm": 12.375, "learning_rate": 3.6426824404703186e-06, "loss": 1.4606677293777466, "step": 6574 }, { "epoch": 1.1969600436879948, "grad_norm": 12.6875, "learning_rate": 3.641956878519318e-06, "loss": 1.3994128704071045, "step": 6576 }, { "epoch": 1.1973241103121872, "grad_norm": 10.0625, "learning_rate": 3.641231222363746e-06, "loss": 1.3531302213668823, "step": 6578 }, { "epoch": 1.1976881769363794, "grad_norm": 6.34375, "learning_rate": 3.640505472110092e-06, "loss": 1.086106300354004, "step": 6580 }, { "epoch": 1.1980522435605716, "grad_norm": 5.53125, "learning_rate": 3.6397796278648555e-06, "loss": 1.4456672668457031, "step": 6582 }, { "epoch": 1.1984163101847638, "grad_norm": 5.0625, "learning_rate": 3.6390536897345517e-06, "loss": 0.8753069639205933, "step": 6584 }, { "epoch": 1.198780376808956, "grad_norm": 8.25, "learning_rate": 3.6383276578257074e-06, "loss": 1.2965389490127563, "step": 6586 }, { "epoch": 1.1991444434331482, "grad_norm": 32.0, "learning_rate": 3.6376015322448677e-06, "loss": 1.5784127712249756, "step": 6588 }, { "epoch": 1.1995085100573406, "grad_norm": 44.75, "learning_rate": 3.6368753130985866e-06, "loss": 1.6178051233291626, "step": 6590 }, { "epoch": 1.1998725766815328, "grad_norm": 6.96875, "learning_rate": 3.636149000493434e-06, "loss": 1.0059093236923218, "step": 6592 }, { "epoch": 1.200236643305725, "grad_norm": 5.9375, "learning_rate": 3.6354225945359944e-06, "loss": 1.343648910522461, "step": 6594 }, { "epoch": 1.2006007099299172, "grad_norm": 49.5, "learning_rate": 3.6346960953328637e-06, "loss": 1.422285556793213, "step": 6596 }, { "epoch": 1.2009647765541094, "grad_norm": 9.125, "learning_rate": 3.633969502990653e-06, "loss": 1.8281251192092896, "step": 6598 }, { "epoch": 1.2013288431783016, "grad_norm": 7.53125, "learning_rate": 3.6332428176159873e-06, "loss": 1.6549746990203857, "step": 6600 }, { "epoch": 1.2016929098024938, "grad_norm": 9.25, "learning_rate": 3.6325160393155047e-06, "loss": 1.582465648651123, "step": 6602 }, { "epoch": 1.202056976426686, "grad_norm": 8.1875, "learning_rate": 3.6317891681958562e-06, "loss": 1.3914471864700317, "step": 6604 }, { "epoch": 1.2024210430508784, "grad_norm": 18.5, "learning_rate": 3.631062204363708e-06, "loss": 1.3895189762115479, "step": 6606 }, { "epoch": 1.2027851096750706, "grad_norm": 3.625, "learning_rate": 3.6303351479257387e-06, "loss": 1.2460017204284668, "step": 6608 }, { "epoch": 1.2031491762992628, "grad_norm": 13.1875, "learning_rate": 3.629607998988641e-06, "loss": 1.2483797073364258, "step": 6610 }, { "epoch": 1.203513242923455, "grad_norm": 11.375, "learning_rate": 3.6288807576591213e-06, "loss": 1.703126072883606, "step": 6612 }, { "epoch": 1.2038773095476472, "grad_norm": 14.875, "learning_rate": 3.6281534240438986e-06, "loss": 1.4523193836212158, "step": 6614 }, { "epoch": 1.2042413761718394, "grad_norm": 17.75, "learning_rate": 3.627425998249706e-06, "loss": 1.8474994897842407, "step": 6616 }, { "epoch": 1.2046054427960318, "grad_norm": 17.875, "learning_rate": 3.6266984803832917e-06, "loss": 1.423680067062378, "step": 6618 }, { "epoch": 1.204969509420224, "grad_norm": 9.0, "learning_rate": 3.625970870551415e-06, "loss": 1.3614366054534912, "step": 6620 }, { "epoch": 1.2053335760444162, "grad_norm": 19.375, "learning_rate": 3.62524316886085e-06, "loss": 0.9116806983947754, "step": 6622 }, { "epoch": 1.2056976426686083, "grad_norm": 3.484375, "learning_rate": 3.6245153754183836e-06, "loss": 1.1547564268112183, "step": 6624 }, { "epoch": 1.2060617092928005, "grad_norm": 5.25, "learning_rate": 3.6237874903308177e-06, "loss": 0.923206090927124, "step": 6626 }, { "epoch": 1.2064257759169927, "grad_norm": 11.25, "learning_rate": 3.623059513704964e-06, "loss": 1.1630260944366455, "step": 6628 }, { "epoch": 1.206789842541185, "grad_norm": 12.0625, "learning_rate": 3.6223314456476533e-06, "loss": 1.4146908521652222, "step": 6630 }, { "epoch": 1.2071539091653773, "grad_norm": 22.75, "learning_rate": 3.621603286265725e-06, "loss": 1.576790452003479, "step": 6632 }, { "epoch": 1.2075179757895695, "grad_norm": 14.4375, "learning_rate": 3.6208750356660327e-06, "loss": 1.4390062093734741, "step": 6634 }, { "epoch": 1.2078820424137617, "grad_norm": 15.9375, "learning_rate": 3.6201466939554453e-06, "loss": 1.4026565551757812, "step": 6636 }, { "epoch": 1.208246109037954, "grad_norm": 14.1875, "learning_rate": 3.6194182612408433e-06, "loss": 1.3882112503051758, "step": 6638 }, { "epoch": 1.2086101756621461, "grad_norm": 8.6875, "learning_rate": 3.618689737629122e-06, "loss": 1.3948407173156738, "step": 6640 }, { "epoch": 1.2089742422863383, "grad_norm": 5.15625, "learning_rate": 3.6179611232271904e-06, "loss": 1.226584553718567, "step": 6642 }, { "epoch": 1.2093383089105307, "grad_norm": 21.5, "learning_rate": 3.6172324181419673e-06, "loss": 0.8317216038703918, "step": 6644 }, { "epoch": 1.209702375534723, "grad_norm": 7.65625, "learning_rate": 3.6165036224803874e-06, "loss": 0.9816555976867676, "step": 6646 }, { "epoch": 1.2100664421589151, "grad_norm": 6.84375, "learning_rate": 3.6157747363494e-06, "loss": 0.4452129006385803, "step": 6648 }, { "epoch": 1.2104305087831073, "grad_norm": 8.125, "learning_rate": 3.615045759855965e-06, "loss": 1.451916217803955, "step": 6650 }, { "epoch": 1.2107945754072995, "grad_norm": 6.59375, "learning_rate": 3.6143166931070596e-06, "loss": 0.9785503149032593, "step": 6652 }, { "epoch": 1.2111586420314917, "grad_norm": 7.28125, "learning_rate": 3.6135875362096675e-06, "loss": 1.3907877206802368, "step": 6654 }, { "epoch": 1.211522708655684, "grad_norm": 26.125, "learning_rate": 3.612858289270791e-06, "loss": 1.390721321105957, "step": 6656 }, { "epoch": 1.211886775279876, "grad_norm": 20.25, "learning_rate": 3.6121289523974436e-06, "loss": 1.5638270378112793, "step": 6658 }, { "epoch": 1.2122508419040685, "grad_norm": 53.25, "learning_rate": 3.611399525696654e-06, "loss": 0.5643367767333984, "step": 6660 }, { "epoch": 1.2126149085282607, "grad_norm": 4.71875, "learning_rate": 3.6106700092754623e-06, "loss": 1.0857794284820557, "step": 6662 }, { "epoch": 1.212978975152453, "grad_norm": 21.125, "learning_rate": 3.6099404032409212e-06, "loss": 1.6155925989151, "step": 6664 }, { "epoch": 1.213343041776645, "grad_norm": 9.0, "learning_rate": 3.6092107077000982e-06, "loss": 1.5053106546401978, "step": 6666 }, { "epoch": 1.2137071084008373, "grad_norm": 13.3125, "learning_rate": 3.6084809227600713e-06, "loss": 1.4572757482528687, "step": 6668 }, { "epoch": 1.2140711750250297, "grad_norm": 13.625, "learning_rate": 3.6077510485279356e-06, "loss": 1.404142141342163, "step": 6670 }, { "epoch": 1.214435241649222, "grad_norm": 5.53125, "learning_rate": 3.607021085110798e-06, "loss": 1.0271953344345093, "step": 6672 }, { "epoch": 1.214799308273414, "grad_norm": 13.25, "learning_rate": 3.606291032615774e-06, "loss": 1.3324660062789917, "step": 6674 }, { "epoch": 1.2151633748976063, "grad_norm": 21.75, "learning_rate": 3.6055608911499993e-06, "loss": 1.538636326789856, "step": 6676 }, { "epoch": 1.2155274415217985, "grad_norm": 39.75, "learning_rate": 3.6048306608206174e-06, "loss": 1.9539988040924072, "step": 6678 }, { "epoch": 1.2158915081459907, "grad_norm": 11.375, "learning_rate": 3.6041003417347873e-06, "loss": 1.4843626022338867, "step": 6680 }, { "epoch": 1.2162555747701829, "grad_norm": 15.6875, "learning_rate": 3.6033699339996808e-06, "loss": 1.4113197326660156, "step": 6682 }, { "epoch": 1.216619641394375, "grad_norm": 22.25, "learning_rate": 3.6026394377224817e-06, "loss": 1.407170295715332, "step": 6684 }, { "epoch": 1.2169837080185675, "grad_norm": 12.9375, "learning_rate": 3.6019088530103863e-06, "loss": 1.5715916156768799, "step": 6686 }, { "epoch": 1.2173477746427597, "grad_norm": 17.625, "learning_rate": 3.601178179970607e-06, "loss": 1.336016058921814, "step": 6688 }, { "epoch": 1.2177118412669519, "grad_norm": 7.625, "learning_rate": 3.6004474187103656e-06, "loss": 1.3757257461547852, "step": 6690 }, { "epoch": 1.218075907891144, "grad_norm": 13.0, "learning_rate": 3.5997165693368996e-06, "loss": 1.4690946340560913, "step": 6692 }, { "epoch": 1.2184399745153363, "grad_norm": 10.0, "learning_rate": 3.598985631957458e-06, "loss": 1.1817655563354492, "step": 6694 }, { "epoch": 1.2188040411395284, "grad_norm": 11.5, "learning_rate": 3.598254606679301e-06, "loss": 0.7359495759010315, "step": 6696 }, { "epoch": 1.2191681077637209, "grad_norm": 11.6875, "learning_rate": 3.5975234936097048e-06, "loss": 0.5830755233764648, "step": 6698 }, { "epoch": 1.219532174387913, "grad_norm": 8.8125, "learning_rate": 3.5967922928559586e-06, "loss": 1.1584774255752563, "step": 6700 }, { "epoch": 1.2198962410121053, "grad_norm": 12.875, "learning_rate": 3.5960610045253618e-06, "loss": 1.0171111822128296, "step": 6702 }, { "epoch": 1.2202603076362974, "grad_norm": 15.3125, "learning_rate": 3.595329628725227e-06, "loss": 1.5831761360168457, "step": 6704 }, { "epoch": 1.2206243742604896, "grad_norm": 35.25, "learning_rate": 3.5945981655628838e-06, "loss": 1.4115028381347656, "step": 6706 }, { "epoch": 1.2209884408846818, "grad_norm": 10.6875, "learning_rate": 3.593866615145668e-06, "loss": 1.3418809175491333, "step": 6708 }, { "epoch": 1.221352507508874, "grad_norm": 6.5, "learning_rate": 3.5931349775809334e-06, "loss": 1.3783400058746338, "step": 6710 }, { "epoch": 1.2217165741330664, "grad_norm": 4.90625, "learning_rate": 3.5924032529760454e-06, "loss": 1.1789112091064453, "step": 6712 }, { "epoch": 1.2220806407572586, "grad_norm": 8.5625, "learning_rate": 3.59167144143838e-06, "loss": 1.1209945678710938, "step": 6714 }, { "epoch": 1.2224447073814508, "grad_norm": 8.4375, "learning_rate": 3.590939543075329e-06, "loss": 1.131537914276123, "step": 6716 }, { "epoch": 1.222808774005643, "grad_norm": 59.75, "learning_rate": 3.590207557994294e-06, "loss": 0.8571414351463318, "step": 6718 }, { "epoch": 1.2231728406298352, "grad_norm": 9.0625, "learning_rate": 3.5894754863026926e-06, "loss": 1.321110486984253, "step": 6720 }, { "epoch": 1.2235369072540274, "grad_norm": 7.1875, "learning_rate": 3.588743328107953e-06, "loss": 1.334371566772461, "step": 6722 }, { "epoch": 1.2239009738782198, "grad_norm": 11.8125, "learning_rate": 3.5880110835175154e-06, "loss": 1.2903462648391724, "step": 6724 }, { "epoch": 1.224265040502412, "grad_norm": 46.5, "learning_rate": 3.5872787526388343e-06, "loss": 1.5546529293060303, "step": 6726 }, { "epoch": 1.2246291071266042, "grad_norm": 23.375, "learning_rate": 3.586546335579375e-06, "loss": 1.2964411973953247, "step": 6728 }, { "epoch": 1.2249931737507964, "grad_norm": 8.9375, "learning_rate": 3.5858138324466195e-06, "loss": 1.2439881563186646, "step": 6730 }, { "epoch": 1.2253572403749886, "grad_norm": 18.375, "learning_rate": 3.5850812433480586e-06, "loss": 0.9473875164985657, "step": 6732 }, { "epoch": 1.2257213069991808, "grad_norm": 14.5, "learning_rate": 3.584348568391195e-06, "loss": 0.9904845356941223, "step": 6734 }, { "epoch": 1.226085373623373, "grad_norm": 13.5, "learning_rate": 3.583615807683548e-06, "loss": 1.6508527994155884, "step": 6736 }, { "epoch": 1.2264494402475652, "grad_norm": 7.53125, "learning_rate": 3.5828829613326467e-06, "loss": 1.1980913877487183, "step": 6738 }, { "epoch": 1.2268135068717576, "grad_norm": 11.8125, "learning_rate": 3.582150029446032e-06, "loss": 1.087984323501587, "step": 6740 }, { "epoch": 1.2271775734959498, "grad_norm": 11.625, "learning_rate": 3.581417012131261e-06, "loss": 1.362506628036499, "step": 6742 }, { "epoch": 1.227541640120142, "grad_norm": 9.75, "learning_rate": 3.580683909495898e-06, "loss": 1.174209713935852, "step": 6744 }, { "epoch": 1.2279057067443342, "grad_norm": 13.9375, "learning_rate": 3.579950721647526e-06, "loss": 0.804348349571228, "step": 6746 }, { "epoch": 1.2282697733685264, "grad_norm": 11.875, "learning_rate": 3.579217448693735e-06, "loss": 0.9139540791511536, "step": 6748 }, { "epoch": 1.2286338399927186, "grad_norm": 14.625, "learning_rate": 3.5784840907421315e-06, "loss": 1.4969446659088135, "step": 6750 }, { "epoch": 1.228997906616911, "grad_norm": 18.125, "learning_rate": 3.577750647900332e-06, "loss": 1.7645078897476196, "step": 6752 }, { "epoch": 1.2293619732411032, "grad_norm": 12.875, "learning_rate": 3.5770171202759664e-06, "loss": 1.380998969078064, "step": 6754 }, { "epoch": 1.2297260398652954, "grad_norm": 10.25, "learning_rate": 3.576283507976677e-06, "loss": 1.277035117149353, "step": 6756 }, { "epoch": 1.2300901064894876, "grad_norm": 14.875, "learning_rate": 3.575549811110117e-06, "loss": 1.4455209970474243, "step": 6758 }, { "epoch": 1.2304541731136798, "grad_norm": 11.0625, "learning_rate": 3.574816029783956e-06, "loss": 1.5612605810165405, "step": 6760 }, { "epoch": 1.230818239737872, "grad_norm": 13.5, "learning_rate": 3.5740821641058722e-06, "loss": 1.288191795349121, "step": 6762 }, { "epoch": 1.2311823063620642, "grad_norm": 12.0, "learning_rate": 3.5733482141835575e-06, "loss": 1.1193962097167969, "step": 6764 }, { "epoch": 1.2315463729862566, "grad_norm": 13.4375, "learning_rate": 3.572614180124715e-06, "loss": 1.4512017965316772, "step": 6766 }, { "epoch": 1.2319104396104488, "grad_norm": 7.71875, "learning_rate": 3.571880062037062e-06, "loss": 0.7806037664413452, "step": 6768 }, { "epoch": 1.232274506234641, "grad_norm": 13.9375, "learning_rate": 3.5711458600283277e-06, "loss": 1.3409650325775146, "step": 6770 }, { "epoch": 1.2326385728588332, "grad_norm": 13.1875, "learning_rate": 3.570411574206254e-06, "loss": 0.956801176071167, "step": 6772 }, { "epoch": 1.2330026394830254, "grad_norm": 9.125, "learning_rate": 3.5696772046785935e-06, "loss": 1.628990650177002, "step": 6774 }, { "epoch": 1.2333667061072175, "grad_norm": 16.75, "learning_rate": 3.5689427515531117e-06, "loss": 1.4250504970550537, "step": 6776 }, { "epoch": 1.23373077273141, "grad_norm": 13.0, "learning_rate": 3.568208214937586e-06, "loss": 1.4149599075317383, "step": 6778 }, { "epoch": 1.2340948393556022, "grad_norm": 13.875, "learning_rate": 3.5674735949398075e-06, "loss": 1.4843947887420654, "step": 6780 }, { "epoch": 1.2344589059797944, "grad_norm": 36.5, "learning_rate": 3.5667388916675795e-06, "loss": 0.9764255285263062, "step": 6782 }, { "epoch": 1.2348229726039865, "grad_norm": 7.9375, "learning_rate": 3.5660041052287165e-06, "loss": 0.49985015392303467, "step": 6784 }, { "epoch": 1.2351870392281787, "grad_norm": 34.25, "learning_rate": 3.5652692357310435e-06, "loss": 1.3247759342193604, "step": 6786 }, { "epoch": 1.235551105852371, "grad_norm": 7.5, "learning_rate": 3.5645342832824002e-06, "loss": 1.0177068710327148, "step": 6788 }, { "epoch": 1.2359151724765631, "grad_norm": 30.125, "learning_rate": 3.5637992479906415e-06, "loss": 1.0612064599990845, "step": 6790 }, { "epoch": 1.2362792391007553, "grad_norm": 31.125, "learning_rate": 3.563064129963626e-06, "loss": 0.7122699618339539, "step": 6792 }, { "epoch": 1.2366433057249477, "grad_norm": 24.875, "learning_rate": 3.5623289293092322e-06, "loss": 1.4799864292144775, "step": 6794 }, { "epoch": 1.23700737234914, "grad_norm": 7.03125, "learning_rate": 3.5615936461353473e-06, "loss": 1.387568712234497, "step": 6796 }, { "epoch": 1.2373714389733321, "grad_norm": 16.75, "learning_rate": 3.560858280549869e-06, "loss": 1.446279525756836, "step": 6798 }, { "epoch": 1.2377355055975243, "grad_norm": 18.875, "learning_rate": 3.5601228326607118e-06, "loss": 1.6540460586547852, "step": 6800 }, { "epoch": 1.2380995722217165, "grad_norm": 10.0, "learning_rate": 3.5593873025757995e-06, "loss": 1.781539797782898, "step": 6802 }, { "epoch": 1.2384636388459087, "grad_norm": 2.21875, "learning_rate": 3.5586516904030676e-06, "loss": 0.8537502884864807, "step": 6804 }, { "epoch": 1.2388277054701011, "grad_norm": 23.375, "learning_rate": 3.557915996250464e-06, "loss": 1.2302873134613037, "step": 6806 }, { "epoch": 1.2391917720942933, "grad_norm": 12.5, "learning_rate": 3.5571802202259476e-06, "loss": 1.3988693952560425, "step": 6808 }, { "epoch": 1.2395558387184855, "grad_norm": 10.6875, "learning_rate": 3.5564443624374935e-06, "loss": 1.5080784559249878, "step": 6810 }, { "epoch": 1.2399199053426777, "grad_norm": 9.1875, "learning_rate": 3.555708422993084e-06, "loss": 1.3585219383239746, "step": 6812 }, { "epoch": 1.24028397196687, "grad_norm": 22.25, "learning_rate": 3.554972402000716e-06, "loss": 0.9604939818382263, "step": 6814 }, { "epoch": 1.240648038591062, "grad_norm": 11.375, "learning_rate": 3.5542362995683967e-06, "loss": 1.9124616384506226, "step": 6816 }, { "epoch": 1.2410121052152543, "grad_norm": 68.5, "learning_rate": 3.553500115804146e-06, "loss": 1.7408808469772339, "step": 6818 }, { "epoch": 1.2413761718394467, "grad_norm": 13.6875, "learning_rate": 3.552763850815997e-06, "loss": 1.4779905080795288, "step": 6820 }, { "epoch": 1.241740238463639, "grad_norm": 7.875, "learning_rate": 3.5520275047119925e-06, "loss": 1.4016799926757812, "step": 6822 }, { "epoch": 1.242104305087831, "grad_norm": 10.875, "learning_rate": 3.5512910776001897e-06, "loss": 1.6315690279006958, "step": 6824 }, { "epoch": 1.2424683717120233, "grad_norm": 10.5, "learning_rate": 3.5505545695886546e-06, "loss": 1.6864829063415527, "step": 6826 }, { "epoch": 1.2428324383362155, "grad_norm": 11.4375, "learning_rate": 3.549817980785467e-06, "loss": 1.4699666500091553, "step": 6828 }, { "epoch": 1.2431965049604077, "grad_norm": 16.375, "learning_rate": 3.54908131129872e-06, "loss": 1.476232886314392, "step": 6830 }, { "epoch": 1.2435605715846, "grad_norm": 13.75, "learning_rate": 3.5483445612365163e-06, "loss": 1.5693073272705078, "step": 6832 }, { "epoch": 1.2439246382087923, "grad_norm": 17.75, "learning_rate": 3.54760773070697e-06, "loss": 1.441408395767212, "step": 6834 }, { "epoch": 1.2442887048329845, "grad_norm": 16.75, "learning_rate": 3.546870819818209e-06, "loss": 1.4684395790100098, "step": 6836 }, { "epoch": 1.2446527714571767, "grad_norm": 4.90625, "learning_rate": 3.5461338286783698e-06, "loss": 1.380609154701233, "step": 6838 }, { "epoch": 1.2450168380813689, "grad_norm": 9.875, "learning_rate": 3.5453967573956063e-06, "loss": 1.4816055297851562, "step": 6840 }, { "epoch": 1.245380904705561, "grad_norm": 12.875, "learning_rate": 3.544659606078078e-06, "loss": 1.7490373849868774, "step": 6842 }, { "epoch": 1.2457449713297533, "grad_norm": 49.75, "learning_rate": 3.54392237483396e-06, "loss": 1.4835480451583862, "step": 6844 }, { "epoch": 1.2461090379539455, "grad_norm": 12.5, "learning_rate": 3.543185063771438e-06, "loss": 1.6208269596099854, "step": 6846 }, { "epoch": 1.2464731045781379, "grad_norm": 11.4375, "learning_rate": 3.542447672998709e-06, "loss": 1.4339570999145508, "step": 6848 }, { "epoch": 1.24683717120233, "grad_norm": 6.5625, "learning_rate": 3.5417102026239823e-06, "loss": 1.2629786729812622, "step": 6850 }, { "epoch": 1.2472012378265223, "grad_norm": 15.6875, "learning_rate": 3.540972652755479e-06, "loss": 1.4285693168640137, "step": 6852 }, { "epoch": 1.2475653044507145, "grad_norm": 33.25, "learning_rate": 3.5402350235014317e-06, "loss": 1.492363452911377, "step": 6854 }, { "epoch": 1.2479293710749066, "grad_norm": 2.921875, "learning_rate": 3.539497314970083e-06, "loss": 0.8729903697967529, "step": 6856 }, { "epoch": 1.2482934376990988, "grad_norm": 13.1875, "learning_rate": 3.5387595272696895e-06, "loss": 1.1616034507751465, "step": 6858 }, { "epoch": 1.2486575043232913, "grad_norm": 37.25, "learning_rate": 3.5380216605085205e-06, "loss": 1.5804818868637085, "step": 6860 }, { "epoch": 1.2490215709474835, "grad_norm": 12.25, "learning_rate": 3.5372837147948515e-06, "loss": 1.4918934106826782, "step": 6862 }, { "epoch": 1.2493856375716756, "grad_norm": 14.0, "learning_rate": 3.5365456902369755e-06, "loss": 1.38448965549469, "step": 6864 }, { "epoch": 1.2497497041958678, "grad_norm": 14.9375, "learning_rate": 3.535807586943194e-06, "loss": 1.46933913230896, "step": 6866 }, { "epoch": 1.25011377082006, "grad_norm": 6.28125, "learning_rate": 3.5350694050218205e-06, "loss": 1.2928518056869507, "step": 6868 }, { "epoch": 1.2504778374442522, "grad_norm": 7.9375, "learning_rate": 3.53433114458118e-06, "loss": 1.463866949081421, "step": 6870 }, { "epoch": 1.2508419040684444, "grad_norm": 13.375, "learning_rate": 3.53359280572961e-06, "loss": 1.5643519163131714, "step": 6872 }, { "epoch": 1.2512059706926366, "grad_norm": 9.4375, "learning_rate": 3.5328543885754583e-06, "loss": 1.3523356914520264, "step": 6874 }, { "epoch": 1.251570037316829, "grad_norm": 9.6875, "learning_rate": 3.532115893227084e-06, "loss": 1.6246832609176636, "step": 6876 }, { "epoch": 1.2519341039410212, "grad_norm": 10.0, "learning_rate": 3.531377319792858e-06, "loss": 1.4115750789642334, "step": 6878 }, { "epoch": 1.2522981705652134, "grad_norm": 19.25, "learning_rate": 3.5306386683811655e-06, "loss": 1.6279051303863525, "step": 6880 }, { "epoch": 1.2526622371894056, "grad_norm": 10.4375, "learning_rate": 3.5298999391003986e-06, "loss": 1.4184452295303345, "step": 6882 }, { "epoch": 1.2530263038135978, "grad_norm": 9.0, "learning_rate": 3.5291611320589624e-06, "loss": 1.5353273153305054, "step": 6884 }, { "epoch": 1.2533903704377902, "grad_norm": 10.9375, "learning_rate": 3.5284222473652752e-06, "loss": 1.601741075515747, "step": 6886 }, { "epoch": 1.2537544370619824, "grad_norm": 13.25, "learning_rate": 3.5276832851277643e-06, "loss": 1.9086626768112183, "step": 6888 }, { "epoch": 1.2541185036861746, "grad_norm": 24.875, "learning_rate": 3.5269442454548698e-06, "loss": 1.7674446105957031, "step": 6890 }, { "epoch": 1.2544825703103668, "grad_norm": 9.875, "learning_rate": 3.5262051284550424e-06, "loss": 1.538628101348877, "step": 6892 }, { "epoch": 1.254846636934559, "grad_norm": 43.75, "learning_rate": 3.525465934236746e-06, "loss": 1.3958988189697266, "step": 6894 }, { "epoch": 1.2552107035587512, "grad_norm": 27.75, "learning_rate": 3.5247266629084533e-06, "loss": 1.5351393222808838, "step": 6896 }, { "epoch": 1.2555747701829434, "grad_norm": 14.75, "learning_rate": 3.5239873145786484e-06, "loss": 1.6492743492126465, "step": 6898 }, { "epoch": 1.2559388368071356, "grad_norm": 13.75, "learning_rate": 3.5232478893558288e-06, "loss": 1.2367078065872192, "step": 6900 }, { "epoch": 1.256302903431328, "grad_norm": 13.4375, "learning_rate": 3.522508387348502e-06, "loss": 1.450238823890686, "step": 6902 }, { "epoch": 1.2566669700555202, "grad_norm": 13.875, "learning_rate": 3.521768808665188e-06, "loss": 1.2624263763427734, "step": 6904 }, { "epoch": 1.2570310366797124, "grad_norm": 10.5, "learning_rate": 3.5210291534144147e-06, "loss": 1.4558993577957153, "step": 6906 }, { "epoch": 1.2573951033039046, "grad_norm": 14.3125, "learning_rate": 3.5202894217047247e-06, "loss": 1.3276500701904297, "step": 6908 }, { "epoch": 1.2577591699280968, "grad_norm": 6.71875, "learning_rate": 3.5195496136446706e-06, "loss": 1.300504207611084, "step": 6910 }, { "epoch": 1.2581232365522892, "grad_norm": 8.9375, "learning_rate": 3.518809729342817e-06, "loss": 1.1391650438308716, "step": 6912 }, { "epoch": 1.2584873031764814, "grad_norm": 8.375, "learning_rate": 3.518069768907738e-06, "loss": 1.3528811931610107, "step": 6914 }, { "epoch": 1.2588513698006736, "grad_norm": 13.875, "learning_rate": 3.5173297324480195e-06, "loss": 1.6795876026153564, "step": 6916 }, { "epoch": 1.2592154364248658, "grad_norm": 20.875, "learning_rate": 3.5165896200722582e-06, "loss": 2.002683162689209, "step": 6918 }, { "epoch": 1.259579503049058, "grad_norm": 8.6875, "learning_rate": 3.515849431889066e-06, "loss": 1.4503512382507324, "step": 6920 }, { "epoch": 1.2599435696732502, "grad_norm": 7.9375, "learning_rate": 3.5151091680070594e-06, "loss": 1.5432205200195312, "step": 6922 }, { "epoch": 1.2603076362974424, "grad_norm": 5.15625, "learning_rate": 3.5143688285348697e-06, "loss": 1.4279499053955078, "step": 6924 }, { "epoch": 1.2606717029216346, "grad_norm": 8.5625, "learning_rate": 3.5136284135811393e-06, "loss": 0.8904904127120972, "step": 6926 }, { "epoch": 1.261035769545827, "grad_norm": 19.125, "learning_rate": 3.51288792325452e-06, "loss": 0.6826170682907104, "step": 6928 }, { "epoch": 1.2613998361700192, "grad_norm": 10.875, "learning_rate": 3.5121473576636765e-06, "loss": 1.694131851196289, "step": 6930 }, { "epoch": 1.2617639027942114, "grad_norm": 10.8125, "learning_rate": 3.5114067169172848e-06, "loss": 1.2076680660247803, "step": 6932 }, { "epoch": 1.2621279694184036, "grad_norm": 8.9375, "learning_rate": 3.51066600112403e-06, "loss": 1.386491298675537, "step": 6934 }, { "epoch": 1.2624920360425957, "grad_norm": 5.09375, "learning_rate": 3.5099252103926085e-06, "loss": 1.598430871963501, "step": 6936 }, { "epoch": 1.2628561026667882, "grad_norm": 9.1875, "learning_rate": 3.5091843448317285e-06, "loss": 1.107453465461731, "step": 6938 }, { "epoch": 1.2632201692909804, "grad_norm": 13.4375, "learning_rate": 3.5084434045501094e-06, "loss": 1.2815744876861572, "step": 6940 }, { "epoch": 1.2635842359151725, "grad_norm": 7.875, "learning_rate": 3.5077023896564823e-06, "loss": 1.457084059715271, "step": 6942 }, { "epoch": 1.2639483025393647, "grad_norm": 9.625, "learning_rate": 3.506961300259587e-06, "loss": 1.4279173612594604, "step": 6944 }, { "epoch": 1.264312369163557, "grad_norm": 14.9375, "learning_rate": 3.5062201364681748e-06, "loss": 1.5036423206329346, "step": 6946 }, { "epoch": 1.2646764357877491, "grad_norm": 21.375, "learning_rate": 3.5054788983910082e-06, "loss": 1.290251612663269, "step": 6948 }, { "epoch": 1.2650405024119413, "grad_norm": 17.375, "learning_rate": 3.5047375861368626e-06, "loss": 1.0799905061721802, "step": 6950 }, { "epoch": 1.2654045690361335, "grad_norm": 22.25, "learning_rate": 3.5039961998145222e-06, "loss": 1.8073190450668335, "step": 6952 }, { "epoch": 1.2657686356603257, "grad_norm": 30.5, "learning_rate": 3.5032547395327823e-06, "loss": 1.4099798202514648, "step": 6954 }, { "epoch": 1.2661327022845181, "grad_norm": 11.0625, "learning_rate": 3.5025132054004487e-06, "loss": 1.5092816352844238, "step": 6956 }, { "epoch": 1.2664967689087103, "grad_norm": 7.90625, "learning_rate": 3.5017715975263377e-06, "loss": 1.4591829776763916, "step": 6958 }, { "epoch": 1.2668608355329025, "grad_norm": 11.625, "learning_rate": 3.5010299160192786e-06, "loss": 1.397963523864746, "step": 6960 }, { "epoch": 1.2672249021570947, "grad_norm": 42.5, "learning_rate": 3.50028816098811e-06, "loss": 1.8981044292449951, "step": 6962 }, { "epoch": 1.267588968781287, "grad_norm": 17.0, "learning_rate": 3.4995463325416823e-06, "loss": 1.1766111850738525, "step": 6964 }, { "epoch": 1.2679530354054793, "grad_norm": 33.0, "learning_rate": 3.4988044307888537e-06, "loss": 1.437781810760498, "step": 6966 }, { "epoch": 1.2683171020296715, "grad_norm": 9.4375, "learning_rate": 3.4980624558384956e-06, "loss": 1.4637047052383423, "step": 6968 }, { "epoch": 1.2686811686538637, "grad_norm": 21.375, "learning_rate": 3.4973204077994915e-06, "loss": 1.5192077159881592, "step": 6970 }, { "epoch": 1.269045235278056, "grad_norm": 17.875, "learning_rate": 3.496578286780733e-06, "loss": 1.261083722114563, "step": 6972 }, { "epoch": 1.269409301902248, "grad_norm": 8.1875, "learning_rate": 3.495836092891124e-06, "loss": 1.743212342262268, "step": 6974 }, { "epoch": 1.2697733685264403, "grad_norm": 13.25, "learning_rate": 3.4950938262395774e-06, "loss": 1.2227096557617188, "step": 6976 }, { "epoch": 1.2701374351506325, "grad_norm": 17.875, "learning_rate": 3.4943514869350176e-06, "loss": 1.5062861442565918, "step": 6978 }, { "epoch": 1.2705015017748247, "grad_norm": 13.875, "learning_rate": 3.4936090750863816e-06, "loss": 1.361649513244629, "step": 6980 }, { "epoch": 1.270865568399017, "grad_norm": 11.6875, "learning_rate": 3.4928665908026135e-06, "loss": 1.785667896270752, "step": 6982 }, { "epoch": 1.2712296350232093, "grad_norm": 23.0, "learning_rate": 3.492124034192671e-06, "loss": 1.785082221031189, "step": 6984 }, { "epoch": 1.2715937016474015, "grad_norm": 15.0, "learning_rate": 3.4913814053655205e-06, "loss": 1.7949333190917969, "step": 6986 }, { "epoch": 1.2719577682715937, "grad_norm": 7.8125, "learning_rate": 3.4906387044301393e-06, "loss": 1.6175525188446045, "step": 6988 }, { "epoch": 1.2723218348957859, "grad_norm": 20.375, "learning_rate": 3.4898959314955185e-06, "loss": 1.064249038696289, "step": 6990 }, { "epoch": 1.2726859015199783, "grad_norm": 45.25, "learning_rate": 3.4891530866706534e-06, "loss": 1.0330097675323486, "step": 6992 }, { "epoch": 1.2730499681441705, "grad_norm": 27.875, "learning_rate": 3.488410170064557e-06, "loss": 1.45759117603302, "step": 6994 }, { "epoch": 1.2734140347683627, "grad_norm": 5.5625, "learning_rate": 3.487667181786246e-06, "loss": 0.9440405964851379, "step": 6996 }, { "epoch": 1.2737781013925549, "grad_norm": 15.9375, "learning_rate": 3.486924121944753e-06, "loss": 1.4143867492675781, "step": 6998 }, { "epoch": 1.274142168016747, "grad_norm": 17.125, "learning_rate": 3.4861809906491182e-06, "loss": 1.859867811203003, "step": 7000 }, { "epoch": 1.2745062346409393, "grad_norm": 18.625, "learning_rate": 3.4854377880083932e-06, "loss": 1.5393643379211426, "step": 7002 }, { "epoch": 1.2748703012651315, "grad_norm": 13.6875, "learning_rate": 3.4846945141316413e-06, "loss": 1.5204248428344727, "step": 7004 }, { "epoch": 1.2752343678893237, "grad_norm": 27.375, "learning_rate": 3.4839511691279327e-06, "loss": 1.5938551425933838, "step": 7006 }, { "epoch": 1.2755984345135158, "grad_norm": 13.6875, "learning_rate": 3.4832077531063514e-06, "loss": 1.9839081764221191, "step": 7008 }, { "epoch": 1.2759625011377083, "grad_norm": 8.125, "learning_rate": 3.48246426617599e-06, "loss": 1.6252151727676392, "step": 7010 }, { "epoch": 1.2763265677619005, "grad_norm": 9.5, "learning_rate": 3.4817207084459537e-06, "loss": 1.3268239498138428, "step": 7012 }, { "epoch": 1.2766906343860926, "grad_norm": 5.84375, "learning_rate": 3.480977080025356e-06, "loss": 1.513934850692749, "step": 7014 }, { "epoch": 1.2770547010102848, "grad_norm": 2.859375, "learning_rate": 3.48023338102332e-06, "loss": 1.0915807485580444, "step": 7016 }, { "epoch": 1.277418767634477, "grad_norm": 19.5, "learning_rate": 3.479489611548982e-06, "loss": 1.2613751888275146, "step": 7018 }, { "epoch": 1.2777828342586695, "grad_norm": 10.8125, "learning_rate": 3.478745771711487e-06, "loss": 1.4437661170959473, "step": 7020 }, { "epoch": 1.2781469008828616, "grad_norm": 10.4375, "learning_rate": 3.4780018616199894e-06, "loss": 1.5811054706573486, "step": 7022 }, { "epoch": 1.2785109675070538, "grad_norm": 17.125, "learning_rate": 3.477257881383658e-06, "loss": 1.7467256784439087, "step": 7024 }, { "epoch": 1.278875034131246, "grad_norm": 81.0, "learning_rate": 3.4765138311116643e-06, "loss": 1.7535210847854614, "step": 7026 }, { "epoch": 1.2792391007554382, "grad_norm": 10.25, "learning_rate": 3.475769710913197e-06, "loss": 1.1360485553741455, "step": 7028 }, { "epoch": 1.2796031673796304, "grad_norm": 8.6875, "learning_rate": 3.475025520897454e-06, "loss": 1.4009417295455933, "step": 7030 }, { "epoch": 1.2799672340038226, "grad_norm": 9.0625, "learning_rate": 3.4742812611736397e-06, "loss": 1.3411486148834229, "step": 7032 }, { "epoch": 1.2803313006280148, "grad_norm": 10.0625, "learning_rate": 3.473536931850974e-06, "loss": 0.9136326909065247, "step": 7034 }, { "epoch": 1.2806953672522072, "grad_norm": 7.0, "learning_rate": 3.4727925330386814e-06, "loss": 1.4404561519622803, "step": 7036 }, { "epoch": 1.2810594338763994, "grad_norm": 10.1875, "learning_rate": 3.472048064846001e-06, "loss": 1.3468619585037231, "step": 7038 }, { "epoch": 1.2814235005005916, "grad_norm": 12.25, "learning_rate": 3.47130352738218e-06, "loss": 0.9277386665344238, "step": 7040 }, { "epoch": 1.2817875671247838, "grad_norm": 21.5, "learning_rate": 3.470558920756476e-06, "loss": 1.6297842264175415, "step": 7042 }, { "epoch": 1.282151633748976, "grad_norm": 15.875, "learning_rate": 3.4698142450781584e-06, "loss": 1.3305178880691528, "step": 7044 }, { "epoch": 1.2825157003731684, "grad_norm": 9.875, "learning_rate": 3.4690695004565044e-06, "loss": 1.3600209951400757, "step": 7046 }, { "epoch": 1.2828797669973606, "grad_norm": 12.6875, "learning_rate": 3.4683246870008015e-06, "loss": 1.4870193004608154, "step": 7048 }, { "epoch": 1.2832438336215528, "grad_norm": 12.5, "learning_rate": 3.467579804820348e-06, "loss": 1.4263830184936523, "step": 7050 }, { "epoch": 1.283607900245745, "grad_norm": 7.03125, "learning_rate": 3.4668348540244547e-06, "loss": 1.4817838668823242, "step": 7052 }, { "epoch": 1.2839719668699372, "grad_norm": 10.25, "learning_rate": 3.4660898347224377e-06, "loss": 1.239769697189331, "step": 7054 }, { "epoch": 1.2843360334941294, "grad_norm": 5.875, "learning_rate": 3.4653447470236258e-06, "loss": 1.0721032619476318, "step": 7056 }, { "epoch": 1.2847001001183216, "grad_norm": 10.3125, "learning_rate": 3.4645995910373585e-06, "loss": 1.2548054456710815, "step": 7058 }, { "epoch": 1.2850641667425138, "grad_norm": 8.5, "learning_rate": 3.463854366872984e-06, "loss": 1.3587526082992554, "step": 7060 }, { "epoch": 1.285428233366706, "grad_norm": 6.90625, "learning_rate": 3.4631090746398614e-06, "loss": 1.4787750244140625, "step": 7062 }, { "epoch": 1.2857922999908984, "grad_norm": 6.71875, "learning_rate": 3.462363714447359e-06, "loss": 1.5639283657073975, "step": 7064 }, { "epoch": 1.2861563666150906, "grad_norm": 9.4375, "learning_rate": 3.461618286404855e-06, "loss": 1.121364712715149, "step": 7066 }, { "epoch": 1.2865204332392828, "grad_norm": 15.625, "learning_rate": 3.4608727906217387e-06, "loss": 1.3390510082244873, "step": 7068 }, { "epoch": 1.286884499863475, "grad_norm": 8.25, "learning_rate": 3.460127227207407e-06, "loss": 1.2885761260986328, "step": 7070 }, { "epoch": 1.2872485664876672, "grad_norm": 11.25, "learning_rate": 3.459381596271271e-06, "loss": 1.5833566188812256, "step": 7072 }, { "epoch": 1.2876126331118596, "grad_norm": 20.125, "learning_rate": 3.458635897922746e-06, "loss": 1.2856498956680298, "step": 7074 }, { "epoch": 1.2879766997360518, "grad_norm": 11.5, "learning_rate": 3.457890132271263e-06, "loss": 1.6123485565185547, "step": 7076 }, { "epoch": 1.288340766360244, "grad_norm": 11.8125, "learning_rate": 3.4571442994262572e-06, "loss": 1.3298993110656738, "step": 7078 }, { "epoch": 1.2887048329844362, "grad_norm": 9.4375, "learning_rate": 3.4563983994971794e-06, "loss": 1.0125114917755127, "step": 7080 }, { "epoch": 1.2890688996086284, "grad_norm": 2.875, "learning_rate": 3.455652432593486e-06, "loss": 1.1476223468780518, "step": 7082 }, { "epoch": 1.2894329662328206, "grad_norm": 3.96875, "learning_rate": 3.4549063988246445e-06, "loss": 1.3313473463058472, "step": 7084 }, { "epoch": 1.2897970328570127, "grad_norm": 9.25, "learning_rate": 3.4541602983001322e-06, "loss": 1.0937621593475342, "step": 7086 }, { "epoch": 1.290161099481205, "grad_norm": 11.0, "learning_rate": 3.4534141311294368e-06, "loss": 1.4370759725570679, "step": 7088 }, { "epoch": 1.2905251661053974, "grad_norm": 6.65625, "learning_rate": 3.4526678974220552e-06, "loss": 1.4123831987380981, "step": 7090 }, { "epoch": 1.2908892327295896, "grad_norm": 6.65625, "learning_rate": 3.4519215972874942e-06, "loss": 1.4852166175842285, "step": 7092 }, { "epoch": 1.2912532993537817, "grad_norm": 9.3125, "learning_rate": 3.451175230835271e-06, "loss": 1.3300797939300537, "step": 7094 }, { "epoch": 1.291617365977974, "grad_norm": 16.5, "learning_rate": 3.4504287981749103e-06, "loss": 1.1346608400344849, "step": 7096 }, { "epoch": 1.2919814326021661, "grad_norm": 9.0625, "learning_rate": 3.4496822994159483e-06, "loss": 0.5941730737686157, "step": 7098 }, { "epoch": 1.2923454992263586, "grad_norm": 7.03125, "learning_rate": 3.448935734667932e-06, "loss": 1.2391228675842285, "step": 7100 }, { "epoch": 1.2927095658505507, "grad_norm": 7.78125, "learning_rate": 3.448189104040416e-06, "loss": 1.575432300567627, "step": 7102 }, { "epoch": 1.293073632474743, "grad_norm": 16.25, "learning_rate": 3.4474424076429658e-06, "loss": 1.3727679252624512, "step": 7104 }, { "epoch": 1.2934376990989351, "grad_norm": 8.1875, "learning_rate": 3.446695645585155e-06, "loss": 1.3660389184951782, "step": 7106 }, { "epoch": 1.2938017657231273, "grad_norm": 62.75, "learning_rate": 3.4459488179765683e-06, "loss": 1.4868298768997192, "step": 7108 }, { "epoch": 1.2941658323473195, "grad_norm": 18.25, "learning_rate": 3.4452019249268004e-06, "loss": 1.3551559448242188, "step": 7110 }, { "epoch": 1.2945298989715117, "grad_norm": 3.90625, "learning_rate": 3.444454966545454e-06, "loss": 1.2077412605285645, "step": 7112 }, { "epoch": 1.294893965595704, "grad_norm": 9.9375, "learning_rate": 3.443707942942143e-06, "loss": 1.2375282049179077, "step": 7114 }, { "epoch": 1.295258032219896, "grad_norm": 5.65625, "learning_rate": 3.442960854226489e-06, "loss": 1.2822661399841309, "step": 7116 }, { "epoch": 1.2956220988440885, "grad_norm": 14.875, "learning_rate": 3.4422137005081245e-06, "loss": 1.3456642627716064, "step": 7118 }, { "epoch": 1.2959861654682807, "grad_norm": 21.5, "learning_rate": 3.441466481896692e-06, "loss": 1.228467583656311, "step": 7120 }, { "epoch": 1.296350232092473, "grad_norm": 21.5, "learning_rate": 3.440719198501842e-06, "loss": 1.5472502708435059, "step": 7122 }, { "epoch": 1.296714298716665, "grad_norm": 8.6875, "learning_rate": 3.439971850433237e-06, "loss": 1.807754397392273, "step": 7124 }, { "epoch": 1.2970783653408573, "grad_norm": 9.0625, "learning_rate": 3.4392244378005445e-06, "loss": 1.0491812229156494, "step": 7126 }, { "epoch": 1.2974424319650497, "grad_norm": 23.5, "learning_rate": 3.438476960713446e-06, "loss": 1.295921802520752, "step": 7128 }, { "epoch": 1.297806498589242, "grad_norm": 6.25, "learning_rate": 3.4377294192816315e-06, "loss": 0.13925348222255707, "step": 7130 }, { "epoch": 1.298170565213434, "grad_norm": 25.125, "learning_rate": 3.4369818136147976e-06, "loss": 0.4353587031364441, "step": 7132 }, { "epoch": 1.2985346318376263, "grad_norm": 17.25, "learning_rate": 3.436234143822654e-06, "loss": 1.3399690389633179, "step": 7134 }, { "epoch": 1.2988986984618185, "grad_norm": 40.5, "learning_rate": 3.4354864100149175e-06, "loss": 0.9734708666801453, "step": 7136 }, { "epoch": 1.2992627650860107, "grad_norm": 5.84375, "learning_rate": 3.434738612301315e-06, "loss": 1.5604196786880493, "step": 7138 }, { "epoch": 1.2996268317102029, "grad_norm": 6.03125, "learning_rate": 3.433990750791584e-06, "loss": 1.4461499452590942, "step": 7140 }, { "epoch": 1.299990898334395, "grad_norm": 6.1875, "learning_rate": 3.4332428255954686e-06, "loss": 0.9172034859657288, "step": 7142 }, { "epoch": 1.3003549649585875, "grad_norm": 10.0625, "learning_rate": 3.432494836822724e-06, "loss": 1.5358330011367798, "step": 7144 }, { "epoch": 1.3007190315827797, "grad_norm": 10.0625, "learning_rate": 3.4317467845831153e-06, "loss": 1.2882869243621826, "step": 7146 }, { "epoch": 1.3010830982069719, "grad_norm": 15.0625, "learning_rate": 3.430998668986415e-06, "loss": 1.06436026096344, "step": 7148 }, { "epoch": 1.301447164831164, "grad_norm": 14.875, "learning_rate": 3.430250490142407e-06, "loss": 1.4392189979553223, "step": 7150 }, { "epoch": 1.3018112314553563, "grad_norm": 41.5, "learning_rate": 3.429502248160883e-06, "loss": 1.3414818048477173, "step": 7152 }, { "epoch": 1.3021752980795487, "grad_norm": 13.9375, "learning_rate": 3.428753943151646e-06, "loss": 0.6288173794746399, "step": 7154 }, { "epoch": 1.3025393647037409, "grad_norm": 338.0, "learning_rate": 3.4280055752245044e-06, "loss": 1.8327256441116333, "step": 7156 }, { "epoch": 1.302903431327933, "grad_norm": 37.25, "learning_rate": 3.427257144489279e-06, "loss": 1.4023244380950928, "step": 7158 }, { "epoch": 1.3032674979521253, "grad_norm": 13.6875, "learning_rate": 3.4265086510557986e-06, "loss": 1.4952585697174072, "step": 7160 }, { "epoch": 1.3036315645763175, "grad_norm": 14.4375, "learning_rate": 3.425760095033903e-06, "loss": 1.4060511589050293, "step": 7162 }, { "epoch": 1.3039956312005097, "grad_norm": 16.5, "learning_rate": 3.4250114765334397e-06, "loss": 1.3897628784179688, "step": 7164 }, { "epoch": 1.3043596978247018, "grad_norm": 7.375, "learning_rate": 3.424262795664264e-06, "loss": 0.8717978596687317, "step": 7166 }, { "epoch": 1.304723764448894, "grad_norm": 26.125, "learning_rate": 3.4235140525362414e-06, "loss": 1.1149710416793823, "step": 7168 }, { "epoch": 1.3050878310730865, "grad_norm": 3.546875, "learning_rate": 3.4227652472592487e-06, "loss": 0.5271800756454468, "step": 7170 }, { "epoch": 1.3054518976972787, "grad_norm": 7.71875, "learning_rate": 3.4220163799431693e-06, "loss": 1.3105355501174927, "step": 7172 }, { "epoch": 1.3058159643214708, "grad_norm": 53.75, "learning_rate": 3.421267450697897e-06, "loss": 1.281732201576233, "step": 7174 }, { "epoch": 1.306180030945663, "grad_norm": 7.96875, "learning_rate": 3.420518459633333e-06, "loss": 1.5820822715759277, "step": 7176 }, { "epoch": 1.3065440975698552, "grad_norm": 4.0625, "learning_rate": 3.419769406859389e-06, "loss": 0.9697157740592957, "step": 7178 }, { "epoch": 1.3069081641940474, "grad_norm": 8.0625, "learning_rate": 3.419020292485986e-06, "loss": 1.304370641708374, "step": 7180 }, { "epoch": 1.3072722308182398, "grad_norm": 8.375, "learning_rate": 3.418271116623053e-06, "loss": 1.3723695278167725, "step": 7182 }, { "epoch": 1.307636297442432, "grad_norm": 6.75, "learning_rate": 3.4175218793805297e-06, "loss": 1.2795096635818481, "step": 7184 }, { "epoch": 1.3080003640666242, "grad_norm": 9.9375, "learning_rate": 3.416772580868362e-06, "loss": 1.1621792316436768, "step": 7186 }, { "epoch": 1.3083644306908164, "grad_norm": 12.0625, "learning_rate": 3.4160232211965072e-06, "loss": 1.3562220335006714, "step": 7188 }, { "epoch": 1.3087284973150086, "grad_norm": 41.5, "learning_rate": 3.4152738004749297e-06, "loss": 1.2786340713500977, "step": 7190 }, { "epoch": 1.3090925639392008, "grad_norm": 22.5, "learning_rate": 3.414524318813606e-06, "loss": 0.7081055045127869, "step": 7192 }, { "epoch": 1.309456630563393, "grad_norm": 8.5, "learning_rate": 3.4137747763225186e-06, "loss": 1.072160243988037, "step": 7194 }, { "epoch": 1.3098206971875852, "grad_norm": 15.375, "learning_rate": 3.413025173111659e-06, "loss": 1.3887577056884766, "step": 7196 }, { "epoch": 1.3101847638117776, "grad_norm": 16.5, "learning_rate": 3.41227550929103e-06, "loss": 1.5575710535049438, "step": 7198 }, { "epoch": 1.3105488304359698, "grad_norm": 20.5, "learning_rate": 3.4115257849706394e-06, "loss": 1.4033201932907104, "step": 7200 }, { "epoch": 1.310912897060162, "grad_norm": 8.125, "learning_rate": 3.4107760002605086e-06, "loss": 1.2694982290267944, "step": 7202 }, { "epoch": 1.3112769636843542, "grad_norm": 26.875, "learning_rate": 3.410026155270665e-06, "loss": 1.1874176263809204, "step": 7204 }, { "epoch": 1.3116410303085464, "grad_norm": 17.0, "learning_rate": 3.409276250111143e-06, "loss": 1.0976594686508179, "step": 7206 }, { "epoch": 1.3120050969327388, "grad_norm": 6.1875, "learning_rate": 3.408526284891991e-06, "loss": 1.2550384998321533, "step": 7208 }, { "epoch": 1.312369163556931, "grad_norm": 12.125, "learning_rate": 3.407776259723262e-06, "loss": 1.362528920173645, "step": 7210 }, { "epoch": 1.3127332301811232, "grad_norm": 8.375, "learning_rate": 3.4070261747150203e-06, "loss": 1.1947346925735474, "step": 7212 }, { "epoch": 1.3130972968053154, "grad_norm": 12.875, "learning_rate": 3.4062760299773368e-06, "loss": 1.1582088470458984, "step": 7214 }, { "epoch": 1.3134613634295076, "grad_norm": 84.5, "learning_rate": 3.4055258256202926e-06, "loss": 1.554925799369812, "step": 7216 }, { "epoch": 1.3138254300536998, "grad_norm": 17.75, "learning_rate": 3.4047755617539755e-06, "loss": 1.7379704713821411, "step": 7218 }, { "epoch": 1.314189496677892, "grad_norm": 14.375, "learning_rate": 3.4040252384884862e-06, "loss": 1.432013988494873, "step": 7220 }, { "epoch": 1.3145535633020842, "grad_norm": 9.25, "learning_rate": 3.40327485593393e-06, "loss": 1.4521740674972534, "step": 7222 }, { "epoch": 1.3149176299262766, "grad_norm": 12.5625, "learning_rate": 3.4025244142004244e-06, "loss": 1.1514098644256592, "step": 7224 }, { "epoch": 1.3152816965504688, "grad_norm": 12.0625, "learning_rate": 3.401773913398091e-06, "loss": 1.4416508674621582, "step": 7226 }, { "epoch": 1.315645763174661, "grad_norm": 8.3125, "learning_rate": 3.401023353637064e-06, "loss": 1.4151341915130615, "step": 7228 }, { "epoch": 1.3160098297988532, "grad_norm": 12.375, "learning_rate": 3.4002727350274855e-06, "loss": 1.0667016506195068, "step": 7230 }, { "epoch": 1.3163738964230454, "grad_norm": 10.25, "learning_rate": 3.399522057679505e-06, "loss": 1.279942274093628, "step": 7232 }, { "epoch": 1.3167379630472378, "grad_norm": 10.375, "learning_rate": 3.3987713217032826e-06, "loss": 1.5324357748031616, "step": 7234 }, { "epoch": 1.31710202967143, "grad_norm": 11.1875, "learning_rate": 3.3980205272089837e-06, "loss": 1.7289388179779053, "step": 7236 }, { "epoch": 1.3174660962956222, "grad_norm": 21.375, "learning_rate": 3.3972696743067856e-06, "loss": 1.7450295686721802, "step": 7238 }, { "epoch": 1.3178301629198144, "grad_norm": 14.8125, "learning_rate": 3.396518763106873e-06, "loss": 1.9419946670532227, "step": 7240 }, { "epoch": 1.3181942295440066, "grad_norm": 4.15625, "learning_rate": 3.395767793719439e-06, "loss": 1.2483052015304565, "step": 7242 }, { "epoch": 1.3185582961681988, "grad_norm": 12.1875, "learning_rate": 3.395016766254685e-06, "loss": 1.0632487535476685, "step": 7244 }, { "epoch": 1.318922362792391, "grad_norm": 11.0625, "learning_rate": 3.394265680822822e-06, "loss": 1.5236999988555908, "step": 7246 }, { "epoch": 1.3192864294165831, "grad_norm": 9.4375, "learning_rate": 3.3935145375340673e-06, "loss": 1.2758264541625977, "step": 7248 }, { "epoch": 1.3196504960407753, "grad_norm": 12.8125, "learning_rate": 3.392763336498649e-06, "loss": 1.4826488494873047, "step": 7250 }, { "epoch": 1.3200145626649677, "grad_norm": 10.75, "learning_rate": 3.3920120778268032e-06, "loss": 1.2545771598815918, "step": 7252 }, { "epoch": 1.32037862928916, "grad_norm": 7.46875, "learning_rate": 3.391260761628774e-06, "loss": 1.2271801233291626, "step": 7254 }, { "epoch": 1.3207426959133521, "grad_norm": 5.3125, "learning_rate": 3.390509388014813e-06, "loss": 1.0805290937423706, "step": 7256 }, { "epoch": 1.3211067625375443, "grad_norm": 9.125, "learning_rate": 3.3897579570951824e-06, "loss": 1.6068825721740723, "step": 7258 }, { "epoch": 1.3214708291617365, "grad_norm": 7.625, "learning_rate": 3.3890064689801504e-06, "loss": 1.3820632696151733, "step": 7260 }, { "epoch": 1.321834895785929, "grad_norm": 3.703125, "learning_rate": 3.3882549237799965e-06, "loss": 1.2232885360717773, "step": 7262 }, { "epoch": 1.3221989624101211, "grad_norm": 10.625, "learning_rate": 3.387503321605006e-06, "loss": 1.37892746925354, "step": 7264 }, { "epoch": 1.3225630290343133, "grad_norm": 10.0, "learning_rate": 3.386751662565473e-06, "loss": 1.171010971069336, "step": 7266 }, { "epoch": 1.3229270956585055, "grad_norm": 13.1875, "learning_rate": 3.3859999467717007e-06, "loss": 0.4496696889400482, "step": 7268 }, { "epoch": 1.3232911622826977, "grad_norm": 8.0625, "learning_rate": 3.3852481743340006e-06, "loss": 1.2991750240325928, "step": 7270 }, { "epoch": 1.32365522890689, "grad_norm": 6.875, "learning_rate": 3.384496345362692e-06, "loss": 1.5770032405853271, "step": 7272 }, { "epoch": 1.324019295531082, "grad_norm": 35.5, "learning_rate": 3.383744459968104e-06, "loss": 1.4898180961608887, "step": 7274 }, { "epoch": 1.3243833621552743, "grad_norm": 19.375, "learning_rate": 3.3829925182605717e-06, "loss": 1.7032684087753296, "step": 7276 }, { "epoch": 1.3247474287794667, "grad_norm": 24.875, "learning_rate": 3.3822405203504383e-06, "loss": 1.516749620437622, "step": 7278 }, { "epoch": 1.325111495403659, "grad_norm": 8.375, "learning_rate": 3.381488466348058e-06, "loss": 1.2130012512207031, "step": 7280 }, { "epoch": 1.325475562027851, "grad_norm": 20.625, "learning_rate": 3.3807363563637907e-06, "loss": 1.6764572858810425, "step": 7282 }, { "epoch": 1.3258396286520433, "grad_norm": 13.6875, "learning_rate": 3.379984190508008e-06, "loss": 1.885354995727539, "step": 7284 }, { "epoch": 1.3262036952762355, "grad_norm": 11.375, "learning_rate": 3.3792319688910843e-06, "loss": 1.601582407951355, "step": 7286 }, { "epoch": 1.326567761900428, "grad_norm": 6.21875, "learning_rate": 3.378479691623405e-06, "loss": 1.2295979261398315, "step": 7288 }, { "epoch": 1.32693182852462, "grad_norm": 20.375, "learning_rate": 3.377727358815366e-06, "loss": 0.8671035170555115, "step": 7290 }, { "epoch": 1.3272958951488123, "grad_norm": 28.125, "learning_rate": 3.376974970577367e-06, "loss": 0.6735749840736389, "step": 7292 }, { "epoch": 1.3276599617730045, "grad_norm": 28.875, "learning_rate": 3.3762225270198198e-06, "loss": 1.7925301790237427, "step": 7294 }, { "epoch": 1.3280240283971967, "grad_norm": 18.125, "learning_rate": 3.375470028253141e-06, "loss": 1.4346858263015747, "step": 7296 }, { "epoch": 1.3283880950213889, "grad_norm": 11.625, "learning_rate": 3.374717474387757e-06, "loss": 1.4641952514648438, "step": 7298 }, { "epoch": 1.328752161645581, "grad_norm": 7.59375, "learning_rate": 3.3739648655341027e-06, "loss": 1.3167438507080078, "step": 7300 }, { "epoch": 1.3291162282697733, "grad_norm": 6.59375, "learning_rate": 3.373212201802619e-06, "loss": 1.076387643814087, "step": 7302 }, { "epoch": 1.3294802948939655, "grad_norm": 5.625, "learning_rate": 3.3724594833037583e-06, "loss": 0.8748537302017212, "step": 7304 }, { "epoch": 1.3298443615181579, "grad_norm": 21.25, "learning_rate": 3.3717067101479778e-06, "loss": 1.14386785030365, "step": 7306 }, { "epoch": 1.33020842814235, "grad_norm": 20.875, "learning_rate": 3.3709538824457432e-06, "loss": 1.7059259414672852, "step": 7308 }, { "epoch": 1.3305724947665423, "grad_norm": 17.625, "learning_rate": 3.3702010003075303e-06, "loss": 1.6073601245880127, "step": 7310 }, { "epoch": 1.3309365613907345, "grad_norm": 14.0, "learning_rate": 3.369448063843821e-06, "loss": 1.433451533317566, "step": 7312 }, { "epoch": 1.3313006280149267, "grad_norm": 19.875, "learning_rate": 3.3686950731651048e-06, "loss": 1.8865445852279663, "step": 7314 }, { "epoch": 1.331664694639119, "grad_norm": 11.625, "learning_rate": 3.3679420283818814e-06, "loss": 1.3903053998947144, "step": 7316 }, { "epoch": 1.3320287612633113, "grad_norm": 22.0, "learning_rate": 3.3671889296046567e-06, "loss": 1.2783299684524536, "step": 7318 }, { "epoch": 1.3323928278875035, "grad_norm": 30.625, "learning_rate": 3.366435776943944e-06, "loss": 0.7890225648880005, "step": 7320 }, { "epoch": 1.3327568945116957, "grad_norm": 7.59375, "learning_rate": 3.3656825705102657e-06, "loss": 0.903577446937561, "step": 7322 }, { "epoch": 1.3331209611358878, "grad_norm": 6.0, "learning_rate": 3.3649293104141534e-06, "loss": 1.0759109258651733, "step": 7324 }, { "epoch": 1.33348502776008, "grad_norm": 16.625, "learning_rate": 3.3641759967661435e-06, "loss": 1.5572078227996826, "step": 7326 }, { "epoch": 1.3338490943842722, "grad_norm": 11.3125, "learning_rate": 3.3634226296767815e-06, "loss": 1.291471242904663, "step": 7328 }, { "epoch": 1.3342131610084644, "grad_norm": 272.0, "learning_rate": 3.3626692092566214e-06, "loss": 1.1058037281036377, "step": 7330 }, { "epoch": 1.3345772276326568, "grad_norm": 17.25, "learning_rate": 3.3619157356162245e-06, "loss": 1.7693865299224854, "step": 7332 }, { "epoch": 1.334941294256849, "grad_norm": 6.53125, "learning_rate": 3.3611622088661605e-06, "loss": 0.9201260209083557, "step": 7334 }, { "epoch": 1.3353053608810412, "grad_norm": 12.75, "learning_rate": 3.360408629117007e-06, "loss": 1.304374098777771, "step": 7336 }, { "epoch": 1.3356694275052334, "grad_norm": 7.875, "learning_rate": 3.3596549964793457e-06, "loss": 1.7526408433914185, "step": 7338 }, { "epoch": 1.3360334941294256, "grad_norm": 6.9375, "learning_rate": 3.3589013110637718e-06, "loss": 1.1788661479949951, "step": 7340 }, { "epoch": 1.336397560753618, "grad_norm": 12.375, "learning_rate": 3.3581475729808856e-06, "loss": 1.585633397102356, "step": 7342 }, { "epoch": 1.3367616273778102, "grad_norm": 18.75, "learning_rate": 3.3573937823412945e-06, "loss": 1.6532328128814697, "step": 7344 }, { "epoch": 1.3371256940020024, "grad_norm": 13.0, "learning_rate": 3.3566399392556137e-06, "loss": 1.3763723373413086, "step": 7346 }, { "epoch": 1.3374897606261946, "grad_norm": 27.625, "learning_rate": 3.3558860438344674e-06, "loss": 1.4979712963104248, "step": 7348 }, { "epoch": 1.3378538272503868, "grad_norm": 11.125, "learning_rate": 3.355132096188487e-06, "loss": 1.8226686716079712, "step": 7350 }, { "epoch": 1.338217893874579, "grad_norm": 12.75, "learning_rate": 3.35437809642831e-06, "loss": 1.198443055152893, "step": 7352 }, { "epoch": 1.3385819604987712, "grad_norm": 13.25, "learning_rate": 3.3536240446645833e-06, "loss": 0.7959929704666138, "step": 7354 }, { "epoch": 1.3389460271229634, "grad_norm": 20.125, "learning_rate": 3.3528699410079624e-06, "loss": 1.5173218250274658, "step": 7356 }, { "epoch": 1.3393100937471556, "grad_norm": 22.625, "learning_rate": 3.3521157855691067e-06, "loss": 1.6894701719284058, "step": 7358 }, { "epoch": 1.339674160371348, "grad_norm": 6.78125, "learning_rate": 3.351361578458686e-06, "loss": 1.22219979763031, "step": 7360 }, { "epoch": 1.3400382269955402, "grad_norm": 25.875, "learning_rate": 3.350607319787379e-06, "loss": 1.163588047027588, "step": 7362 }, { "epoch": 1.3404022936197324, "grad_norm": 10.9375, "learning_rate": 3.349853009665868e-06, "loss": 1.2345460653305054, "step": 7364 }, { "epoch": 1.3407663602439246, "grad_norm": 13.0625, "learning_rate": 3.349098648204846e-06, "loss": 1.3027905225753784, "step": 7366 }, { "epoch": 1.3411304268681168, "grad_norm": 17.75, "learning_rate": 3.3483442355150115e-06, "loss": 1.7057050466537476, "step": 7368 }, { "epoch": 1.3414944934923092, "grad_norm": 16.125, "learning_rate": 3.347589771707072e-06, "loss": 2.037951946258545, "step": 7370 }, { "epoch": 1.3418585601165014, "grad_norm": 6.40625, "learning_rate": 3.346835256891743e-06, "loss": 1.3593381643295288, "step": 7372 }, { "epoch": 1.3422226267406936, "grad_norm": 23.5, "learning_rate": 3.346080691179745e-06, "loss": 1.3068010807037354, "step": 7374 }, { "epoch": 1.3425866933648858, "grad_norm": 27.25, "learning_rate": 3.3453260746818093e-06, "loss": 2.273064613342285, "step": 7376 }, { "epoch": 1.342950759989078, "grad_norm": 30.75, "learning_rate": 3.34457140750867e-06, "loss": 1.2065379619598389, "step": 7378 }, { "epoch": 1.3433148266132702, "grad_norm": 12.75, "learning_rate": 3.343816689771074e-06, "loss": 0.9221192002296448, "step": 7380 }, { "epoch": 1.3436788932374624, "grad_norm": 9.0625, "learning_rate": 3.3430619215797717e-06, "loss": 1.5145831108093262, "step": 7382 }, { "epoch": 1.3440429598616546, "grad_norm": 13.5, "learning_rate": 3.3423071030455236e-06, "loss": 1.5732219219207764, "step": 7384 }, { "epoch": 1.344407026485847, "grad_norm": 48.75, "learning_rate": 3.341552234279094e-06, "loss": 1.4417767524719238, "step": 7386 }, { "epoch": 1.3447710931100392, "grad_norm": 16.125, "learning_rate": 3.340797315391259e-06, "loss": 1.5921684503555298, "step": 7388 }, { "epoch": 1.3451351597342314, "grad_norm": 14.6875, "learning_rate": 3.340042346492799e-06, "loss": 1.4667093753814697, "step": 7390 }, { "epoch": 1.3454992263584236, "grad_norm": 5.28125, "learning_rate": 3.3392873276945025e-06, "loss": 0.9612140655517578, "step": 7392 }, { "epoch": 1.3458632929826158, "grad_norm": 20.0, "learning_rate": 3.3385322591071663e-06, "loss": 0.946083664894104, "step": 7394 }, { "epoch": 1.3462273596068082, "grad_norm": 6.65625, "learning_rate": 3.3377771408415926e-06, "loss": 0.8163317441940308, "step": 7396 }, { "epoch": 1.3465914262310004, "grad_norm": 10.0625, "learning_rate": 3.3370219730085923e-06, "loss": 1.4150222539901733, "step": 7398 }, { "epoch": 1.3469554928551926, "grad_norm": 38.25, "learning_rate": 3.336266755718983e-06, "loss": 1.4804495573043823, "step": 7400 }, { "epoch": 1.3473195594793848, "grad_norm": 51.5, "learning_rate": 3.3355114890835917e-06, "loss": 1.352971076965332, "step": 7402 }, { "epoch": 1.347683626103577, "grad_norm": 18.625, "learning_rate": 3.3347561732132473e-06, "loss": 1.3537341356277466, "step": 7404 }, { "epoch": 1.3480476927277691, "grad_norm": 8.3125, "learning_rate": 3.3340008082187917e-06, "loss": 1.237910270690918, "step": 7406 }, { "epoch": 1.3484117593519613, "grad_norm": 9.875, "learning_rate": 3.333245394211071e-06, "loss": 0.7169888615608215, "step": 7408 }, { "epoch": 1.3487758259761535, "grad_norm": 5.875, "learning_rate": 3.3324899313009397e-06, "loss": 1.3834115266799927, "step": 7410 }, { "epoch": 1.3491398926003457, "grad_norm": 5.65625, "learning_rate": 3.331734419599258e-06, "loss": 1.1051740646362305, "step": 7412 }, { "epoch": 1.3495039592245381, "grad_norm": 20.375, "learning_rate": 3.3309788592168947e-06, "loss": 1.2976346015930176, "step": 7414 }, { "epoch": 1.3498680258487303, "grad_norm": 11.6875, "learning_rate": 3.3302232502647246e-06, "loss": 1.2372989654541016, "step": 7416 }, { "epoch": 1.3502320924729225, "grad_norm": 5.03125, "learning_rate": 3.329467592853631e-06, "loss": 0.9661650657653809, "step": 7418 }, { "epoch": 1.3505961590971147, "grad_norm": 10.875, "learning_rate": 3.3287118870945043e-06, "loss": 1.5318711996078491, "step": 7420 }, { "epoch": 1.350960225721307, "grad_norm": 10.0625, "learning_rate": 3.3279561330982403e-06, "loss": 1.3106411695480347, "step": 7422 }, { "epoch": 1.3513242923454993, "grad_norm": 24.375, "learning_rate": 3.3272003309757415e-06, "loss": 1.2864681482315063, "step": 7424 }, { "epoch": 1.3516883589696915, "grad_norm": 6.375, "learning_rate": 3.3264444808379214e-06, "loss": 1.2805230617523193, "step": 7426 }, { "epoch": 1.3520524255938837, "grad_norm": 24.125, "learning_rate": 3.3256885827956965e-06, "loss": 1.5450854301452637, "step": 7428 }, { "epoch": 1.352416492218076, "grad_norm": 11.625, "learning_rate": 3.324932636959991e-06, "loss": 1.7074167728424072, "step": 7430 }, { "epoch": 1.352780558842268, "grad_norm": 91.0, "learning_rate": 3.3241766434417386e-06, "loss": 1.6927354335784912, "step": 7432 }, { "epoch": 1.3531446254664603, "grad_norm": 14.625, "learning_rate": 3.3234206023518776e-06, "loss": 0.9706486463546753, "step": 7434 }, { "epoch": 1.3535086920906525, "grad_norm": 25.625, "learning_rate": 3.322664513801355e-06, "loss": 1.6820039749145508, "step": 7436 }, { "epoch": 1.3538727587148447, "grad_norm": 91.5, "learning_rate": 3.3219083779011204e-06, "loss": 2.1871206760406494, "step": 7438 }, { "epoch": 1.354236825339037, "grad_norm": 24.0, "learning_rate": 3.321152194762137e-06, "loss": 1.5362269878387451, "step": 7440 }, { "epoch": 1.3546008919632293, "grad_norm": 10.75, "learning_rate": 3.320395964495371e-06, "loss": 1.5154311656951904, "step": 7442 }, { "epoch": 1.3549649585874215, "grad_norm": 14.0625, "learning_rate": 3.3196396872117943e-06, "loss": 1.6648980379104614, "step": 7444 }, { "epoch": 1.3553290252116137, "grad_norm": 15.125, "learning_rate": 3.3188833630223905e-06, "loss": 1.7354077100753784, "step": 7446 }, { "epoch": 1.3556930918358059, "grad_norm": 9.25, "learning_rate": 3.318126992038144e-06, "loss": 1.5520142316818237, "step": 7448 }, { "epoch": 1.3560571584599983, "grad_norm": 11.0625, "learning_rate": 3.3173705743700517e-06, "loss": 1.2338981628417969, "step": 7450 }, { "epoch": 1.3564212250841905, "grad_norm": 17.0, "learning_rate": 3.316614110129114e-06, "loss": 1.5115392208099365, "step": 7452 }, { "epoch": 1.3567852917083827, "grad_norm": 16.875, "learning_rate": 3.3158575994263383e-06, "loss": 1.394870638847351, "step": 7454 }, { "epoch": 1.3571493583325749, "grad_norm": 30.0, "learning_rate": 3.3151010423727402e-06, "loss": 1.5799367427825928, "step": 7456 }, { "epoch": 1.357513424956767, "grad_norm": 12.9375, "learning_rate": 3.3143444390793404e-06, "loss": 1.8644154071807861, "step": 7458 }, { "epoch": 1.3578774915809593, "grad_norm": 6.375, "learning_rate": 3.313587789657169e-06, "loss": 1.3400384187698364, "step": 7460 }, { "epoch": 1.3582415582051515, "grad_norm": 9.0, "learning_rate": 3.312831094217259e-06, "loss": 1.5915063619613647, "step": 7462 }, { "epoch": 1.3586056248293437, "grad_norm": 10.125, "learning_rate": 3.3120743528706556e-06, "loss": 1.1541141271591187, "step": 7464 }, { "epoch": 1.358969691453536, "grad_norm": 16.625, "learning_rate": 3.3113175657284048e-06, "loss": 0.5486347675323486, "step": 7466 }, { "epoch": 1.3593337580777283, "grad_norm": 7.3125, "learning_rate": 3.310560732901562e-06, "loss": 1.2431353330612183, "step": 7468 }, { "epoch": 1.3596978247019205, "grad_norm": 15.4375, "learning_rate": 3.3098038545011914e-06, "loss": 1.6641631126403809, "step": 7470 }, { "epoch": 1.3600618913261127, "grad_norm": 12.5, "learning_rate": 3.30904693063836e-06, "loss": 1.1571792364120483, "step": 7472 }, { "epoch": 1.3604259579503049, "grad_norm": 13.3125, "learning_rate": 3.308289961424145e-06, "loss": 1.4395567178726196, "step": 7474 }, { "epoch": 1.3607900245744973, "grad_norm": 14.75, "learning_rate": 3.307532946969627e-06, "loss": 1.4999630451202393, "step": 7476 }, { "epoch": 1.3611540911986895, "grad_norm": 13.75, "learning_rate": 3.306775887385895e-06, "loss": 1.7517006397247314, "step": 7478 }, { "epoch": 1.3615181578228817, "grad_norm": 34.0, "learning_rate": 3.306018782784044e-06, "loss": 1.6111230850219727, "step": 7480 }, { "epoch": 1.3618822244470739, "grad_norm": 6.8125, "learning_rate": 3.3052616332751785e-06, "loss": 1.0036317110061646, "step": 7482 }, { "epoch": 1.362246291071266, "grad_norm": 24.5, "learning_rate": 3.304504438970404e-06, "loss": 1.4738763570785522, "step": 7484 }, { "epoch": 1.3626103576954582, "grad_norm": 12.375, "learning_rate": 3.3037471999808383e-06, "loss": 0.6481763124465942, "step": 7486 }, { "epoch": 1.3629744243196504, "grad_norm": 13.75, "learning_rate": 3.3029899164176015e-06, "loss": 1.3967748880386353, "step": 7488 }, { "epoch": 1.3633384909438426, "grad_norm": 31.25, "learning_rate": 3.3022325883918226e-06, "loss": 1.840897798538208, "step": 7490 }, { "epoch": 1.3637025575680348, "grad_norm": 10.75, "learning_rate": 3.301475216014636e-06, "loss": 1.540225625038147, "step": 7492 }, { "epoch": 1.3640666241922272, "grad_norm": 18.25, "learning_rate": 3.300717799397183e-06, "loss": 1.142910122871399, "step": 7494 }, { "epoch": 1.3644306908164194, "grad_norm": 15.9375, "learning_rate": 3.299960338650612e-06, "loss": 0.9440374374389648, "step": 7496 }, { "epoch": 1.3647947574406116, "grad_norm": 6.34375, "learning_rate": 3.2992028338860758e-06, "loss": 1.3031357526779175, "step": 7498 }, { "epoch": 1.3651588240648038, "grad_norm": 3.484375, "learning_rate": 3.2984452852147376e-06, "loss": 1.032446265220642, "step": 7500 }, { "epoch": 1.365522890688996, "grad_norm": 6.71875, "learning_rate": 3.297687692747763e-06, "loss": 1.207587480545044, "step": 7502 }, { "epoch": 1.3658869573131884, "grad_norm": 6.6875, "learning_rate": 3.296930056596326e-06, "loss": 1.4006855487823486, "step": 7504 }, { "epoch": 1.3662510239373806, "grad_norm": 8.1875, "learning_rate": 3.296172376871607e-06, "loss": 1.3404649496078491, "step": 7506 }, { "epoch": 1.3666150905615728, "grad_norm": 10.9375, "learning_rate": 3.295414653684791e-06, "loss": 1.4360241889953613, "step": 7508 }, { "epoch": 1.366979157185765, "grad_norm": 7.21875, "learning_rate": 3.294656887147072e-06, "loss": 1.2095770835876465, "step": 7510 }, { "epoch": 1.3673432238099572, "grad_norm": 29.875, "learning_rate": 3.2938990773696493e-06, "loss": 1.4648830890655518, "step": 7512 }, { "epoch": 1.3677072904341494, "grad_norm": 20.125, "learning_rate": 3.293141224463728e-06, "loss": 1.4174734354019165, "step": 7514 }, { "epoch": 1.3680713570583416, "grad_norm": 16.25, "learning_rate": 3.2923833285405206e-06, "loss": 1.0182284116744995, "step": 7516 }, { "epoch": 1.3684354236825338, "grad_norm": 6.4375, "learning_rate": 3.2916253897112426e-06, "loss": 1.3506553173065186, "step": 7518 }, { "epoch": 1.3687994903067262, "grad_norm": 8.5, "learning_rate": 3.290867408087122e-06, "loss": 1.3320130109786987, "step": 7520 }, { "epoch": 1.3691635569309184, "grad_norm": 8.0625, "learning_rate": 3.2901093837793884e-06, "loss": 1.3439966440200806, "step": 7522 }, { "epoch": 1.3695276235551106, "grad_norm": 12.5625, "learning_rate": 3.2893513168992773e-06, "loss": 1.424302339553833, "step": 7524 }, { "epoch": 1.3698916901793028, "grad_norm": 23.125, "learning_rate": 3.2885932075580352e-06, "loss": 1.4320043325424194, "step": 7526 }, { "epoch": 1.370255756803495, "grad_norm": 9.625, "learning_rate": 3.287835055866907e-06, "loss": 1.3840255737304688, "step": 7528 }, { "epoch": 1.3706198234276874, "grad_norm": 8.875, "learning_rate": 3.287076861937152e-06, "loss": 1.166704535484314, "step": 7530 }, { "epoch": 1.3709838900518796, "grad_norm": 9.5625, "learning_rate": 3.2863186258800307e-06, "loss": 1.2772608995437622, "step": 7532 }, { "epoch": 1.3713479566760718, "grad_norm": 12.625, "learning_rate": 3.2855603478068114e-06, "loss": 1.3996284008026123, "step": 7534 }, { "epoch": 1.371712023300264, "grad_norm": 14.25, "learning_rate": 3.284802027828769e-06, "loss": 1.4800236225128174, "step": 7536 }, { "epoch": 1.3720760899244562, "grad_norm": 14.6875, "learning_rate": 3.2840436660571815e-06, "loss": 1.622351884841919, "step": 7538 }, { "epoch": 1.3724401565486484, "grad_norm": 11.0, "learning_rate": 3.2832852626033383e-06, "loss": 1.3180458545684814, "step": 7540 }, { "epoch": 1.3728042231728406, "grad_norm": 45.0, "learning_rate": 3.2825268175785312e-06, "loss": 1.598387360572815, "step": 7542 }, { "epoch": 1.3731682897970328, "grad_norm": 6.125, "learning_rate": 3.2817683310940584e-06, "loss": 1.235984444618225, "step": 7544 }, { "epoch": 1.373532356421225, "grad_norm": 12.25, "learning_rate": 3.2810098032612246e-06, "loss": 1.0352967977523804, "step": 7546 }, { "epoch": 1.3738964230454174, "grad_norm": 15.1875, "learning_rate": 3.280251234191341e-06, "loss": 1.1905550956726074, "step": 7548 }, { "epoch": 1.3742604896696096, "grad_norm": 11.75, "learning_rate": 3.2794926239957246e-06, "loss": 1.5297881364822388, "step": 7550 }, { "epoch": 1.3746245562938018, "grad_norm": 10.0625, "learning_rate": 3.2787339727856993e-06, "loss": 1.4633986949920654, "step": 7552 }, { "epoch": 1.374988622917994, "grad_norm": 22.25, "learning_rate": 3.277975280672592e-06, "loss": 1.7917207479476929, "step": 7554 }, { "epoch": 1.3753526895421861, "grad_norm": 23.125, "learning_rate": 3.2772165477677394e-06, "loss": 1.7453250885009766, "step": 7556 }, { "epoch": 1.3757167561663786, "grad_norm": 13.3125, "learning_rate": 3.276457774182481e-06, "loss": 1.842512845993042, "step": 7558 }, { "epoch": 1.3760808227905708, "grad_norm": 10.375, "learning_rate": 3.2756989600281654e-06, "loss": 1.3612158298492432, "step": 7560 }, { "epoch": 1.376444889414763, "grad_norm": 41.25, "learning_rate": 3.2749401054161446e-06, "loss": 1.6692206859588623, "step": 7562 }, { "epoch": 1.3768089560389551, "grad_norm": 13.5, "learning_rate": 3.274181210457777e-06, "loss": 1.8831231594085693, "step": 7564 }, { "epoch": 1.3771730226631473, "grad_norm": 15.125, "learning_rate": 3.2734222752644283e-06, "loss": 1.5002413988113403, "step": 7566 }, { "epoch": 1.3775370892873395, "grad_norm": 14.5, "learning_rate": 3.272663299947468e-06, "loss": 1.6364710330963135, "step": 7568 }, { "epoch": 1.3779011559115317, "grad_norm": 7.21875, "learning_rate": 3.2719042846182746e-06, "loss": 0.9296509027481079, "step": 7570 }, { "epoch": 1.378265222535724, "grad_norm": 8.9375, "learning_rate": 3.2711452293882295e-06, "loss": 0.6318345665931702, "step": 7572 }, { "epoch": 1.3786292891599163, "grad_norm": 3.46875, "learning_rate": 3.2703861343687206e-06, "loss": 1.0426054000854492, "step": 7574 }, { "epoch": 1.3789933557841085, "grad_norm": 10.375, "learning_rate": 3.2696269996711417e-06, "loss": 1.0668225288391113, "step": 7576 }, { "epoch": 1.3793574224083007, "grad_norm": 39.75, "learning_rate": 3.268867825406894e-06, "loss": 1.3814303874969482, "step": 7578 }, { "epoch": 1.379721489032493, "grad_norm": 11.5, "learning_rate": 3.2681086116873817e-06, "loss": 1.4663279056549072, "step": 7580 }, { "epoch": 1.3800855556566851, "grad_norm": 18.25, "learning_rate": 3.267349358624018e-06, "loss": 1.427017331123352, "step": 7582 }, { "epoch": 1.3804496222808775, "grad_norm": 11.5625, "learning_rate": 3.266590066328219e-06, "loss": 0.8903486132621765, "step": 7584 }, { "epoch": 1.3808136889050697, "grad_norm": 14.4375, "learning_rate": 3.2658307349114083e-06, "loss": 1.2753279209136963, "step": 7586 }, { "epoch": 1.381177755529262, "grad_norm": 13.5625, "learning_rate": 3.2650713644850142e-06, "loss": 1.7756521701812744, "step": 7588 }, { "epoch": 1.3815418221534541, "grad_norm": 10.9375, "learning_rate": 3.2643119551604718e-06, "loss": 1.5580977201461792, "step": 7590 }, { "epoch": 1.3819058887776463, "grad_norm": 7.09375, "learning_rate": 3.2635525070492213e-06, "loss": 1.588334321975708, "step": 7592 }, { "epoch": 1.3822699554018385, "grad_norm": 9.875, "learning_rate": 3.2627930202627077e-06, "loss": 1.3302747011184692, "step": 7594 }, { "epoch": 1.3826340220260307, "grad_norm": 16.125, "learning_rate": 3.262033494912385e-06, "loss": 1.4324854612350464, "step": 7596 }, { "epoch": 1.382998088650223, "grad_norm": 15.3125, "learning_rate": 3.2612739311097073e-06, "loss": 1.295045256614685, "step": 7598 }, { "epoch": 1.383362155274415, "grad_norm": 11.875, "learning_rate": 3.26051432896614e-06, "loss": 0.9888509511947632, "step": 7600 }, { "epoch": 1.3837262218986075, "grad_norm": 10.4375, "learning_rate": 3.259754688593151e-06, "loss": 1.4764946699142456, "step": 7602 }, { "epoch": 1.3840902885227997, "grad_norm": 16.5, "learning_rate": 3.2589950101022127e-06, "loss": 1.529345154762268, "step": 7604 }, { "epoch": 1.3844543551469919, "grad_norm": 12.4375, "learning_rate": 3.258235293604808e-06, "loss": 1.0732531547546387, "step": 7606 }, { "epoch": 1.384818421771184, "grad_norm": 4.9375, "learning_rate": 3.257475539212419e-06, "loss": 1.0899908542633057, "step": 7608 }, { "epoch": 1.3851824883953763, "grad_norm": 16.875, "learning_rate": 3.2567157470365386e-06, "loss": 1.6812312602996826, "step": 7610 }, { "epoch": 1.3855465550195687, "grad_norm": 26.5, "learning_rate": 3.255955917188663e-06, "loss": 1.6087589263916016, "step": 7612 }, { "epoch": 1.3859106216437609, "grad_norm": 14.4375, "learning_rate": 3.2551960497802937e-06, "loss": 1.6894302368164062, "step": 7614 }, { "epoch": 1.386274688267953, "grad_norm": 27.875, "learning_rate": 3.254436144922939e-06, "loss": 1.8321233987808228, "step": 7616 }, { "epoch": 1.3866387548921453, "grad_norm": 10.4375, "learning_rate": 3.2536762027281092e-06, "loss": 1.5329599380493164, "step": 7618 }, { "epoch": 1.3870028215163375, "grad_norm": 8.3125, "learning_rate": 3.2529162233073263e-06, "loss": 1.4889445304870605, "step": 7620 }, { "epoch": 1.3873668881405297, "grad_norm": 8.25, "learning_rate": 3.2521562067721126e-06, "loss": 1.4516929388046265, "step": 7622 }, { "epoch": 1.3877309547647219, "grad_norm": 8.75, "learning_rate": 3.2513961532339965e-06, "loss": 1.1148942708969116, "step": 7624 }, { "epoch": 1.388095021388914, "grad_norm": 16.125, "learning_rate": 3.2506360628045153e-06, "loss": 1.6145505905151367, "step": 7626 }, { "epoch": 1.3884590880131065, "grad_norm": 8.1875, "learning_rate": 3.249875935595206e-06, "loss": 1.5374705791473389, "step": 7628 }, { "epoch": 1.3888231546372987, "grad_norm": 11.375, "learning_rate": 3.2491157717176157e-06, "loss": 1.519486665725708, "step": 7630 }, { "epoch": 1.3891872212614909, "grad_norm": 17.125, "learning_rate": 3.248355571283297e-06, "loss": 1.7875479459762573, "step": 7632 }, { "epoch": 1.389551287885683, "grad_norm": 16.625, "learning_rate": 3.2475953344038037e-06, "loss": 1.1493209600448608, "step": 7634 }, { "epoch": 1.3899153545098752, "grad_norm": 15.5, "learning_rate": 3.2468350611906997e-06, "loss": 1.1954141855239868, "step": 7636 }, { "epoch": 1.3902794211340677, "grad_norm": 13.125, "learning_rate": 3.2460747517555493e-06, "loss": 1.4001014232635498, "step": 7638 }, { "epoch": 1.3906434877582599, "grad_norm": 12.9375, "learning_rate": 3.245314406209926e-06, "loss": 1.8974583148956299, "step": 7640 }, { "epoch": 1.391007554382452, "grad_norm": 16.125, "learning_rate": 3.2445540246654095e-06, "loss": 1.4770878553390503, "step": 7642 }, { "epoch": 1.3913716210066442, "grad_norm": 7.125, "learning_rate": 3.2437936072335795e-06, "loss": 1.1289637088775635, "step": 7644 }, { "epoch": 1.3917356876308364, "grad_norm": 7.375, "learning_rate": 3.2430331540260275e-06, "loss": 1.5044817924499512, "step": 7646 }, { "epoch": 1.3920997542550286, "grad_norm": 13.1875, "learning_rate": 3.242272665154343e-06, "loss": 1.4327861070632935, "step": 7648 }, { "epoch": 1.3924638208792208, "grad_norm": 7.0625, "learning_rate": 3.2415121407301274e-06, "loss": 1.3112051486968994, "step": 7650 }, { "epoch": 1.392827887503413, "grad_norm": 15.0, "learning_rate": 3.2407515808649846e-06, "loss": 1.1587635278701782, "step": 7652 }, { "epoch": 1.3931919541276052, "grad_norm": 20.75, "learning_rate": 3.2399909856705224e-06, "loss": 1.9742004871368408, "step": 7654 }, { "epoch": 1.3935560207517976, "grad_norm": 7.78125, "learning_rate": 3.239230355258356e-06, "loss": 1.7030959129333496, "step": 7656 }, { "epoch": 1.3939200873759898, "grad_norm": 17.625, "learning_rate": 3.2384696897401036e-06, "loss": 1.2186884880065918, "step": 7658 }, { "epoch": 1.394284154000182, "grad_norm": 18.25, "learning_rate": 3.2377089892273917e-06, "loss": 1.8685083389282227, "step": 7660 }, { "epoch": 1.3946482206243742, "grad_norm": 7.21875, "learning_rate": 3.2369482538318485e-06, "loss": 1.0573270320892334, "step": 7662 }, { "epoch": 1.3950122872485664, "grad_norm": 10.6875, "learning_rate": 3.2361874836651085e-06, "loss": 1.3933029174804688, "step": 7664 }, { "epoch": 1.3953763538727588, "grad_norm": 15.3125, "learning_rate": 3.2354266788388146e-06, "loss": 1.525225043296814, "step": 7666 }, { "epoch": 1.395740420496951, "grad_norm": 18.0, "learning_rate": 3.234665839464608e-06, "loss": 0.7856537699699402, "step": 7668 }, { "epoch": 1.3961044871211432, "grad_norm": 11.125, "learning_rate": 3.2339049656541404e-06, "loss": 0.7443108558654785, "step": 7670 }, { "epoch": 1.3964685537453354, "grad_norm": 10.3125, "learning_rate": 3.2331440575190678e-06, "loss": 1.477005958557129, "step": 7672 }, { "epoch": 1.3968326203695276, "grad_norm": 15.1875, "learning_rate": 3.2323831151710494e-06, "loss": 1.56203031539917, "step": 7674 }, { "epoch": 1.3971966869937198, "grad_norm": 17.0, "learning_rate": 3.2316221387217506e-06, "loss": 1.8665440082550049, "step": 7676 }, { "epoch": 1.397560753617912, "grad_norm": 24.375, "learning_rate": 3.2308611282828415e-06, "loss": 1.714219570159912, "step": 7678 }, { "epoch": 1.3979248202421042, "grad_norm": 14.125, "learning_rate": 3.2301000839659972e-06, "loss": 0.9606533050537109, "step": 7680 }, { "epoch": 1.3982888868662966, "grad_norm": 11.1875, "learning_rate": 3.229339005882899e-06, "loss": 1.4967529773712158, "step": 7682 }, { "epoch": 1.3986529534904888, "grad_norm": 10.9375, "learning_rate": 3.2285778941452297e-06, "loss": 1.2466202974319458, "step": 7684 }, { "epoch": 1.399017020114681, "grad_norm": 6.03125, "learning_rate": 3.2278167488646826e-06, "loss": 1.1179149150848389, "step": 7686 }, { "epoch": 1.3993810867388732, "grad_norm": 21.75, "learning_rate": 3.2270555701529496e-06, "loss": 1.2939362525939941, "step": 7688 }, { "epoch": 1.3997451533630654, "grad_norm": 24.625, "learning_rate": 3.2262943581217313e-06, "loss": 1.6130499839782715, "step": 7690 }, { "epoch": 1.4001092199872578, "grad_norm": 13.5, "learning_rate": 3.225533112882734e-06, "loss": 0.6802541017532349, "step": 7692 }, { "epoch": 1.40047328661145, "grad_norm": 10.875, "learning_rate": 3.2247718345476662e-06, "loss": 1.5096487998962402, "step": 7694 }, { "epoch": 1.4008373532356422, "grad_norm": 12.9375, "learning_rate": 3.2240105232282433e-06, "loss": 1.4196727275848389, "step": 7696 }, { "epoch": 1.4012014198598344, "grad_norm": 8.5, "learning_rate": 3.2232491790361832e-06, "loss": 1.2425036430358887, "step": 7698 }, { "epoch": 1.4015654864840266, "grad_norm": 3.5625, "learning_rate": 3.2224878020832105e-06, "loss": 1.2976609468460083, "step": 7700 }, { "epoch": 1.4019295531082188, "grad_norm": 4.90625, "learning_rate": 3.221726392481055e-06, "loss": 1.1224021911621094, "step": 7702 }, { "epoch": 1.402293619732411, "grad_norm": 9.1875, "learning_rate": 3.22096495034145e-06, "loss": 1.3450913429260254, "step": 7704 }, { "epoch": 1.4026576863566032, "grad_norm": 23.625, "learning_rate": 3.2202034757761343e-06, "loss": 1.555578589439392, "step": 7706 }, { "epoch": 1.4030217529807956, "grad_norm": 20.875, "learning_rate": 3.21944196889685e-06, "loss": 1.7363042831420898, "step": 7708 }, { "epoch": 1.4033858196049878, "grad_norm": 23.625, "learning_rate": 3.218680429815346e-06, "loss": 1.8728492259979248, "step": 7710 }, { "epoch": 1.40374988622918, "grad_norm": 27.375, "learning_rate": 3.2179188586433763e-06, "loss": 1.9928492307662964, "step": 7712 }, { "epoch": 1.4041139528533721, "grad_norm": 9.0625, "learning_rate": 3.2171572554926966e-06, "loss": 1.4951566457748413, "step": 7714 }, { "epoch": 1.4044780194775643, "grad_norm": 10.3125, "learning_rate": 3.2163956204750703e-06, "loss": 1.5097541809082031, "step": 7716 }, { "epoch": 1.4048420861017568, "grad_norm": 6.46875, "learning_rate": 3.215633953702263e-06, "loss": 1.2711541652679443, "step": 7718 }, { "epoch": 1.405206152725949, "grad_norm": 11.3125, "learning_rate": 3.2148722552860466e-06, "loss": 1.2776074409484863, "step": 7720 }, { "epoch": 1.4055702193501411, "grad_norm": 123.0, "learning_rate": 3.214110525338199e-06, "loss": 1.2997921705245972, "step": 7722 }, { "epoch": 1.4059342859743333, "grad_norm": 18.125, "learning_rate": 3.2133487639704983e-06, "loss": 1.13933265209198, "step": 7724 }, { "epoch": 1.4062983525985255, "grad_norm": 9.375, "learning_rate": 3.2125869712947313e-06, "loss": 1.4762831926345825, "step": 7726 }, { "epoch": 1.4066624192227177, "grad_norm": 9.75, "learning_rate": 3.211825147422688e-06, "loss": 1.1391164064407349, "step": 7728 }, { "epoch": 1.40702648584691, "grad_norm": 13.3125, "learning_rate": 3.2110632924661623e-06, "loss": 1.6492605209350586, "step": 7730 }, { "epoch": 1.4073905524711021, "grad_norm": 10.3125, "learning_rate": 3.2103014065369543e-06, "loss": 1.4995006322860718, "step": 7732 }, { "epoch": 1.4077546190952943, "grad_norm": 12.8125, "learning_rate": 3.209539489746867e-06, "loss": 1.4288798570632935, "step": 7734 }, { "epoch": 1.4081186857194867, "grad_norm": 12.5, "learning_rate": 3.2087775422077087e-06, "loss": 1.4374701976776123, "step": 7736 }, { "epoch": 1.408482752343679, "grad_norm": 8.875, "learning_rate": 3.2080155640312925e-06, "loss": 1.3923237323760986, "step": 7738 }, { "epoch": 1.4088468189678711, "grad_norm": 23.75, "learning_rate": 3.2072535553294348e-06, "loss": 1.6418286561965942, "step": 7740 }, { "epoch": 1.4092108855920633, "grad_norm": 12.3125, "learning_rate": 3.2064915162139574e-06, "loss": 1.6202239990234375, "step": 7742 }, { "epoch": 1.4095749522162555, "grad_norm": 4.625, "learning_rate": 3.2057294467966882e-06, "loss": 1.4111257791519165, "step": 7744 }, { "epoch": 1.409939018840448, "grad_norm": 7.1875, "learning_rate": 3.204967347189456e-06, "loss": 1.1084165573120117, "step": 7746 }, { "epoch": 1.4103030854646401, "grad_norm": 9.625, "learning_rate": 3.2042052175040955e-06, "loss": 1.6573485136032104, "step": 7748 }, { "epoch": 1.4106671520888323, "grad_norm": 18.5, "learning_rate": 3.203443057852448e-06, "loss": 1.5492092370986938, "step": 7750 }, { "epoch": 1.4110312187130245, "grad_norm": 54.75, "learning_rate": 3.202680868346355e-06, "loss": 1.3896853923797607, "step": 7752 }, { "epoch": 1.4113952853372167, "grad_norm": 18.375, "learning_rate": 3.2019186490976667e-06, "loss": 1.7841663360595703, "step": 7754 }, { "epoch": 1.411759351961409, "grad_norm": 8.3125, "learning_rate": 3.201156400218235e-06, "loss": 1.4492237567901611, "step": 7756 }, { "epoch": 1.412123418585601, "grad_norm": 11.3125, "learning_rate": 3.2003941218199165e-06, "loss": 1.2060620784759521, "step": 7758 }, { "epoch": 1.4124874852097933, "grad_norm": 13.8125, "learning_rate": 3.1996318140145726e-06, "loss": 1.7670797109603882, "step": 7760 }, { "epoch": 1.4128515518339857, "grad_norm": 8.75, "learning_rate": 3.1988694769140695e-06, "loss": 1.3916168212890625, "step": 7762 }, { "epoch": 1.413215618458178, "grad_norm": 13.8125, "learning_rate": 3.1981071106302765e-06, "loss": 1.255885124206543, "step": 7764 }, { "epoch": 1.41357968508237, "grad_norm": 14.4375, "learning_rate": 3.197344715275068e-06, "loss": 1.4936281442642212, "step": 7766 }, { "epoch": 1.4139437517065623, "grad_norm": 9.75, "learning_rate": 3.196582290960322e-06, "loss": 1.6796693801879883, "step": 7768 }, { "epoch": 1.4143078183307545, "grad_norm": 14.5625, "learning_rate": 3.195819837797921e-06, "loss": 1.650527000427246, "step": 7770 }, { "epoch": 1.414671884954947, "grad_norm": 9.1875, "learning_rate": 3.1950573558997532e-06, "loss": 1.208836317062378, "step": 7772 }, { "epoch": 1.415035951579139, "grad_norm": 14.5, "learning_rate": 3.194294845377709e-06, "loss": 1.3354949951171875, "step": 7774 }, { "epoch": 1.4154000182033313, "grad_norm": 20.5, "learning_rate": 3.193532306343683e-06, "loss": 1.5038433074951172, "step": 7776 }, { "epoch": 1.4157640848275235, "grad_norm": 20.5, "learning_rate": 3.1927697389095756e-06, "loss": 1.6682833433151245, "step": 7778 }, { "epoch": 1.4161281514517157, "grad_norm": 6.90625, "learning_rate": 3.1920071431872903e-06, "loss": 1.2686760425567627, "step": 7780 }, { "epoch": 1.4164922180759079, "grad_norm": 11.75, "learning_rate": 3.1912445192887348e-06, "loss": 1.4714316129684448, "step": 7782 }, { "epoch": 1.4168562847001, "grad_norm": 30.25, "learning_rate": 3.1904818673258216e-06, "loss": 1.4260897636413574, "step": 7784 }, { "epoch": 1.4172203513242922, "grad_norm": 7.375, "learning_rate": 3.189719187410466e-06, "loss": 1.0889663696289062, "step": 7786 }, { "epoch": 1.4175844179484844, "grad_norm": 9.1875, "learning_rate": 3.188956479654588e-06, "loss": 1.4181095361709595, "step": 7788 }, { "epoch": 1.4179484845726769, "grad_norm": 4.4375, "learning_rate": 3.1881937441701126e-06, "loss": 1.2934268712997437, "step": 7790 }, { "epoch": 1.418312551196869, "grad_norm": 50.25, "learning_rate": 3.1874309810689686e-06, "loss": 1.2611124515533447, "step": 7792 }, { "epoch": 1.4186766178210612, "grad_norm": 6.5, "learning_rate": 3.1866681904630877e-06, "loss": 1.3409913778305054, "step": 7794 }, { "epoch": 1.4190406844452534, "grad_norm": 31.0, "learning_rate": 3.185905372464405e-06, "loss": 1.5416288375854492, "step": 7796 }, { "epoch": 1.4194047510694456, "grad_norm": 26.75, "learning_rate": 3.185142527184864e-06, "loss": 1.68964684009552, "step": 7798 }, { "epoch": 1.419768817693638, "grad_norm": 13.75, "learning_rate": 3.184379654736407e-06, "loss": 1.438903570175171, "step": 7800 }, { "epoch": 1.4201328843178302, "grad_norm": 22.25, "learning_rate": 3.1836167552309827e-06, "loss": 1.4650827646255493, "step": 7802 }, { "epoch": 1.4204969509420224, "grad_norm": 8.4375, "learning_rate": 3.1828538287805433e-06, "loss": 1.2642356157302856, "step": 7804 }, { "epoch": 1.4208610175662146, "grad_norm": 15.4375, "learning_rate": 3.1820908754970457e-06, "loss": 0.9210084676742554, "step": 7806 }, { "epoch": 1.4212250841904068, "grad_norm": 7.84375, "learning_rate": 3.1813278954924506e-06, "loss": 1.082798957824707, "step": 7808 }, { "epoch": 1.421589150814599, "grad_norm": 34.0, "learning_rate": 3.180564888878721e-06, "loss": 1.328012466430664, "step": 7810 }, { "epoch": 1.4219532174387912, "grad_norm": 12.875, "learning_rate": 3.1798018557678257e-06, "loss": 0.6483087539672852, "step": 7812 }, { "epoch": 1.4223172840629834, "grad_norm": 13.1875, "learning_rate": 3.179038796271737e-06, "loss": 1.42759108543396, "step": 7814 }, { "epoch": 1.4226813506871758, "grad_norm": 5.125, "learning_rate": 3.178275710502431e-06, "loss": 1.2675269842147827, "step": 7816 }, { "epoch": 1.423045417311368, "grad_norm": 3.65625, "learning_rate": 3.1775125985718864e-06, "loss": 0.97649085521698, "step": 7818 }, { "epoch": 1.4234094839355602, "grad_norm": 12.125, "learning_rate": 3.1767494605920877e-06, "loss": 1.5684438943862915, "step": 7820 }, { "epoch": 1.4237735505597524, "grad_norm": 11.75, "learning_rate": 3.175986296675022e-06, "loss": 1.8689886331558228, "step": 7822 }, { "epoch": 1.4241376171839446, "grad_norm": 15.5625, "learning_rate": 3.17522310693268e-06, "loss": 1.0635731220245361, "step": 7824 }, { "epoch": 1.424501683808137, "grad_norm": 15.1875, "learning_rate": 3.1744598914770576e-06, "loss": 1.0783973932266235, "step": 7826 }, { "epoch": 1.4248657504323292, "grad_norm": 18.5, "learning_rate": 3.1736966504201526e-06, "loss": 1.7604498863220215, "step": 7828 }, { "epoch": 1.4252298170565214, "grad_norm": 12.75, "learning_rate": 3.172933383873969e-06, "loss": 1.5512090921401978, "step": 7830 }, { "epoch": 1.4255938836807136, "grad_norm": 11.0625, "learning_rate": 3.1721700919505115e-06, "loss": 1.5707006454467773, "step": 7832 }, { "epoch": 1.4259579503049058, "grad_norm": 20.0, "learning_rate": 3.1714067747617906e-06, "loss": 0.6973305344581604, "step": 7834 }, { "epoch": 1.426322016929098, "grad_norm": 24.0, "learning_rate": 3.170643432419821e-06, "loss": 0.4689348340034485, "step": 7836 }, { "epoch": 1.4266860835532902, "grad_norm": 28.5, "learning_rate": 3.169880065036618e-06, "loss": 0.9014610052108765, "step": 7838 }, { "epoch": 1.4270501501774824, "grad_norm": 10.0625, "learning_rate": 3.169116672724205e-06, "loss": 1.4136472940444946, "step": 7840 }, { "epoch": 1.4274142168016746, "grad_norm": 16.25, "learning_rate": 3.1683532555946052e-06, "loss": 1.6703163385391235, "step": 7842 }, { "epoch": 1.427778283425867, "grad_norm": 8.9375, "learning_rate": 3.167589813759847e-06, "loss": 1.8979815244674683, "step": 7844 }, { "epoch": 1.4281423500500592, "grad_norm": 12.625, "learning_rate": 3.166826347331964e-06, "loss": 1.4808622598648071, "step": 7846 }, { "epoch": 1.4285064166742514, "grad_norm": 19.375, "learning_rate": 3.16606285642299e-06, "loss": 1.4716413021087646, "step": 7848 }, { "epoch": 1.4288704832984436, "grad_norm": 5.46875, "learning_rate": 3.165299341144964e-06, "loss": 1.107454538345337, "step": 7850 }, { "epoch": 1.4292345499226358, "grad_norm": 6.21875, "learning_rate": 3.1645358016099303e-06, "loss": 0.9434410333633423, "step": 7852 }, { "epoch": 1.4295986165468282, "grad_norm": 7.5, "learning_rate": 3.163772237929935e-06, "loss": 1.400646448135376, "step": 7854 }, { "epoch": 1.4299626831710204, "grad_norm": 7.28125, "learning_rate": 3.1630086502170266e-06, "loss": 1.440133810043335, "step": 7856 }, { "epoch": 1.4303267497952126, "grad_norm": 6.1875, "learning_rate": 3.16224503858326e-06, "loss": 1.3168493509292603, "step": 7858 }, { "epoch": 1.4306908164194048, "grad_norm": 7.5625, "learning_rate": 3.1614814031406914e-06, "loss": 1.3910198211669922, "step": 7860 }, { "epoch": 1.431054883043597, "grad_norm": 6.875, "learning_rate": 3.1607177440013816e-06, "loss": 1.4033300876617432, "step": 7862 }, { "epoch": 1.4314189496677892, "grad_norm": 4.3125, "learning_rate": 3.159954061277394e-06, "loss": 1.3206058740615845, "step": 7864 }, { "epoch": 1.4317830162919813, "grad_norm": 20.625, "learning_rate": 3.1591903550807955e-06, "loss": 1.4612363576889038, "step": 7866 }, { "epoch": 1.4321470829161735, "grad_norm": 6.59375, "learning_rate": 3.1584266255236582e-06, "loss": 1.1208163499832153, "step": 7868 }, { "epoch": 1.432511149540366, "grad_norm": 26.875, "learning_rate": 3.157662872718055e-06, "loss": 0.44327694177627563, "step": 7870 }, { "epoch": 1.4328752161645582, "grad_norm": 13.375, "learning_rate": 3.156899096776065e-06, "loss": 1.3511675596237183, "step": 7872 }, { "epoch": 1.4332392827887503, "grad_norm": 3.875, "learning_rate": 3.156135297809768e-06, "loss": 1.378699541091919, "step": 7874 }, { "epoch": 1.4336033494129425, "grad_norm": 15.875, "learning_rate": 3.155371475931249e-06, "loss": 1.449173927307129, "step": 7876 }, { "epoch": 1.4339674160371347, "grad_norm": 14.75, "learning_rate": 3.1546076312525955e-06, "loss": 1.5970392227172852, "step": 7878 }, { "epoch": 1.4343314826613272, "grad_norm": 12.25, "learning_rate": 3.153843763885899e-06, "loss": 1.430195689201355, "step": 7880 }, { "epoch": 1.4346955492855193, "grad_norm": 20.25, "learning_rate": 3.1530798739432526e-06, "loss": 1.3880562782287598, "step": 7882 }, { "epoch": 1.4350596159097115, "grad_norm": 12.4375, "learning_rate": 3.152315961536756e-06, "loss": 1.7094707489013672, "step": 7884 }, { "epoch": 1.4354236825339037, "grad_norm": 10.5625, "learning_rate": 3.1515520267785095e-06, "loss": 2.0341827869415283, "step": 7886 }, { "epoch": 1.435787749158096, "grad_norm": 10.8125, "learning_rate": 3.150788069780616e-06, "loss": 1.6291069984436035, "step": 7888 }, { "epoch": 1.4361518157822881, "grad_norm": 7.09375, "learning_rate": 3.150024090655186e-06, "loss": 1.4164053201675415, "step": 7890 }, { "epoch": 1.4365158824064803, "grad_norm": 8.25, "learning_rate": 3.1492600895143278e-06, "loss": 1.459969401359558, "step": 7892 }, { "epoch": 1.4368799490306725, "grad_norm": 28.0, "learning_rate": 3.1484960664701557e-06, "loss": 1.5984312295913696, "step": 7894 }, { "epoch": 1.4372440156548647, "grad_norm": 11.625, "learning_rate": 3.1477320216347885e-06, "loss": 1.398792028427124, "step": 7896 }, { "epoch": 1.4376080822790571, "grad_norm": 18.25, "learning_rate": 3.1469679551203456e-06, "loss": 1.5217562913894653, "step": 7898 }, { "epoch": 1.4379721489032493, "grad_norm": 18.875, "learning_rate": 3.146203867038951e-06, "loss": 1.6921648979187012, "step": 7900 }, { "epoch": 1.4383362155274415, "grad_norm": 6.53125, "learning_rate": 3.145439757502732e-06, "loss": 1.1033227443695068, "step": 7902 }, { "epoch": 1.4387002821516337, "grad_norm": 14.25, "learning_rate": 3.144675626623817e-06, "loss": 1.2729337215423584, "step": 7904 }, { "epoch": 1.439064348775826, "grad_norm": 25.125, "learning_rate": 3.143911474514341e-06, "loss": 1.6983637809753418, "step": 7906 }, { "epoch": 1.4394284154000183, "grad_norm": 7.71875, "learning_rate": 3.143147301286438e-06, "loss": 1.3148442506790161, "step": 7908 }, { "epoch": 1.4397924820242105, "grad_norm": 11.1875, "learning_rate": 3.1423831070522497e-06, "loss": 1.95399808883667, "step": 7910 }, { "epoch": 1.4401565486484027, "grad_norm": 14.75, "learning_rate": 3.141618891923918e-06, "loss": 1.3040627241134644, "step": 7912 }, { "epoch": 1.440520615272595, "grad_norm": 9.6875, "learning_rate": 3.1408546560135865e-06, "loss": 1.00283682346344, "step": 7914 }, { "epoch": 1.440884681896787, "grad_norm": 17.0, "learning_rate": 3.1400903994334054e-06, "loss": 2.068880558013916, "step": 7916 }, { "epoch": 1.4412487485209793, "grad_norm": 212.0, "learning_rate": 3.1393261222955263e-06, "loss": 1.2730417251586914, "step": 7918 }, { "epoch": 1.4416128151451715, "grad_norm": 6.96875, "learning_rate": 3.1385618247121035e-06, "loss": 1.3510453701019287, "step": 7920 }, { "epoch": 1.4419768817693637, "grad_norm": 7.125, "learning_rate": 3.137797506795295e-06, "loss": 1.0491981506347656, "step": 7922 }, { "epoch": 1.442340948393556, "grad_norm": 19.125, "learning_rate": 3.1370331686572597e-06, "loss": 1.7947553396224976, "step": 7924 }, { "epoch": 1.4427050150177483, "grad_norm": 8.75, "learning_rate": 3.1362688104101622e-06, "loss": 1.1688612699508667, "step": 7926 }, { "epoch": 1.4430690816419405, "grad_norm": 10.375, "learning_rate": 3.13550443216617e-06, "loss": 1.1563042402267456, "step": 7928 }, { "epoch": 1.4434331482661327, "grad_norm": 8.4375, "learning_rate": 3.134740034037451e-06, "loss": 1.5420507192611694, "step": 7930 }, { "epoch": 1.4437972148903249, "grad_norm": 21.875, "learning_rate": 3.133975616136178e-06, "loss": 1.727454423904419, "step": 7932 }, { "epoch": 1.4441612815145173, "grad_norm": 57.75, "learning_rate": 3.1332111785745255e-06, "loss": 1.6875288486480713, "step": 7934 }, { "epoch": 1.4445253481387095, "grad_norm": 3.796875, "learning_rate": 3.1324467214646736e-06, "loss": 1.2898133993148804, "step": 7936 }, { "epoch": 1.4448894147629017, "grad_norm": 5.09375, "learning_rate": 3.131682244918802e-06, "loss": 1.3041505813598633, "step": 7938 }, { "epoch": 1.4452534813870939, "grad_norm": 6.21875, "learning_rate": 3.1309177490490943e-06, "loss": 1.2383619546890259, "step": 7940 }, { "epoch": 1.445617548011286, "grad_norm": 13.6875, "learning_rate": 3.1301532339677375e-06, "loss": 1.3182579278945923, "step": 7942 }, { "epoch": 1.4459816146354783, "grad_norm": 15.25, "learning_rate": 3.129388699786922e-06, "loss": 1.2915605306625366, "step": 7944 }, { "epoch": 1.4463456812596704, "grad_norm": 9.5, "learning_rate": 3.1286241466188377e-06, "loss": 2.1337482929229736, "step": 7946 }, { "epoch": 1.4467097478838626, "grad_norm": 9.5625, "learning_rate": 3.127859574575681e-06, "loss": 1.3303313255310059, "step": 7948 }, { "epoch": 1.447073814508055, "grad_norm": 13.375, "learning_rate": 3.1270949837696508e-06, "loss": 1.4784893989562988, "step": 7950 }, { "epoch": 1.4474378811322473, "grad_norm": 12.5, "learning_rate": 3.126330374312947e-06, "loss": 1.3828274011611938, "step": 7952 }, { "epoch": 1.4478019477564394, "grad_norm": 8.75, "learning_rate": 3.1255657463177723e-06, "loss": 1.4443020820617676, "step": 7954 }, { "epoch": 1.4481660143806316, "grad_norm": 7.90625, "learning_rate": 3.1248010998963336e-06, "loss": 1.1901321411132812, "step": 7956 }, { "epoch": 1.4485300810048238, "grad_norm": 16.625, "learning_rate": 3.1240364351608386e-06, "loss": 1.2194774150848389, "step": 7958 }, { "epoch": 1.448894147629016, "grad_norm": 14.3125, "learning_rate": 3.1232717522235e-06, "loss": 1.8177173137664795, "step": 7960 }, { "epoch": 1.4492582142532084, "grad_norm": 4.90625, "learning_rate": 3.122507051196531e-06, "loss": 1.1955132484436035, "step": 7962 }, { "epoch": 1.4496222808774006, "grad_norm": 11.0625, "learning_rate": 3.1217423321921494e-06, "loss": 1.3393421173095703, "step": 7964 }, { "epoch": 1.4499863475015928, "grad_norm": 7.84375, "learning_rate": 3.120977595322573e-06, "loss": 1.3808822631835938, "step": 7966 }, { "epoch": 1.450350414125785, "grad_norm": 12.4375, "learning_rate": 3.1202128407000255e-06, "loss": 1.3850477933883667, "step": 7968 }, { "epoch": 1.4507144807499772, "grad_norm": 12.0625, "learning_rate": 3.1194480684367302e-06, "loss": 1.4826576709747314, "step": 7970 }, { "epoch": 1.4510785473741694, "grad_norm": 10.5, "learning_rate": 3.1186832786449152e-06, "loss": 1.3932586908340454, "step": 7972 }, { "epoch": 1.4514426139983616, "grad_norm": 7.4375, "learning_rate": 3.1179184714368094e-06, "loss": 1.2970638275146484, "step": 7974 }, { "epoch": 1.4518066806225538, "grad_norm": 21.25, "learning_rate": 3.1171536469246468e-06, "loss": 1.4657469987869263, "step": 7976 }, { "epoch": 1.4521707472467462, "grad_norm": 7.40625, "learning_rate": 3.116388805220661e-06, "loss": 1.4523301124572754, "step": 7978 }, { "epoch": 1.4525348138709384, "grad_norm": 7.125, "learning_rate": 3.1156239464370895e-06, "loss": 1.1368095874786377, "step": 7980 }, { "epoch": 1.4528988804951306, "grad_norm": 16.125, "learning_rate": 3.1148590706861725e-06, "loss": 1.4664783477783203, "step": 7982 }, { "epoch": 1.4532629471193228, "grad_norm": 9.1875, "learning_rate": 3.1140941780801524e-06, "loss": 1.066304326057434, "step": 7984 }, { "epoch": 1.453627013743515, "grad_norm": 8.1875, "learning_rate": 3.113329268731274e-06, "loss": 1.450404405593872, "step": 7986 }, { "epoch": 1.4539910803677074, "grad_norm": 15.625, "learning_rate": 3.112564342751785e-06, "loss": 1.0457264184951782, "step": 7988 }, { "epoch": 1.4543551469918996, "grad_norm": 32.75, "learning_rate": 3.111799400253934e-06, "loss": 0.5165541172027588, "step": 7990 }, { "epoch": 1.4547192136160918, "grad_norm": 17.5, "learning_rate": 3.111034441349975e-06, "loss": 1.372770071029663, "step": 7992 }, { "epoch": 1.455083280240284, "grad_norm": 14.5625, "learning_rate": 3.1102694661521615e-06, "loss": 1.4857652187347412, "step": 7994 }, { "epoch": 1.4554473468644762, "grad_norm": 6.65625, "learning_rate": 3.109504474772751e-06, "loss": 1.6825917959213257, "step": 7996 }, { "epoch": 1.4558114134886684, "grad_norm": 8.5625, "learning_rate": 3.1087394673240025e-06, "loss": 1.1532573699951172, "step": 7998 }, { "epoch": 1.4561754801128606, "grad_norm": 12.625, "learning_rate": 3.107974443918177e-06, "loss": 1.6383496522903442, "step": 8000 }, { "epoch": 1.4565395467370528, "grad_norm": 9.8125, "learning_rate": 3.1072094046675406e-06, "loss": 2.055211067199707, "step": 8002 }, { "epoch": 1.4569036133612452, "grad_norm": 23.5, "learning_rate": 3.1064443496843576e-06, "loss": 1.3486170768737793, "step": 8004 }, { "epoch": 1.4572676799854374, "grad_norm": 12.125, "learning_rate": 3.105679279080898e-06, "loss": 1.635927677154541, "step": 8006 }, { "epoch": 1.4576317466096296, "grad_norm": 24.375, "learning_rate": 3.1049141929694325e-06, "loss": 1.377577543258667, "step": 8008 }, { "epoch": 1.4579958132338218, "grad_norm": 18.375, "learning_rate": 3.1041490914622347e-06, "loss": 1.2570985555648804, "step": 8010 }, { "epoch": 1.458359879858014, "grad_norm": 3.796875, "learning_rate": 3.1033839746715793e-06, "loss": 1.173210620880127, "step": 8012 }, { "epoch": 1.4587239464822064, "grad_norm": 5.21875, "learning_rate": 3.1026188427097447e-06, "loss": 1.577376365661621, "step": 8014 }, { "epoch": 1.4590880131063986, "grad_norm": 11.5625, "learning_rate": 3.101853695689011e-06, "loss": 1.3778821229934692, "step": 8016 }, { "epoch": 1.4594520797305908, "grad_norm": 5.71875, "learning_rate": 3.1010885337216603e-06, "loss": 1.2397392988204956, "step": 8018 }, { "epoch": 1.459816146354783, "grad_norm": 16.375, "learning_rate": 3.100323356919977e-06, "loss": 1.34848952293396, "step": 8020 }, { "epoch": 1.4601802129789752, "grad_norm": 12.5, "learning_rate": 3.099558165396248e-06, "loss": 1.145046591758728, "step": 8022 }, { "epoch": 1.4605442796031674, "grad_norm": 13.1875, "learning_rate": 3.098792959262761e-06, "loss": 1.056328296661377, "step": 8024 }, { "epoch": 1.4609083462273595, "grad_norm": 32.5, "learning_rate": 3.098027738631808e-06, "loss": 1.5916104316711426, "step": 8026 }, { "epoch": 1.4612724128515517, "grad_norm": 9.0625, "learning_rate": 3.097262503615681e-06, "loss": 1.547664999961853, "step": 8028 }, { "epoch": 1.461636479475744, "grad_norm": 8.0625, "learning_rate": 3.096497254326676e-06, "loss": 1.4686845541000366, "step": 8030 }, { "epoch": 1.4620005460999363, "grad_norm": 6.53125, "learning_rate": 3.09573199087709e-06, "loss": 1.4751832485198975, "step": 8032 }, { "epoch": 1.4623646127241285, "grad_norm": 9.0625, "learning_rate": 3.094966713379223e-06, "loss": 1.283064842224121, "step": 8034 }, { "epoch": 1.4627286793483207, "grad_norm": 29.375, "learning_rate": 3.0942014219453755e-06, "loss": 1.478798508644104, "step": 8036 }, { "epoch": 1.463092745972513, "grad_norm": 11.4375, "learning_rate": 3.09343611668785e-06, "loss": 1.4340828657150269, "step": 8038 }, { "epoch": 1.4634568125967051, "grad_norm": 11.3125, "learning_rate": 3.0926707977189543e-06, "loss": 1.4324859380722046, "step": 8040 }, { "epoch": 1.4638208792208975, "grad_norm": 13.875, "learning_rate": 3.091905465150994e-06, "loss": 1.4226700067520142, "step": 8042 }, { "epoch": 1.4641849458450897, "grad_norm": 13.125, "learning_rate": 3.091140119096279e-06, "loss": 1.43776273727417, "step": 8044 }, { "epoch": 1.464549012469282, "grad_norm": 4.90625, "learning_rate": 3.090374759667121e-06, "loss": 1.3603628873825073, "step": 8046 }, { "epoch": 1.4649130790934741, "grad_norm": 5.5625, "learning_rate": 3.0896093869758336e-06, "loss": 1.3050345182418823, "step": 8048 }, { "epoch": 1.4652771457176663, "grad_norm": 8.5, "learning_rate": 3.088844001134732e-06, "loss": 1.2661861181259155, "step": 8050 }, { "epoch": 1.4656412123418585, "grad_norm": 84.5, "learning_rate": 3.088078602256133e-06, "loss": 2.0722508430480957, "step": 8052 }, { "epoch": 1.4660052789660507, "grad_norm": 6.59375, "learning_rate": 3.0873131904523557e-06, "loss": 1.0848398208618164, "step": 8054 }, { "epoch": 1.466369345590243, "grad_norm": 9.5, "learning_rate": 3.086547765835721e-06, "loss": 1.4588651657104492, "step": 8056 }, { "epoch": 1.4667334122144353, "grad_norm": 6.0625, "learning_rate": 3.085782328518554e-06, "loss": 1.3222707509994507, "step": 8058 }, { "epoch": 1.4670974788386275, "grad_norm": 6.09375, "learning_rate": 3.0850168786131766e-06, "loss": 1.1119022369384766, "step": 8060 }, { "epoch": 1.4674615454628197, "grad_norm": 14.75, "learning_rate": 3.0842514162319183e-06, "loss": 1.4139639139175415, "step": 8062 }, { "epoch": 1.467825612087012, "grad_norm": 5.125, "learning_rate": 3.0834859414871044e-06, "loss": 1.336667776107788, "step": 8064 }, { "epoch": 1.468189678711204, "grad_norm": 11.6875, "learning_rate": 3.0827204544910682e-06, "loss": 1.0249202251434326, "step": 8066 }, { "epoch": 1.4685537453353965, "grad_norm": 9.25, "learning_rate": 3.0819549553561397e-06, "loss": 1.8818554878234863, "step": 8068 }, { "epoch": 1.4689178119595887, "grad_norm": 22.75, "learning_rate": 3.081189444194654e-06, "loss": 1.6467548608779907, "step": 8070 }, { "epoch": 1.469281878583781, "grad_norm": 13.0625, "learning_rate": 3.0804239211189468e-06, "loss": 1.161402702331543, "step": 8072 }, { "epoch": 1.469645945207973, "grad_norm": 8.125, "learning_rate": 3.0796583862413544e-06, "loss": 1.1921584606170654, "step": 8074 }, { "epoch": 1.4700100118321653, "grad_norm": 12.25, "learning_rate": 3.078892839674217e-06, "loss": 1.2623369693756104, "step": 8076 }, { "epoch": 1.4703740784563575, "grad_norm": 29.375, "learning_rate": 3.0781272815298746e-06, "loss": 1.339120864868164, "step": 8078 }, { "epoch": 1.4707381450805497, "grad_norm": 25.125, "learning_rate": 3.077361711920671e-06, "loss": 1.0708491802215576, "step": 8080 }, { "epoch": 1.4711022117047419, "grad_norm": 12.6875, "learning_rate": 3.0765961309589493e-06, "loss": 1.5739586353302002, "step": 8082 }, { "epoch": 1.471466278328934, "grad_norm": 10.6875, "learning_rate": 3.0758305387570563e-06, "loss": 1.372029185295105, "step": 8084 }, { "epoch": 1.4718303449531265, "grad_norm": 54.5, "learning_rate": 3.0750649354273387e-06, "loss": 1.2132720947265625, "step": 8086 }, { "epoch": 1.4721944115773187, "grad_norm": 2.65625, "learning_rate": 3.074299321082146e-06, "loss": 1.1051228046417236, "step": 8088 }, { "epoch": 1.4725584782015109, "grad_norm": 6.625, "learning_rate": 3.0735336958338284e-06, "loss": 1.4189369678497314, "step": 8090 }, { "epoch": 1.472922544825703, "grad_norm": 42.5, "learning_rate": 3.0727680597947396e-06, "loss": 1.394530177116394, "step": 8092 }, { "epoch": 1.4732866114498953, "grad_norm": 5.40625, "learning_rate": 3.072002413077233e-06, "loss": 1.2035231590270996, "step": 8094 }, { "epoch": 1.4736506780740877, "grad_norm": 3.546875, "learning_rate": 3.071236755793664e-06, "loss": 1.2046468257904053, "step": 8096 }, { "epoch": 1.4740147446982799, "grad_norm": 5.90625, "learning_rate": 3.0704710880563893e-06, "loss": 0.879520833492279, "step": 8098 }, { "epoch": 1.474378811322472, "grad_norm": 27.625, "learning_rate": 3.069705409977769e-06, "loss": 0.37783581018447876, "step": 8100 }, { "epoch": 1.4747428779466643, "grad_norm": 20.875, "learning_rate": 3.068939721670162e-06, "loss": 0.9105134606361389, "step": 8102 }, { "epoch": 1.4751069445708564, "grad_norm": 5.5625, "learning_rate": 3.0681740232459297e-06, "loss": 1.276327133178711, "step": 8104 }, { "epoch": 1.4754710111950486, "grad_norm": 10.8125, "learning_rate": 3.067408314817436e-06, "loss": 1.510435700416565, "step": 8106 }, { "epoch": 1.4758350778192408, "grad_norm": 15.3125, "learning_rate": 3.0666425964970452e-06, "loss": 1.6176692247390747, "step": 8108 }, { "epoch": 1.476199144443433, "grad_norm": 5.78125, "learning_rate": 3.065876868397124e-06, "loss": 1.3561370372772217, "step": 8110 }, { "epoch": 1.4765632110676254, "grad_norm": 4.59375, "learning_rate": 3.0651111306300384e-06, "loss": 0.8761101961135864, "step": 8112 }, { "epoch": 1.4769272776918176, "grad_norm": 8.3125, "learning_rate": 3.0643453833081587e-06, "loss": 1.4022860527038574, "step": 8114 }, { "epoch": 1.4772913443160098, "grad_norm": 10.3125, "learning_rate": 3.0635796265438555e-06, "loss": 1.4284610748291016, "step": 8116 }, { "epoch": 1.477655410940202, "grad_norm": 13.75, "learning_rate": 3.062813860449499e-06, "loss": 1.4287099838256836, "step": 8118 }, { "epoch": 1.4780194775643942, "grad_norm": 19.625, "learning_rate": 3.0620480851374623e-06, "loss": 1.8255910873413086, "step": 8120 }, { "epoch": 1.4783835441885866, "grad_norm": 8.9375, "learning_rate": 3.061282300720122e-06, "loss": 1.334941029548645, "step": 8122 }, { "epoch": 1.4787476108127788, "grad_norm": 9.0625, "learning_rate": 3.060516507309852e-06, "loss": 1.1309086084365845, "step": 8124 }, { "epoch": 1.479111677436971, "grad_norm": 8.0, "learning_rate": 3.0597507050190296e-06, "loss": 1.3888471126556396, "step": 8126 }, { "epoch": 1.4794757440611632, "grad_norm": 15.375, "learning_rate": 3.058984893960033e-06, "loss": 1.4849586486816406, "step": 8128 }, { "epoch": 1.4798398106853554, "grad_norm": 4.28125, "learning_rate": 3.0582190742452433e-06, "loss": 1.0041438341140747, "step": 8130 }, { "epoch": 1.4802038773095476, "grad_norm": 3.78125, "learning_rate": 3.057453245987039e-06, "loss": 0.9366267919540405, "step": 8132 }, { "epoch": 1.4805679439337398, "grad_norm": 9.4375, "learning_rate": 3.0566874092978048e-06, "loss": 1.234405517578125, "step": 8134 }, { "epoch": 1.480932010557932, "grad_norm": 5.375, "learning_rate": 3.055921564289923e-06, "loss": 1.4830735921859741, "step": 8136 }, { "epoch": 1.4812960771821242, "grad_norm": 6.0, "learning_rate": 3.055155711075778e-06, "loss": 1.4721479415893555, "step": 8138 }, { "epoch": 1.4816601438063166, "grad_norm": 9.25, "learning_rate": 3.054389849767756e-06, "loss": 1.367739200592041, "step": 8140 }, { "epoch": 1.4820242104305088, "grad_norm": 18.75, "learning_rate": 3.053623980478243e-06, "loss": 1.5807201862335205, "step": 8142 }, { "epoch": 1.482388277054701, "grad_norm": 36.5, "learning_rate": 3.0528581033196298e-06, "loss": 1.1614680290222168, "step": 8144 }, { "epoch": 1.4827523436788932, "grad_norm": 6.21875, "learning_rate": 3.0520922184043036e-06, "loss": 1.2902480363845825, "step": 8146 }, { "epoch": 1.4831164103030854, "grad_norm": 8.9375, "learning_rate": 3.0513263258446545e-06, "loss": 1.444229245185852, "step": 8148 }, { "epoch": 1.4834804769272778, "grad_norm": 8.4375, "learning_rate": 3.0505604257530762e-06, "loss": 1.3943898677825928, "step": 8150 }, { "epoch": 1.48384454355147, "grad_norm": 32.75, "learning_rate": 3.04979451824196e-06, "loss": 1.3271794319152832, "step": 8152 }, { "epoch": 1.4842086101756622, "grad_norm": 11.4375, "learning_rate": 3.0490286034237003e-06, "loss": 1.4934171438217163, "step": 8154 }, { "epoch": 1.4845726767998544, "grad_norm": 11.5, "learning_rate": 3.048262681410691e-06, "loss": 1.3100519180297852, "step": 8156 }, { "epoch": 1.4849367434240466, "grad_norm": 13.9375, "learning_rate": 3.0474967523153293e-06, "loss": 1.5533921718597412, "step": 8158 }, { "epoch": 1.4853008100482388, "grad_norm": 28.75, "learning_rate": 3.046730816250012e-06, "loss": 1.5534807443618774, "step": 8160 }, { "epoch": 1.485664876672431, "grad_norm": 11.5625, "learning_rate": 3.0459648733271365e-06, "loss": 1.6323692798614502, "step": 8162 }, { "epoch": 1.4860289432966232, "grad_norm": 13.9375, "learning_rate": 3.045198923659102e-06, "loss": 1.491867184638977, "step": 8164 }, { "epoch": 1.4863930099208156, "grad_norm": 16.25, "learning_rate": 3.0444329673583084e-06, "loss": 1.481719732284546, "step": 8166 }, { "epoch": 1.4867570765450078, "grad_norm": 18.625, "learning_rate": 3.0436670045371573e-06, "loss": 1.4221787452697754, "step": 8168 }, { "epoch": 1.4871211431692, "grad_norm": 12.0625, "learning_rate": 3.04290103530805e-06, "loss": 1.4206541776657104, "step": 8170 }, { "epoch": 1.4874852097933922, "grad_norm": 10.5625, "learning_rate": 3.0421350597833897e-06, "loss": 1.2737412452697754, "step": 8172 }, { "epoch": 1.4878492764175844, "grad_norm": 13.0, "learning_rate": 3.0413690780755804e-06, "loss": 1.2841761112213135, "step": 8174 }, { "epoch": 1.4882133430417768, "grad_norm": 10.6875, "learning_rate": 3.0406030902970262e-06, "loss": 0.9190031290054321, "step": 8176 }, { "epoch": 1.488577409665969, "grad_norm": 15.375, "learning_rate": 3.039837096560133e-06, "loss": 1.3743256330490112, "step": 8178 }, { "epoch": 1.4889414762901612, "grad_norm": 54.75, "learning_rate": 3.0390710969773075e-06, "loss": 1.454878807067871, "step": 8180 }, { "epoch": 1.4893055429143534, "grad_norm": 14.375, "learning_rate": 3.038305091660957e-06, "loss": 1.7485191822052002, "step": 8182 }, { "epoch": 1.4896696095385455, "grad_norm": 22.125, "learning_rate": 3.0375390807234894e-06, "loss": 1.5463649034500122, "step": 8184 }, { "epoch": 1.4900336761627377, "grad_norm": 27.375, "learning_rate": 3.036773064277314e-06, "loss": 1.2868549823760986, "step": 8186 }, { "epoch": 1.49039774278693, "grad_norm": 23.0, "learning_rate": 3.036007042434841e-06, "loss": 0.8589935898780823, "step": 8188 }, { "epoch": 1.4907618094111221, "grad_norm": 4.625, "learning_rate": 3.0352410153084792e-06, "loss": 1.306921124458313, "step": 8190 }, { "epoch": 1.4911258760353143, "grad_norm": 6.28125, "learning_rate": 3.0344749830106424e-06, "loss": 1.117292881011963, "step": 8192 }, { "epoch": 1.4914899426595067, "grad_norm": 16.25, "learning_rate": 3.0337089456537405e-06, "loss": 1.467806100845337, "step": 8194 }, { "epoch": 1.491854009283699, "grad_norm": 9.25, "learning_rate": 3.0329429033501877e-06, "loss": 1.3650826215744019, "step": 8196 }, { "epoch": 1.4922180759078911, "grad_norm": 27.125, "learning_rate": 3.0321768562123976e-06, "loss": 1.2112196683883667, "step": 8198 }, { "epoch": 1.4925821425320833, "grad_norm": 8.0625, "learning_rate": 3.031410804352784e-06, "loss": 1.5436772108078003, "step": 8200 }, { "epoch": 1.4929462091562755, "grad_norm": 24.25, "learning_rate": 3.0306447478837625e-06, "loss": 1.4654940366744995, "step": 8202 }, { "epoch": 1.493310275780468, "grad_norm": 7.125, "learning_rate": 3.0298786869177487e-06, "loss": 1.1727949380874634, "step": 8204 }, { "epoch": 1.4936743424046601, "grad_norm": 29.375, "learning_rate": 3.0291126215671575e-06, "loss": 1.5292565822601318, "step": 8206 }, { "epoch": 1.4940384090288523, "grad_norm": 32.5, "learning_rate": 3.028346551944408e-06, "loss": 1.246259093284607, "step": 8208 }, { "epoch": 1.4944024756530445, "grad_norm": 9.875, "learning_rate": 3.027580478161917e-06, "loss": 0.7279324531555176, "step": 8210 }, { "epoch": 1.4947665422772367, "grad_norm": 17.75, "learning_rate": 3.0268144003321023e-06, "loss": 1.494260549545288, "step": 8212 }, { "epoch": 1.495130608901429, "grad_norm": 6.5625, "learning_rate": 3.026048318567383e-06, "loss": 1.555148959159851, "step": 8214 }, { "epoch": 1.495494675525621, "grad_norm": 6.53125, "learning_rate": 3.0252822329801785e-06, "loss": 1.1693923473358154, "step": 8216 }, { "epoch": 1.4958587421498133, "grad_norm": 9.5, "learning_rate": 3.0245161436829083e-06, "loss": 1.3028727769851685, "step": 8218 }, { "epoch": 1.4962228087740057, "grad_norm": 8.5, "learning_rate": 3.0237500507879948e-06, "loss": 1.2423659563064575, "step": 8220 }, { "epoch": 1.496586875398198, "grad_norm": 15.5625, "learning_rate": 3.0229839544078567e-06, "loss": 1.7578924894332886, "step": 8222 }, { "epoch": 1.49695094202239, "grad_norm": 14.125, "learning_rate": 3.0222178546549164e-06, "loss": 1.4031219482421875, "step": 8224 }, { "epoch": 1.4973150086465823, "grad_norm": 15.3125, "learning_rate": 3.0214517516415955e-06, "loss": 1.277764916419983, "step": 8226 }, { "epoch": 1.4976790752707745, "grad_norm": 10.8125, "learning_rate": 3.0206856454803173e-06, "loss": 1.088913917541504, "step": 8228 }, { "epoch": 1.498043141894967, "grad_norm": 9.6875, "learning_rate": 3.0199195362835047e-06, "loss": 1.5067713260650635, "step": 8230 }, { "epoch": 1.498407208519159, "grad_norm": 9.4375, "learning_rate": 3.019153424163581e-06, "loss": 1.273909330368042, "step": 8232 }, { "epoch": 1.4987712751433513, "grad_norm": 29.5, "learning_rate": 3.0183873092329694e-06, "loss": 1.5521973371505737, "step": 8234 }, { "epoch": 1.4991353417675435, "grad_norm": 7.53125, "learning_rate": 3.0176211916040955e-06, "loss": 1.6984868049621582, "step": 8236 }, { "epoch": 1.4994994083917357, "grad_norm": 13.5, "learning_rate": 3.0168550713893824e-06, "loss": 1.213895320892334, "step": 8238 }, { "epoch": 1.4998634750159279, "grad_norm": 9.625, "learning_rate": 3.0160889487012556e-06, "loss": 1.7921996116638184, "step": 8240 }, { "epoch": 1.50022754164012, "grad_norm": 17.625, "learning_rate": 3.0153228236521413e-06, "loss": 1.859833002090454, "step": 8242 }, { "epoch": 1.5005916082643123, "grad_norm": 9.8125, "learning_rate": 3.0145566963544654e-06, "loss": 1.4426655769348145, "step": 8244 }, { "epoch": 1.5009556748885045, "grad_norm": 8.8125, "learning_rate": 3.0137905669206525e-06, "loss": 1.1333223581314087, "step": 8246 }, { "epoch": 1.5013197415126969, "grad_norm": 15.8125, "learning_rate": 3.01302443546313e-06, "loss": 1.4960157871246338, "step": 8248 }, { "epoch": 1.501683808136889, "grad_norm": 22.875, "learning_rate": 3.012258302094324e-06, "loss": 1.5881778001785278, "step": 8250 }, { "epoch": 1.5020478747610813, "grad_norm": 20.125, "learning_rate": 3.011492166926662e-06, "loss": 1.6777312755584717, "step": 8252 }, { "epoch": 1.5024119413852735, "grad_norm": 13.4375, "learning_rate": 3.0107260300725717e-06, "loss": 1.4614934921264648, "step": 8254 }, { "epoch": 1.5027760080094659, "grad_norm": 6.5, "learning_rate": 3.0099598916444793e-06, "loss": 1.0745995044708252, "step": 8256 }, { "epoch": 1.503140074633658, "grad_norm": 20.625, "learning_rate": 3.0091937517548137e-06, "loss": 1.2333183288574219, "step": 8258 }, { "epoch": 1.5035041412578503, "grad_norm": 11.125, "learning_rate": 3.0084276105160013e-06, "loss": 1.2908381223678589, "step": 8260 }, { "epoch": 1.5038682078820425, "grad_norm": 12.0625, "learning_rate": 3.0076614680404725e-06, "loss": 1.2935439348220825, "step": 8262 }, { "epoch": 1.5042322745062346, "grad_norm": 5.75, "learning_rate": 3.0068953244406536e-06, "loss": 1.1082379817962646, "step": 8264 }, { "epoch": 1.5045963411304268, "grad_norm": 11.125, "learning_rate": 3.0061291798289738e-06, "loss": 1.4589749574661255, "step": 8266 }, { "epoch": 1.504960407754619, "grad_norm": 12.3125, "learning_rate": 3.005363034317862e-06, "loss": 1.7751314640045166, "step": 8268 }, { "epoch": 1.5053244743788112, "grad_norm": 11.9375, "learning_rate": 3.004596888019746e-06, "loss": 1.367035984992981, "step": 8270 }, { "epoch": 1.5056885410030034, "grad_norm": 9.0625, "learning_rate": 3.0038307410470556e-06, "loss": 0.9916867017745972, "step": 8272 }, { "epoch": 1.5060526076271956, "grad_norm": 8.125, "learning_rate": 3.0030645935122198e-06, "loss": 1.3437137603759766, "step": 8274 }, { "epoch": 1.506416674251388, "grad_norm": 21.75, "learning_rate": 3.002298445527667e-06, "loss": 1.4305222034454346, "step": 8276 }, { "epoch": 1.5067807408755802, "grad_norm": 10.9375, "learning_rate": 3.001532297205827e-06, "loss": 1.4024221897125244, "step": 8278 }, { "epoch": 1.5071448074997724, "grad_norm": 14.5625, "learning_rate": 3.000766148659129e-06, "loss": 1.1061606407165527, "step": 8280 }, { "epoch": 1.5075088741239648, "grad_norm": 9.75, "learning_rate": 3.0000000000000005e-06, "loss": 1.2565230131149292, "step": 8282 }, { "epoch": 1.507872940748157, "grad_norm": 8.6875, "learning_rate": 2.9992338513408724e-06, "loss": 1.5496501922607422, "step": 8284 }, { "epoch": 1.5082370073723492, "grad_norm": 10.5625, "learning_rate": 2.998467702794174e-06, "loss": 1.5667330026626587, "step": 8286 }, { "epoch": 1.5086010739965414, "grad_norm": 7.3125, "learning_rate": 2.9977015544723336e-06, "loss": 1.2874367237091064, "step": 8288 }, { "epoch": 1.5089651406207336, "grad_norm": 13.3125, "learning_rate": 2.9969354064877817e-06, "loss": 1.5023891925811768, "step": 8290 }, { "epoch": 1.5093292072449258, "grad_norm": 8.625, "learning_rate": 2.996169258952945e-06, "loss": 1.387244462966919, "step": 8292 }, { "epoch": 1.509693273869118, "grad_norm": 7.78125, "learning_rate": 2.995403111980254e-06, "loss": 1.2260160446166992, "step": 8294 }, { "epoch": 1.5100573404933102, "grad_norm": 16.125, "learning_rate": 2.9946369656821396e-06, "loss": 0.8720483779907227, "step": 8296 }, { "epoch": 1.5104214071175024, "grad_norm": 12.3125, "learning_rate": 2.9938708201710272e-06, "loss": 1.3226685523986816, "step": 8298 }, { "epoch": 1.5107854737416946, "grad_norm": 29.375, "learning_rate": 2.993104675559348e-06, "loss": 1.5295064449310303, "step": 8300 }, { "epoch": 1.511149540365887, "grad_norm": 27.625, "learning_rate": 2.992338531959529e-06, "loss": 1.7971018552780151, "step": 8302 }, { "epoch": 1.5115136069900792, "grad_norm": 11.375, "learning_rate": 2.991572389483999e-06, "loss": 1.0405447483062744, "step": 8304 }, { "epoch": 1.5118776736142714, "grad_norm": 5.40625, "learning_rate": 2.9908062482451873e-06, "loss": 1.3780726194381714, "step": 8306 }, { "epoch": 1.5122417402384638, "grad_norm": 11.3125, "learning_rate": 2.9900401083555213e-06, "loss": 0.9142655730247498, "step": 8308 }, { "epoch": 1.512605806862656, "grad_norm": 27.75, "learning_rate": 2.9892739699274297e-06, "loss": 0.9862573742866516, "step": 8310 }, { "epoch": 1.5129698734868482, "grad_norm": 10.3125, "learning_rate": 2.9885078330733384e-06, "loss": 1.7985886335372925, "step": 8312 }, { "epoch": 1.5133339401110404, "grad_norm": 30.25, "learning_rate": 2.987741697905676e-06, "loss": 1.7422081232070923, "step": 8314 }, { "epoch": 1.5136980067352326, "grad_norm": 19.75, "learning_rate": 2.9869755645368713e-06, "loss": 2.1116433143615723, "step": 8316 }, { "epoch": 1.5140620733594248, "grad_norm": 19.25, "learning_rate": 2.9862094330793485e-06, "loss": 2.140390157699585, "step": 8318 }, { "epoch": 1.514426139983617, "grad_norm": 22.625, "learning_rate": 2.9854433036455364e-06, "loss": 1.7981786727905273, "step": 8320 }, { "epoch": 1.5147902066078092, "grad_norm": 39.5, "learning_rate": 2.984677176347859e-06, "loss": 1.5796661376953125, "step": 8322 }, { "epoch": 1.5151542732320014, "grad_norm": 12.1875, "learning_rate": 2.9839110512987446e-06, "loss": 0.5388069748878479, "step": 8324 }, { "epoch": 1.5155183398561936, "grad_norm": 12.0, "learning_rate": 2.9831449286106186e-06, "loss": 1.4176826477050781, "step": 8326 }, { "epoch": 1.5158824064803857, "grad_norm": 14.9375, "learning_rate": 2.982378808395906e-06, "loss": 1.4480212926864624, "step": 8328 }, { "epoch": 1.5162464731045782, "grad_norm": 8.8125, "learning_rate": 2.9816126907670316e-06, "loss": 1.5152921676635742, "step": 8330 }, { "epoch": 1.5166105397287704, "grad_norm": 22.875, "learning_rate": 2.98084657583642e-06, "loss": 1.5244202613830566, "step": 8332 }, { "epoch": 1.5169746063529626, "grad_norm": 20.375, "learning_rate": 2.980080463716496e-06, "loss": 0.9391945600509644, "step": 8334 }, { "epoch": 1.517338672977155, "grad_norm": 13.75, "learning_rate": 2.9793143545196833e-06, "loss": 1.6006522178649902, "step": 8336 }, { "epoch": 1.5177027396013472, "grad_norm": 5.65625, "learning_rate": 2.9785482483584055e-06, "loss": 1.3516886234283447, "step": 8338 }, { "epoch": 1.5180668062255394, "grad_norm": 5.4375, "learning_rate": 2.9777821453450855e-06, "loss": 1.4529610872268677, "step": 8340 }, { "epoch": 1.5184308728497315, "grad_norm": 10.0625, "learning_rate": 2.9770160455921448e-06, "loss": 0.9903704524040222, "step": 8342 }, { "epoch": 1.5187949394739237, "grad_norm": 17.875, "learning_rate": 2.9762499492120062e-06, "loss": 1.4350577592849731, "step": 8344 }, { "epoch": 1.519159006098116, "grad_norm": 8.3125, "learning_rate": 2.9754838563170923e-06, "loss": 1.452880859375, "step": 8346 }, { "epoch": 1.5195230727223081, "grad_norm": 5.84375, "learning_rate": 2.9747177670198225e-06, "loss": 1.024656057357788, "step": 8348 }, { "epoch": 1.5198871393465003, "grad_norm": 9.625, "learning_rate": 2.9739516814326185e-06, "loss": 1.3598967790603638, "step": 8350 }, { "epoch": 1.5202512059706925, "grad_norm": 10.6875, "learning_rate": 2.9731855996678987e-06, "loss": 1.1387789249420166, "step": 8352 }, { "epoch": 1.5206152725948847, "grad_norm": 32.75, "learning_rate": 2.9724195218380837e-06, "loss": 1.6009889841079712, "step": 8354 }, { "epoch": 1.5209793392190771, "grad_norm": 11.6875, "learning_rate": 2.9716534480555924e-06, "loss": 1.5074760913848877, "step": 8356 }, { "epoch": 1.5213434058432693, "grad_norm": 13.9375, "learning_rate": 2.970887378432843e-06, "loss": 1.5610768795013428, "step": 8358 }, { "epoch": 1.5217074724674615, "grad_norm": 11.75, "learning_rate": 2.970121313082253e-06, "loss": 1.690721869468689, "step": 8360 }, { "epoch": 1.522071539091654, "grad_norm": 17.5, "learning_rate": 2.969355252116238e-06, "loss": 1.8271783590316772, "step": 8362 }, { "epoch": 1.5224356057158461, "grad_norm": 11.75, "learning_rate": 2.9685891956472163e-06, "loss": 1.4870573282241821, "step": 8364 }, { "epoch": 1.5227996723400383, "grad_norm": 29.125, "learning_rate": 2.967823143787604e-06, "loss": 1.5060391426086426, "step": 8366 }, { "epoch": 1.5231637389642305, "grad_norm": 12.5, "learning_rate": 2.9670570966498138e-06, "loss": 1.550807237625122, "step": 8368 }, { "epoch": 1.5235278055884227, "grad_norm": 10.625, "learning_rate": 2.966291054346261e-06, "loss": 1.4581334590911865, "step": 8370 }, { "epoch": 1.523891872212615, "grad_norm": 42.5, "learning_rate": 2.965525016989359e-06, "loss": 1.057798147201538, "step": 8372 }, { "epoch": 1.524255938836807, "grad_norm": 13.75, "learning_rate": 2.9647589846915214e-06, "loss": 1.9072461128234863, "step": 8374 }, { "epoch": 1.5246200054609993, "grad_norm": 10.4375, "learning_rate": 2.9639929575651604e-06, "loss": 1.8109527826309204, "step": 8376 }, { "epoch": 1.5249840720851915, "grad_norm": 10.375, "learning_rate": 2.9632269357226866e-06, "loss": 1.144439935684204, "step": 8378 }, { "epoch": 1.5253481387093837, "grad_norm": 10.875, "learning_rate": 2.962460919276512e-06, "loss": 1.5001131296157837, "step": 8380 }, { "epoch": 1.525712205333576, "grad_norm": 39.5, "learning_rate": 2.9616949083390433e-06, "loss": 1.561955213546753, "step": 8382 }, { "epoch": 1.5260762719577683, "grad_norm": 9.4375, "learning_rate": 2.9609289030226927e-06, "loss": 1.3002409934997559, "step": 8384 }, { "epoch": 1.5264403385819605, "grad_norm": 22.875, "learning_rate": 2.960162903439868e-06, "loss": 1.0219875574111938, "step": 8386 }, { "epoch": 1.5268044052061527, "grad_norm": 233.0, "learning_rate": 2.9593969097029743e-06, "loss": 1.0393669605255127, "step": 8388 }, { "epoch": 1.527168471830345, "grad_norm": 10.25, "learning_rate": 2.9586309219244214e-06, "loss": 1.3701894283294678, "step": 8390 }, { "epoch": 1.5275325384545373, "grad_norm": 5.03125, "learning_rate": 2.9578649402166117e-06, "loss": 1.283165693283081, "step": 8392 }, { "epoch": 1.5278966050787295, "grad_norm": 14.125, "learning_rate": 2.95709896469195e-06, "loss": 1.4996963739395142, "step": 8394 }, { "epoch": 1.5282606717029217, "grad_norm": 10.9375, "learning_rate": 2.956332995462844e-06, "loss": 1.4978924989700317, "step": 8396 }, { "epoch": 1.5286247383271139, "grad_norm": 25.125, "learning_rate": 2.9555670326416922e-06, "loss": 1.7476577758789062, "step": 8398 }, { "epoch": 1.528988804951306, "grad_norm": 16.375, "learning_rate": 2.9548010763408996e-06, "loss": 1.6362634897232056, "step": 8400 }, { "epoch": 1.5293528715754983, "grad_norm": 16.125, "learning_rate": 2.954035126672865e-06, "loss": 1.8842966556549072, "step": 8402 }, { "epoch": 1.5297169381996905, "grad_norm": 13.6875, "learning_rate": 2.9532691837499887e-06, "loss": 1.5491468906402588, "step": 8404 }, { "epoch": 1.5300810048238827, "grad_norm": 12.9375, "learning_rate": 2.9525032476846717e-06, "loss": 1.4289557933807373, "step": 8406 }, { "epoch": 1.5304450714480748, "grad_norm": 10.3125, "learning_rate": 2.9517373185893093e-06, "loss": 1.4095780849456787, "step": 8408 }, { "epoch": 1.5308091380722673, "grad_norm": 14.0625, "learning_rate": 2.9509713965763015e-06, "loss": 1.4054762125015259, "step": 8410 }, { "epoch": 1.5311732046964595, "grad_norm": 17.5, "learning_rate": 2.9502054817580407e-06, "loss": 1.4419529438018799, "step": 8412 }, { "epoch": 1.5315372713206516, "grad_norm": 7.5, "learning_rate": 2.949439574246924e-06, "loss": 1.496908187866211, "step": 8414 }, { "epoch": 1.531901337944844, "grad_norm": 17.25, "learning_rate": 2.9486736741553457e-06, "loss": 1.5230573415756226, "step": 8416 }, { "epoch": 1.5322654045690363, "grad_norm": 10.9375, "learning_rate": 2.947907781595698e-06, "loss": 1.5322203636169434, "step": 8418 }, { "epoch": 1.5326294711932285, "grad_norm": 3.625, "learning_rate": 2.947141896680372e-06, "loss": 0.8272435665130615, "step": 8420 }, { "epoch": 1.5329935378174206, "grad_norm": 40.5, "learning_rate": 2.9463760195217573e-06, "loss": 0.3384915590286255, "step": 8422 }, { "epoch": 1.5333576044416128, "grad_norm": 17.75, "learning_rate": 2.945610150232245e-06, "loss": 0.5723781585693359, "step": 8424 }, { "epoch": 1.533721671065805, "grad_norm": 31.25, "learning_rate": 2.9448442889242234e-06, "loss": 0.6992935538291931, "step": 8426 }, { "epoch": 1.5340857376899972, "grad_norm": 6.84375, "learning_rate": 2.9440784357100784e-06, "loss": 1.1146118640899658, "step": 8428 }, { "epoch": 1.5344498043141894, "grad_norm": 14.875, "learning_rate": 2.9433125907021966e-06, "loss": 1.372719407081604, "step": 8430 }, { "epoch": 1.5348138709383816, "grad_norm": 15.125, "learning_rate": 2.9425467540129615e-06, "loss": 1.4509079456329346, "step": 8432 }, { "epoch": 1.5351779375625738, "grad_norm": 10.125, "learning_rate": 2.9417809257547577e-06, "loss": 1.4932427406311035, "step": 8434 }, { "epoch": 1.5355420041867662, "grad_norm": 14.3125, "learning_rate": 2.941015106039968e-06, "loss": 1.5959903001785278, "step": 8436 }, { "epoch": 1.5359060708109584, "grad_norm": 11.375, "learning_rate": 2.9402492949809714e-06, "loss": 1.4296441078186035, "step": 8438 }, { "epoch": 1.5362701374351506, "grad_norm": 9.75, "learning_rate": 2.9394834926901496e-06, "loss": 1.6396554708480835, "step": 8440 }, { "epoch": 1.5366342040593428, "grad_norm": 17.375, "learning_rate": 2.9387176992798794e-06, "loss": 1.2531663179397583, "step": 8442 }, { "epoch": 1.5369982706835352, "grad_norm": 46.5, "learning_rate": 2.937951914862538e-06, "loss": 1.5916471481323242, "step": 8444 }, { "epoch": 1.5373623373077274, "grad_norm": 11.75, "learning_rate": 2.9371861395505018e-06, "loss": 1.412388563156128, "step": 8446 }, { "epoch": 1.5377264039319196, "grad_norm": 34.0, "learning_rate": 2.936420373456146e-06, "loss": 1.6131799221038818, "step": 8448 }, { "epoch": 1.5380904705561118, "grad_norm": 33.5, "learning_rate": 2.935654616691842e-06, "loss": 1.6662569046020508, "step": 8450 }, { "epoch": 1.538454537180304, "grad_norm": 7.5625, "learning_rate": 2.934888869369962e-06, "loss": 1.488200068473816, "step": 8452 }, { "epoch": 1.5388186038044962, "grad_norm": 8.75, "learning_rate": 2.9341231316028767e-06, "loss": 1.3250789642333984, "step": 8454 }, { "epoch": 1.5391826704286884, "grad_norm": 9.375, "learning_rate": 2.9333574035029554e-06, "loss": 1.4266244173049927, "step": 8456 }, { "epoch": 1.5395467370528806, "grad_norm": 8.375, "learning_rate": 2.932591685182565e-06, "loss": 1.4169809818267822, "step": 8458 }, { "epoch": 1.5399108036770728, "grad_norm": 5.53125, "learning_rate": 2.9318259767540717e-06, "loss": 1.1201229095458984, "step": 8460 }, { "epoch": 1.540274870301265, "grad_norm": 14.0625, "learning_rate": 2.931060278329839e-06, "loss": 1.3146371841430664, "step": 8462 }, { "epoch": 1.5406389369254574, "grad_norm": 17.25, "learning_rate": 2.9302945900222314e-06, "loss": 1.7455590963363647, "step": 8464 }, { "epoch": 1.5410030035496496, "grad_norm": 60.0, "learning_rate": 2.929528911943611e-06, "loss": 1.6149749755859375, "step": 8466 }, { "epoch": 1.5413670701738418, "grad_norm": 25.125, "learning_rate": 2.9287632442063375e-06, "loss": 1.7968575954437256, "step": 8468 }, { "epoch": 1.5417311367980342, "grad_norm": 5.875, "learning_rate": 2.9279975869227684e-06, "loss": 1.500647783279419, "step": 8470 }, { "epoch": 1.5420952034222264, "grad_norm": 13.4375, "learning_rate": 2.9272319402052614e-06, "loss": 1.484393835067749, "step": 8472 }, { "epoch": 1.5424592700464186, "grad_norm": 10.875, "learning_rate": 2.926466304166172e-06, "loss": 1.5424009561538696, "step": 8474 }, { "epoch": 1.5428233366706108, "grad_norm": 44.5, "learning_rate": 2.9257006789178554e-06, "loss": 1.6324516534805298, "step": 8476 }, { "epoch": 1.543187403294803, "grad_norm": 29.375, "learning_rate": 2.9249350645726627e-06, "loss": 2.130366563796997, "step": 8478 }, { "epoch": 1.5435514699189952, "grad_norm": 92.0, "learning_rate": 2.9241694612429455e-06, "loss": 1.7553602457046509, "step": 8480 }, { "epoch": 1.5439155365431874, "grad_norm": 21.375, "learning_rate": 2.9234038690410517e-06, "loss": 1.1325058937072754, "step": 8482 }, { "epoch": 1.5442796031673796, "grad_norm": 13.0625, "learning_rate": 2.92263828807933e-06, "loss": 1.5293290615081787, "step": 8484 }, { "epoch": 1.5446436697915717, "grad_norm": 12.4375, "learning_rate": 2.921872718470126e-06, "loss": 1.4125573635101318, "step": 8486 }, { "epoch": 1.545007736415764, "grad_norm": 11.5625, "learning_rate": 2.9211071603257845e-06, "loss": 1.1901652812957764, "step": 8488 }, { "epoch": 1.5453718030399564, "grad_norm": 8.6875, "learning_rate": 2.920341613758647e-06, "loss": 0.376153826713562, "step": 8490 }, { "epoch": 1.5457358696641486, "grad_norm": 8.625, "learning_rate": 2.9195760788810547e-06, "loss": 1.1818699836730957, "step": 8492 }, { "epoch": 1.5460999362883407, "grad_norm": 9.9375, "learning_rate": 2.9188105558053464e-06, "loss": 1.3671725988388062, "step": 8494 }, { "epoch": 1.546464002912533, "grad_norm": 10.9375, "learning_rate": 2.9180450446438613e-06, "loss": 1.3944634199142456, "step": 8496 }, { "epoch": 1.5468280695367254, "grad_norm": 7.5, "learning_rate": 2.917279545508933e-06, "loss": 1.6090569496154785, "step": 8498 }, { "epoch": 1.5471921361609176, "grad_norm": 12.0, "learning_rate": 2.916514058512897e-06, "loss": 1.305992603302002, "step": 8500 }, { "epoch": 1.5475562027851097, "grad_norm": 11.0, "learning_rate": 2.915748583768083e-06, "loss": 0.889894962310791, "step": 8502 }, { "epoch": 1.547920269409302, "grad_norm": 6.1875, "learning_rate": 2.9149831213868235e-06, "loss": 1.3305896520614624, "step": 8504 }, { "epoch": 1.5482843360334941, "grad_norm": 11.75, "learning_rate": 2.9142176714814474e-06, "loss": 1.4135520458221436, "step": 8506 }, { "epoch": 1.5486484026576863, "grad_norm": 6.15625, "learning_rate": 2.913452234164279e-06, "loss": 1.272111415863037, "step": 8508 }, { "epoch": 1.5490124692818785, "grad_norm": 10.875, "learning_rate": 2.9126868095476457e-06, "loss": 1.367983341217041, "step": 8510 }, { "epoch": 1.5493765359060707, "grad_norm": 12.1875, "learning_rate": 2.9119213977438686e-06, "loss": 1.951696753501892, "step": 8512 }, { "epoch": 1.549740602530263, "grad_norm": 39.75, "learning_rate": 2.911155998865269e-06, "loss": 1.373100996017456, "step": 8514 }, { "epoch": 1.550104669154455, "grad_norm": 12.125, "learning_rate": 2.910390613024168e-06, "loss": 1.467010498046875, "step": 8516 }, { "epoch": 1.5504687357786475, "grad_norm": 7.65625, "learning_rate": 2.9096252403328794e-06, "loss": 1.3421916961669922, "step": 8518 }, { "epoch": 1.5508328024028397, "grad_norm": 10.25, "learning_rate": 2.9088598809037227e-06, "loss": 1.6727701425552368, "step": 8520 }, { "epoch": 1.551196869027032, "grad_norm": 6.96875, "learning_rate": 2.9080945348490074e-06, "loss": 1.373345136642456, "step": 8522 }, { "epoch": 1.5515609356512243, "grad_norm": 3.78125, "learning_rate": 2.9073292022810463e-06, "loss": 1.0632575750350952, "step": 8524 }, { "epoch": 1.5519250022754165, "grad_norm": 19.625, "learning_rate": 2.9065638833121505e-06, "loss": 1.0813533067703247, "step": 8526 }, { "epoch": 1.5522890688996087, "grad_norm": 22.625, "learning_rate": 2.905798578054626e-06, "loss": 1.6733286380767822, "step": 8528 }, { "epoch": 1.552653135523801, "grad_norm": 43.0, "learning_rate": 2.9050332866207786e-06, "loss": 1.6042594909667969, "step": 8530 }, { "epoch": 1.553017202147993, "grad_norm": 11.375, "learning_rate": 2.90426800912291e-06, "loss": 1.4242624044418335, "step": 8532 }, { "epoch": 1.5533812687721853, "grad_norm": 6.75, "learning_rate": 2.9035027456733246e-06, "loss": 1.436721682548523, "step": 8534 }, { "epoch": 1.5537453353963775, "grad_norm": 10.8125, "learning_rate": 2.9027374963843203e-06, "loss": 1.2582474946975708, "step": 8536 }, { "epoch": 1.5541094020205697, "grad_norm": 9.875, "learning_rate": 2.9019722613681933e-06, "loss": 0.9072771668434143, "step": 8538 }, { "epoch": 1.5544734686447619, "grad_norm": 47.75, "learning_rate": 2.9012070407372405e-06, "loss": 1.0377178192138672, "step": 8540 }, { "epoch": 1.554837535268954, "grad_norm": 13.9375, "learning_rate": 2.9004418346037534e-06, "loss": 1.526257872581482, "step": 8542 }, { "epoch": 1.5552016018931465, "grad_norm": 12.0, "learning_rate": 2.899676643080024e-06, "loss": 1.6981232166290283, "step": 8544 }, { "epoch": 1.5555656685173387, "grad_norm": 11.0625, "learning_rate": 2.8989114662783403e-06, "loss": 1.436377763748169, "step": 8546 }, { "epoch": 1.5559297351415309, "grad_norm": 6.9375, "learning_rate": 2.898146304310989e-06, "loss": 1.0867383480072021, "step": 8548 }, { "epoch": 1.556293801765723, "grad_norm": 8.625, "learning_rate": 2.8973811572902567e-06, "loss": 0.9838950037956238, "step": 8550 }, { "epoch": 1.5566578683899155, "grad_norm": 169.0, "learning_rate": 2.8966160253284213e-06, "loss": 0.7961543798446655, "step": 8552 }, { "epoch": 1.5570219350141077, "grad_norm": 8.375, "learning_rate": 2.8958509085377663e-06, "loss": 0.9839916229248047, "step": 8554 }, { "epoch": 1.5573860016382999, "grad_norm": 24.125, "learning_rate": 2.895085807030568e-06, "loss": 1.0826778411865234, "step": 8556 }, { "epoch": 1.557750068262492, "grad_norm": 32.75, "learning_rate": 2.8943207209191025e-06, "loss": 1.559889554977417, "step": 8558 }, { "epoch": 1.5581141348866843, "grad_norm": 24.625, "learning_rate": 2.893555650315644e-06, "loss": 1.548248052597046, "step": 8560 }, { "epoch": 1.5584782015108765, "grad_norm": 11.625, "learning_rate": 2.892790595332461e-06, "loss": 1.5722010135650635, "step": 8562 }, { "epoch": 1.5588422681350687, "grad_norm": 11.1875, "learning_rate": 2.8920255560818233e-06, "loss": 1.3703052997589111, "step": 8564 }, { "epoch": 1.5592063347592608, "grad_norm": 24.75, "learning_rate": 2.891260532675999e-06, "loss": 1.5773344039916992, "step": 8566 }, { "epoch": 1.559570401383453, "grad_norm": 8.625, "learning_rate": 2.8904955252272503e-06, "loss": 1.4237834215164185, "step": 8568 }, { "epoch": 1.5599344680076452, "grad_norm": 21.875, "learning_rate": 2.88973053384784e-06, "loss": 2.098597526550293, "step": 8570 }, { "epoch": 1.5602985346318377, "grad_norm": 31.125, "learning_rate": 2.8889655586500258e-06, "loss": 1.4976621866226196, "step": 8572 }, { "epoch": 1.5606626012560298, "grad_norm": 8.0625, "learning_rate": 2.888200599746066e-06, "loss": 1.449190616607666, "step": 8574 }, { "epoch": 1.561026667880222, "grad_norm": 9.8125, "learning_rate": 2.8874356572482164e-06, "loss": 1.4395779371261597, "step": 8576 }, { "epoch": 1.5613907345044145, "grad_norm": 6.375, "learning_rate": 2.886670731268727e-06, "loss": 1.403394341468811, "step": 8578 }, { "epoch": 1.5617548011286067, "grad_norm": 24.25, "learning_rate": 2.885905821919849e-06, "loss": 0.8339476585388184, "step": 8580 }, { "epoch": 1.5621188677527988, "grad_norm": 12.4375, "learning_rate": 2.885140929313829e-06, "loss": 0.4694342017173767, "step": 8582 }, { "epoch": 1.562482934376991, "grad_norm": 13.75, "learning_rate": 2.884376053562911e-06, "loss": 1.5944947004318237, "step": 8584 }, { "epoch": 1.5628470010011832, "grad_norm": 52.0, "learning_rate": 2.8836111947793397e-06, "loss": 1.2944097518920898, "step": 8586 }, { "epoch": 1.5632110676253754, "grad_norm": 3.453125, "learning_rate": 2.8828463530753547e-06, "loss": 1.4292616844177246, "step": 8588 }, { "epoch": 1.5635751342495676, "grad_norm": 10.5625, "learning_rate": 2.882081528563192e-06, "loss": 1.0554499626159668, "step": 8590 }, { "epoch": 1.5639392008737598, "grad_norm": 12.8125, "learning_rate": 2.881316721355086e-06, "loss": 1.5793795585632324, "step": 8592 }, { "epoch": 1.564303267497952, "grad_norm": 5.125, "learning_rate": 2.8805519315632708e-06, "loss": 1.4209654331207275, "step": 8594 }, { "epoch": 1.5646673341221442, "grad_norm": 5.96875, "learning_rate": 2.879787159299976e-06, "loss": 1.1931174993515015, "step": 8596 }, { "epoch": 1.5650314007463366, "grad_norm": 6.875, "learning_rate": 2.8790224046774284e-06, "loss": 1.3480455875396729, "step": 8598 }, { "epoch": 1.5653954673705288, "grad_norm": 11.875, "learning_rate": 2.878257667807852e-06, "loss": 1.5183436870574951, "step": 8600 }, { "epoch": 1.565759533994721, "grad_norm": 8.5625, "learning_rate": 2.87749294880347e-06, "loss": 1.302595615386963, "step": 8602 }, { "epoch": 1.5661236006189134, "grad_norm": 36.25, "learning_rate": 2.8767282477765e-06, "loss": 1.6306260824203491, "step": 8604 }, { "epoch": 1.5664876672431056, "grad_norm": 27.875, "learning_rate": 2.8759635648391628e-06, "loss": 1.7543013095855713, "step": 8606 }, { "epoch": 1.5668517338672978, "grad_norm": 5.71875, "learning_rate": 2.8751989001036674e-06, "loss": 1.1017216444015503, "step": 8608 }, { "epoch": 1.56721580049149, "grad_norm": 14.9375, "learning_rate": 2.874434253682229e-06, "loss": 1.430077314376831, "step": 8610 }, { "epoch": 1.5675798671156822, "grad_norm": 11.0, "learning_rate": 2.8736696256870538e-06, "loss": 1.4390859603881836, "step": 8612 }, { "epoch": 1.5679439337398744, "grad_norm": 4.5, "learning_rate": 2.8729050162303494e-06, "loss": 1.0132466554641724, "step": 8614 }, { "epoch": 1.5683080003640666, "grad_norm": 10.875, "learning_rate": 2.872140425424319e-06, "loss": 1.2170507907867432, "step": 8616 }, { "epoch": 1.5686720669882588, "grad_norm": 8.125, "learning_rate": 2.8713758533811633e-06, "loss": 1.2501498460769653, "step": 8618 }, { "epoch": 1.569036133612451, "grad_norm": 4.65625, "learning_rate": 2.8706113002130796e-06, "loss": 1.261684536933899, "step": 8620 }, { "epoch": 1.5694002002366432, "grad_norm": 7.5625, "learning_rate": 2.8698467660322635e-06, "loss": 1.0582857131958008, "step": 8622 }, { "epoch": 1.5697642668608354, "grad_norm": 11.5, "learning_rate": 2.8690822509509063e-06, "loss": 1.607926845550537, "step": 8624 }, { "epoch": 1.5701283334850278, "grad_norm": 11.1875, "learning_rate": 2.868317755081199e-06, "loss": 0.9916055798530579, "step": 8626 }, { "epoch": 1.57049240010922, "grad_norm": 9.5, "learning_rate": 2.8675532785353274e-06, "loss": 1.2218564748764038, "step": 8628 }, { "epoch": 1.5708564667334122, "grad_norm": 6.5625, "learning_rate": 2.866788821425475e-06, "loss": 1.1856184005737305, "step": 8630 }, { "epoch": 1.5712205333576046, "grad_norm": 20.125, "learning_rate": 2.8660243838638235e-06, "loss": 1.9854960441589355, "step": 8632 }, { "epoch": 1.5715845999817968, "grad_norm": 9.3125, "learning_rate": 2.8652599659625503e-06, "loss": 1.9577082395553589, "step": 8634 }, { "epoch": 1.571948666605989, "grad_norm": 5.0625, "learning_rate": 2.8644955678338315e-06, "loss": 1.0977166891098022, "step": 8636 }, { "epoch": 1.5723127332301812, "grad_norm": 10.625, "learning_rate": 2.8637311895898383e-06, "loss": 1.1508939266204834, "step": 8638 }, { "epoch": 1.5726767998543734, "grad_norm": 10.3125, "learning_rate": 2.8629668313427417e-06, "loss": 1.2506386041641235, "step": 8640 }, { "epoch": 1.5730408664785656, "grad_norm": 12.5, "learning_rate": 2.8622024932047066e-06, "loss": 1.6508686542510986, "step": 8642 }, { "epoch": 1.5734049331027578, "grad_norm": 10.4375, "learning_rate": 2.861438175287897e-06, "loss": 1.4742003679275513, "step": 8644 }, { "epoch": 1.57376899972695, "grad_norm": 2.765625, "learning_rate": 2.860673877704474e-06, "loss": 1.1899914741516113, "step": 8646 }, { "epoch": 1.5741330663511421, "grad_norm": 13.125, "learning_rate": 2.8599096005665948e-06, "loss": 0.8315771818161011, "step": 8648 }, { "epoch": 1.5744971329753343, "grad_norm": 16.625, "learning_rate": 2.859145343986415e-06, "loss": 1.0613441467285156, "step": 8650 }, { "epoch": 1.5748611995995268, "grad_norm": 8.375, "learning_rate": 2.8583811080760836e-06, "loss": 1.2905110120773315, "step": 8652 }, { "epoch": 1.575225266223719, "grad_norm": 13.9375, "learning_rate": 2.857616892947751e-06, "loss": 1.5749127864837646, "step": 8654 }, { "epoch": 1.5755893328479111, "grad_norm": 27.25, "learning_rate": 2.8568526987135624e-06, "loss": 1.4892123937606812, "step": 8656 }, { "epoch": 1.5759533994721036, "grad_norm": 25.375, "learning_rate": 2.8560885254856606e-06, "loss": 0.6337347030639648, "step": 8658 }, { "epoch": 1.5763174660962957, "grad_norm": 9.5625, "learning_rate": 2.855324373376185e-06, "loss": 1.5698970556259155, "step": 8660 }, { "epoch": 1.576681532720488, "grad_norm": 15.125, "learning_rate": 2.8545602424972695e-06, "loss": 1.4183642864227295, "step": 8662 }, { "epoch": 1.5770455993446801, "grad_norm": 7.5, "learning_rate": 2.8537961329610496e-06, "loss": 1.2746975421905518, "step": 8664 }, { "epoch": 1.5774096659688723, "grad_norm": 5.21875, "learning_rate": 2.8530320448796554e-06, "loss": 1.3939204216003418, "step": 8666 }, { "epoch": 1.5777737325930645, "grad_norm": 12.5, "learning_rate": 2.8522679783652125e-06, "loss": 1.3499265909194946, "step": 8668 }, { "epoch": 1.5781377992172567, "grad_norm": 14.375, "learning_rate": 2.8515039335298457e-06, "loss": 1.814496636390686, "step": 8670 }, { "epoch": 1.578501865841449, "grad_norm": 13.375, "learning_rate": 2.8507399104856737e-06, "loss": 1.1691458225250244, "step": 8672 }, { "epoch": 1.578865932465641, "grad_norm": 12.9375, "learning_rate": 2.8499759093448152e-06, "loss": 1.5588127374649048, "step": 8674 }, { "epoch": 1.5792299990898333, "grad_norm": 6.125, "learning_rate": 2.8492119302193845e-06, "loss": 1.2434666156768799, "step": 8676 }, { "epoch": 1.5795940657140257, "grad_norm": 4.46875, "learning_rate": 2.848447973221492e-06, "loss": 1.2844034433364868, "step": 8678 }, { "epoch": 1.579958132338218, "grad_norm": 3.96875, "learning_rate": 2.8476840384632455e-06, "loss": 1.0985606908798218, "step": 8680 }, { "epoch": 1.58032219896241, "grad_norm": 14.0625, "learning_rate": 2.846920126056748e-06, "loss": 1.4552021026611328, "step": 8682 }, { "epoch": 1.5806862655866023, "grad_norm": 4.53125, "learning_rate": 2.8461562361141017e-06, "loss": 1.0634877681732178, "step": 8684 }, { "epoch": 1.5810503322107947, "grad_norm": 19.125, "learning_rate": 2.8453923687474055e-06, "loss": 1.027315616607666, "step": 8686 }, { "epoch": 1.581414398834987, "grad_norm": 15.625, "learning_rate": 2.844628524068752e-06, "loss": 0.6117433905601501, "step": 8688 }, { "epoch": 1.581778465459179, "grad_norm": 44.5, "learning_rate": 2.843864702190233e-06, "loss": 1.6204490661621094, "step": 8690 }, { "epoch": 1.5821425320833713, "grad_norm": 11.8125, "learning_rate": 2.843100903223936e-06, "loss": 1.3569025993347168, "step": 8692 }, { "epoch": 1.5825065987075635, "grad_norm": 40.75, "learning_rate": 2.842337127281945e-06, "loss": 1.1576783657073975, "step": 8694 }, { "epoch": 1.5828706653317557, "grad_norm": 18.75, "learning_rate": 2.8415733744763428e-06, "loss": 1.6060115098953247, "step": 8696 }, { "epoch": 1.5832347319559479, "grad_norm": 7.71875, "learning_rate": 2.840809644919206e-06, "loss": 1.812389850616455, "step": 8698 }, { "epoch": 1.58359879858014, "grad_norm": 30.25, "learning_rate": 2.8400459387226075e-06, "loss": 1.0458847284317017, "step": 8700 }, { "epoch": 1.5839628652043323, "grad_norm": 10.25, "learning_rate": 2.83928225599862e-06, "loss": 1.7215838432312012, "step": 8702 }, { "epoch": 1.5843269318285245, "grad_norm": 15.75, "learning_rate": 2.8385185968593087e-06, "loss": 1.4653116464614868, "step": 8704 }, { "epoch": 1.5846909984527169, "grad_norm": 16.375, "learning_rate": 2.837754961416741e-06, "loss": 1.867810845375061, "step": 8706 }, { "epoch": 1.585055065076909, "grad_norm": 7.34375, "learning_rate": 2.836991349782974e-06, "loss": 1.445216178894043, "step": 8708 }, { "epoch": 1.5854191317011013, "grad_norm": 10.6875, "learning_rate": 2.8362277620700663e-06, "loss": 1.335452914237976, "step": 8710 }, { "epoch": 1.5857831983252937, "grad_norm": 15.4375, "learning_rate": 2.83546419839007e-06, "loss": 1.5468621253967285, "step": 8712 }, { "epoch": 1.5861472649494859, "grad_norm": 14.375, "learning_rate": 2.8347006588550363e-06, "loss": 1.3194500207901, "step": 8714 }, { "epoch": 1.586511331573678, "grad_norm": 10.625, "learning_rate": 2.8339371435770114e-06, "loss": 1.390432357788086, "step": 8716 }, { "epoch": 1.5868753981978703, "grad_norm": 17.25, "learning_rate": 2.8331736526680375e-06, "loss": 1.5081697702407837, "step": 8718 }, { "epoch": 1.5872394648220625, "grad_norm": 18.125, "learning_rate": 2.832410186240153e-06, "loss": 1.4467625617980957, "step": 8720 }, { "epoch": 1.5876035314462547, "grad_norm": 18.25, "learning_rate": 2.8316467444053958e-06, "loss": 1.5970709323883057, "step": 8722 }, { "epoch": 1.5879675980704469, "grad_norm": 18.625, "learning_rate": 2.8308833272757953e-06, "loss": 1.835404634475708, "step": 8724 }, { "epoch": 1.588331664694639, "grad_norm": 12.9375, "learning_rate": 2.830119934963382e-06, "loss": 1.5871875286102295, "step": 8726 }, { "epoch": 1.5886957313188312, "grad_norm": 7.3125, "learning_rate": 2.8293565675801804e-06, "loss": 1.3618942499160767, "step": 8728 }, { "epoch": 1.5890597979430234, "grad_norm": 23.75, "learning_rate": 2.8285932252382096e-06, "loss": 1.1385767459869385, "step": 8730 }, { "epoch": 1.5894238645672158, "grad_norm": 11.125, "learning_rate": 2.82782990804949e-06, "loss": 1.374788522720337, "step": 8732 }, { "epoch": 1.589787931191408, "grad_norm": 11.5, "learning_rate": 2.8270666161260318e-06, "loss": 1.2342286109924316, "step": 8734 }, { "epoch": 1.5901519978156002, "grad_norm": 33.5, "learning_rate": 2.826303349579848e-06, "loss": 1.2460260391235352, "step": 8736 }, { "epoch": 1.5905160644397924, "grad_norm": 21.875, "learning_rate": 2.8255401085229434e-06, "loss": 0.7765758633613586, "step": 8738 }, { "epoch": 1.5908801310639848, "grad_norm": 8.125, "learning_rate": 2.824776893067321e-06, "loss": 1.0403656959533691, "step": 8740 }, { "epoch": 1.591244197688177, "grad_norm": 9.6875, "learning_rate": 2.8240137033249793e-06, "loss": 1.5699713230133057, "step": 8742 }, { "epoch": 1.5916082643123692, "grad_norm": 11.0, "learning_rate": 2.823250539407913e-06, "loss": 1.5276795625686646, "step": 8744 }, { "epoch": 1.5919723309365614, "grad_norm": 49.0, "learning_rate": 2.8224874014281146e-06, "loss": 1.6743950843811035, "step": 8746 }, { "epoch": 1.5923363975607536, "grad_norm": 18.875, "learning_rate": 2.82172428949757e-06, "loss": 1.6582130193710327, "step": 8748 }, { "epoch": 1.5927004641849458, "grad_norm": 15.75, "learning_rate": 2.8209612037282634e-06, "loss": 1.7769865989685059, "step": 8750 }, { "epoch": 1.593064530809138, "grad_norm": 8.8125, "learning_rate": 2.8201981442321744e-06, "loss": 1.7454893589019775, "step": 8752 }, { "epoch": 1.5934285974333302, "grad_norm": 14.9375, "learning_rate": 2.819435111121279e-06, "loss": 1.467721939086914, "step": 8754 }, { "epoch": 1.5937926640575224, "grad_norm": 14.5, "learning_rate": 2.8186721045075504e-06, "loss": 1.4269483089447021, "step": 8756 }, { "epoch": 1.5941567306817146, "grad_norm": 16.625, "learning_rate": 2.8179091245029557e-06, "loss": 1.0955188274383545, "step": 8758 }, { "epoch": 1.594520797305907, "grad_norm": 15.3125, "learning_rate": 2.817146171219458e-06, "loss": 0.6740895509719849, "step": 8760 }, { "epoch": 1.5948848639300992, "grad_norm": 8.9375, "learning_rate": 2.8163832447690187e-06, "loss": 1.601981520652771, "step": 8762 }, { "epoch": 1.5952489305542914, "grad_norm": 6.5625, "learning_rate": 2.8156203452635943e-06, "loss": 1.1408567428588867, "step": 8764 }, { "epoch": 1.5956129971784838, "grad_norm": 11.5, "learning_rate": 2.814857472815137e-06, "loss": 1.4998714923858643, "step": 8766 }, { "epoch": 1.595977063802676, "grad_norm": 12.5, "learning_rate": 2.814094627535595e-06, "loss": 1.8614895343780518, "step": 8768 }, { "epoch": 1.5963411304268682, "grad_norm": 10.5625, "learning_rate": 2.8133318095369133e-06, "loss": 0.9073572158813477, "step": 8770 }, { "epoch": 1.5967051970510604, "grad_norm": 7.0625, "learning_rate": 2.8125690189310324e-06, "loss": 1.1940504312515259, "step": 8772 }, { "epoch": 1.5970692636752526, "grad_norm": 15.625, "learning_rate": 2.8118062558298876e-06, "loss": 1.5961065292358398, "step": 8774 }, { "epoch": 1.5974333302994448, "grad_norm": 15.4375, "learning_rate": 2.811043520345412e-06, "loss": 1.5292340517044067, "step": 8776 }, { "epoch": 1.597797396923637, "grad_norm": 79.0, "learning_rate": 2.8102808125895352e-06, "loss": 0.8951650857925415, "step": 8778 }, { "epoch": 1.5981614635478292, "grad_norm": 26.0, "learning_rate": 2.8095181326741794e-06, "loss": 1.8279677629470825, "step": 8780 }, { "epoch": 1.5985255301720214, "grad_norm": 9.3125, "learning_rate": 2.808755480711266e-06, "loss": 1.4210129976272583, "step": 8782 }, { "epoch": 1.5988895967962136, "grad_norm": 105.5, "learning_rate": 2.8079928568127103e-06, "loss": 1.294496774673462, "step": 8784 }, { "epoch": 1.599253663420406, "grad_norm": 36.5, "learning_rate": 2.807230261090426e-06, "loss": 1.3026769161224365, "step": 8786 }, { "epoch": 1.5996177300445982, "grad_norm": 9.375, "learning_rate": 2.8064676936563177e-06, "loss": 1.5896800756454468, "step": 8788 }, { "epoch": 1.5999817966687904, "grad_norm": 7.5625, "learning_rate": 2.8057051546222926e-06, "loss": 1.3268531560897827, "step": 8790 }, { "epoch": 1.6003458632929826, "grad_norm": 21.75, "learning_rate": 2.804942644100248e-06, "loss": 1.7382636070251465, "step": 8792 }, { "epoch": 1.600709929917175, "grad_norm": 9.25, "learning_rate": 2.804180162202079e-06, "loss": 1.4696338176727295, "step": 8794 }, { "epoch": 1.6010739965413672, "grad_norm": 7.25, "learning_rate": 2.803417709039679e-06, "loss": 1.1101782321929932, "step": 8796 }, { "epoch": 1.6014380631655594, "grad_norm": 9.3125, "learning_rate": 2.8026552847249334e-06, "loss": 1.4599006175994873, "step": 8798 }, { "epoch": 1.6018021297897516, "grad_norm": 5.59375, "learning_rate": 2.8018928893697245e-06, "loss": 1.1237291097640991, "step": 8800 }, { "epoch": 1.6021661964139438, "grad_norm": 12.375, "learning_rate": 2.8011305230859315e-06, "loss": 1.5153921842575073, "step": 8802 }, { "epoch": 1.602530263038136, "grad_norm": 16.375, "learning_rate": 2.8003681859854276e-06, "loss": 1.4775722026824951, "step": 8804 }, { "epoch": 1.6028943296623281, "grad_norm": 15.4375, "learning_rate": 2.7996058781800845e-06, "loss": 1.6583020687103271, "step": 8806 }, { "epoch": 1.6032583962865203, "grad_norm": 9.0625, "learning_rate": 2.7988435997817666e-06, "loss": 1.7370648384094238, "step": 8808 }, { "epoch": 1.6036224629107125, "grad_norm": 5.21875, "learning_rate": 2.7980813509023343e-06, "loss": 0.975601851940155, "step": 8810 }, { "epoch": 1.6039865295349047, "grad_norm": 11.125, "learning_rate": 2.797319131653646e-06, "loss": 1.0002918243408203, "step": 8812 }, { "epoch": 1.6043505961590971, "grad_norm": 9.4375, "learning_rate": 2.796556942147553e-06, "loss": 1.1703486442565918, "step": 8814 }, { "epoch": 1.6047146627832893, "grad_norm": 9.5625, "learning_rate": 2.7957947824959055e-06, "loss": 1.5182147026062012, "step": 8816 }, { "epoch": 1.6050787294074815, "grad_norm": 14.75, "learning_rate": 2.7950326528105455e-06, "loss": 1.521026611328125, "step": 8818 }, { "epoch": 1.605442796031674, "grad_norm": 59.25, "learning_rate": 2.7942705532033128e-06, "loss": 1.3177852630615234, "step": 8820 }, { "epoch": 1.6058068626558661, "grad_norm": 147.0, "learning_rate": 2.7935084837860427e-06, "loss": 1.8035461902618408, "step": 8822 }, { "epoch": 1.6061709292800583, "grad_norm": 14.0, "learning_rate": 2.7927464446705654e-06, "loss": 1.5627117156982422, "step": 8824 }, { "epoch": 1.6065349959042505, "grad_norm": 44.0, "learning_rate": 2.791984435968709e-06, "loss": 2.0490307807922363, "step": 8826 }, { "epoch": 1.6068990625284427, "grad_norm": 9.625, "learning_rate": 2.791222457792292e-06, "loss": 1.780914545059204, "step": 8828 }, { "epoch": 1.607263129152635, "grad_norm": 9.875, "learning_rate": 2.790460510253134e-06, "loss": 1.4170982837677002, "step": 8830 }, { "epoch": 1.6076271957768271, "grad_norm": 21.75, "learning_rate": 2.7896985934630467e-06, "loss": 1.4862158298492432, "step": 8832 }, { "epoch": 1.6079912624010193, "grad_norm": 40.0, "learning_rate": 2.7889367075338382e-06, "loss": 1.8221937417984009, "step": 8834 }, { "epoch": 1.6083553290252115, "grad_norm": 21.25, "learning_rate": 2.788174852577313e-06, "loss": 1.5980892181396484, "step": 8836 }, { "epoch": 1.6087193956494037, "grad_norm": 7.4375, "learning_rate": 2.7874130287052697e-06, "loss": 0.985620379447937, "step": 8838 }, { "epoch": 1.609083462273596, "grad_norm": 6.84375, "learning_rate": 2.7866512360295027e-06, "loss": 0.9243313074111938, "step": 8840 }, { "epoch": 1.6094475288977883, "grad_norm": 7.8125, "learning_rate": 2.7858894746618025e-06, "loss": 0.6877187490463257, "step": 8842 }, { "epoch": 1.6098115955219805, "grad_norm": 10.25, "learning_rate": 2.7851277447139536e-06, "loss": 1.517800211906433, "step": 8844 }, { "epoch": 1.610175662146173, "grad_norm": 10.375, "learning_rate": 2.784366046297738e-06, "loss": 1.6749703884124756, "step": 8846 }, { "epoch": 1.610539728770365, "grad_norm": 23.25, "learning_rate": 2.783604379524931e-06, "loss": 1.4349150657653809, "step": 8848 }, { "epoch": 1.6109037953945573, "grad_norm": 16.875, "learning_rate": 2.782842744507304e-06, "loss": 1.266967535018921, "step": 8850 }, { "epoch": 1.6112678620187495, "grad_norm": 10.5625, "learning_rate": 2.7820811413566247e-06, "loss": 1.286463737487793, "step": 8852 }, { "epoch": 1.6116319286429417, "grad_norm": 12.25, "learning_rate": 2.781319570184654e-06, "loss": 1.4116493463516235, "step": 8854 }, { "epoch": 1.6119959952671339, "grad_norm": 11.25, "learning_rate": 2.780558031103151e-06, "loss": 1.4223873615264893, "step": 8856 }, { "epoch": 1.612360061891326, "grad_norm": 6.625, "learning_rate": 2.7797965242238667e-06, "loss": 1.5108402967453003, "step": 8858 }, { "epoch": 1.6127241285155183, "grad_norm": 9.75, "learning_rate": 2.7790350496585516e-06, "loss": 1.3979135751724243, "step": 8860 }, { "epoch": 1.6130881951397105, "grad_norm": 6.28125, "learning_rate": 2.7782736075189464e-06, "loss": 1.3624908924102783, "step": 8862 }, { "epoch": 1.6134522617639027, "grad_norm": 12.75, "learning_rate": 2.77751219791679e-06, "loss": 1.5869847536087036, "step": 8864 }, { "epoch": 1.6138163283880949, "grad_norm": 10.4375, "learning_rate": 2.776750820963818e-06, "loss": 1.573920488357544, "step": 8866 }, { "epoch": 1.6141803950122873, "grad_norm": 18.375, "learning_rate": 2.7759894767717577e-06, "loss": 1.4915132522583008, "step": 8868 }, { "epoch": 1.6145444616364795, "grad_norm": 19.75, "learning_rate": 2.7752281654523348e-06, "loss": 1.509536623954773, "step": 8870 }, { "epoch": 1.6149085282606717, "grad_norm": 13.9375, "learning_rate": 2.7744668871172663e-06, "loss": 0.956229567527771, "step": 8872 }, { "epoch": 1.615272594884864, "grad_norm": 3.765625, "learning_rate": 2.7737056418782692e-06, "loss": 1.08854079246521, "step": 8874 }, { "epoch": 1.6156366615090563, "grad_norm": 13.75, "learning_rate": 2.772944429847052e-06, "loss": 1.0453836917877197, "step": 8876 }, { "epoch": 1.6160007281332485, "grad_norm": 13.5, "learning_rate": 2.7721832511353184e-06, "loss": 1.5481067895889282, "step": 8878 }, { "epoch": 1.6163647947574407, "grad_norm": 25.375, "learning_rate": 2.771422105854771e-06, "loss": 1.7072434425354004, "step": 8880 }, { "epoch": 1.6167288613816329, "grad_norm": 22.5, "learning_rate": 2.7706609941171025e-06, "loss": 1.2968029975891113, "step": 8882 }, { "epoch": 1.617092928005825, "grad_norm": 18.0, "learning_rate": 2.7698999160340034e-06, "loss": 1.6372756958007812, "step": 8884 }, { "epoch": 1.6174569946300172, "grad_norm": 14.4375, "learning_rate": 2.7691388717171595e-06, "loss": 1.9359009265899658, "step": 8886 }, { "epoch": 1.6178210612542094, "grad_norm": 61.0, "learning_rate": 2.7683778612782496e-06, "loss": 1.2515411376953125, "step": 8888 }, { "epoch": 1.6181851278784016, "grad_norm": 9.9375, "learning_rate": 2.767616884828952e-06, "loss": 0.6741016507148743, "step": 8890 }, { "epoch": 1.6185491945025938, "grad_norm": 12.75, "learning_rate": 2.7668559424809332e-06, "loss": 1.3022091388702393, "step": 8892 }, { "epoch": 1.6189132611267862, "grad_norm": 28.625, "learning_rate": 2.7660950343458597e-06, "loss": 1.689863920211792, "step": 8894 }, { "epoch": 1.6192773277509784, "grad_norm": 5.71875, "learning_rate": 2.7653341605353933e-06, "loss": 0.8395198583602905, "step": 8896 }, { "epoch": 1.6196413943751706, "grad_norm": 4.625, "learning_rate": 2.764573321161187e-06, "loss": 1.459787368774414, "step": 8898 }, { "epoch": 1.620005460999363, "grad_norm": 3.453125, "learning_rate": 2.7638125163348916e-06, "loss": 1.001482367515564, "step": 8900 }, { "epoch": 1.6203695276235552, "grad_norm": 8.1875, "learning_rate": 2.763051746168153e-06, "loss": 1.0472490787506104, "step": 8902 }, { "epoch": 1.6207335942477474, "grad_norm": 7.21875, "learning_rate": 2.7622910107726092e-06, "loss": 1.1912736892700195, "step": 8904 }, { "epoch": 1.6210976608719396, "grad_norm": 5.4375, "learning_rate": 2.7615303102598974e-06, "loss": 1.2734090089797974, "step": 8906 }, { "epoch": 1.6214617274961318, "grad_norm": 17.875, "learning_rate": 2.7607696447416454e-06, "loss": 1.19173002243042, "step": 8908 }, { "epoch": 1.621825794120324, "grad_norm": 14.0, "learning_rate": 2.760009014329479e-06, "loss": 1.4895093441009521, "step": 8910 }, { "epoch": 1.6221898607445162, "grad_norm": 2.828125, "learning_rate": 2.759248419135017e-06, "loss": 1.1691850423812866, "step": 8912 }, { "epoch": 1.6225539273687084, "grad_norm": 4.09375, "learning_rate": 2.7584878592698728e-06, "loss": 0.8882176280021667, "step": 8914 }, { "epoch": 1.6229179939929006, "grad_norm": 12.4375, "learning_rate": 2.757727334845658e-06, "loss": 1.1957151889801025, "step": 8916 }, { "epoch": 1.6232820606170928, "grad_norm": 13.4375, "learning_rate": 2.756966845973974e-06, "loss": 1.4767260551452637, "step": 8918 }, { "epoch": 1.6236461272412852, "grad_norm": 8.0, "learning_rate": 2.7562063927664207e-06, "loss": 1.5322933197021484, "step": 8920 }, { "epoch": 1.6240101938654774, "grad_norm": 21.0, "learning_rate": 2.755445975334592e-06, "loss": 1.4067540168762207, "step": 8922 }, { "epoch": 1.6243742604896696, "grad_norm": 15.5, "learning_rate": 2.754685593790074e-06, "loss": 1.1420401334762573, "step": 8924 }, { "epoch": 1.6247383271138618, "grad_norm": 18.375, "learning_rate": 2.753925248244452e-06, "loss": 0.8038854598999023, "step": 8926 }, { "epoch": 1.6251023937380542, "grad_norm": 10.1875, "learning_rate": 2.7531649388093017e-06, "loss": 1.5709344148635864, "step": 8928 }, { "epoch": 1.6254664603622464, "grad_norm": 17.25, "learning_rate": 2.752404665596197e-06, "loss": 1.4348938465118408, "step": 8930 }, { "epoch": 1.6258305269864386, "grad_norm": 12.9375, "learning_rate": 2.7516444287167045e-06, "loss": 1.9187887907028198, "step": 8932 }, { "epoch": 1.6261945936106308, "grad_norm": 11.1875, "learning_rate": 2.7508842282823844e-06, "loss": 1.3741451501846313, "step": 8934 }, { "epoch": 1.626558660234823, "grad_norm": 29.625, "learning_rate": 2.7501240644047955e-06, "loss": 1.978157877922058, "step": 8936 }, { "epoch": 1.6269227268590152, "grad_norm": 13.125, "learning_rate": 2.749363937195486e-06, "loss": 1.3270370960235596, "step": 8938 }, { "epoch": 1.6272867934832074, "grad_norm": 14.625, "learning_rate": 2.748603846766004e-06, "loss": 1.503261685371399, "step": 8940 }, { "epoch": 1.6276508601073996, "grad_norm": 19.375, "learning_rate": 2.747843793227889e-06, "loss": 1.347365379333496, "step": 8942 }, { "epoch": 1.6280149267315918, "grad_norm": 5.8125, "learning_rate": 2.7470837766926743e-06, "loss": 0.8761991262435913, "step": 8944 }, { "epoch": 1.628378993355784, "grad_norm": 13.8125, "learning_rate": 2.7463237972718913e-06, "loss": 1.1706876754760742, "step": 8946 }, { "epoch": 1.6287430599799764, "grad_norm": 52.0, "learning_rate": 2.7455638550770625e-06, "loss": 1.4347889423370361, "step": 8948 }, { "epoch": 1.6291071266041686, "grad_norm": 12.0625, "learning_rate": 2.7448039502197077e-06, "loss": 1.2601158618927002, "step": 8950 }, { "epoch": 1.6294711932283608, "grad_norm": 15.8125, "learning_rate": 2.744044082811338e-06, "loss": 1.5689654350280762, "step": 8952 }, { "epoch": 1.6298352598525532, "grad_norm": 13.375, "learning_rate": 2.743284252963462e-06, "loss": 1.5567150115966797, "step": 8954 }, { "epoch": 1.6301993264767454, "grad_norm": 12.625, "learning_rate": 2.7425244607875825e-06, "loss": 1.1311638355255127, "step": 8956 }, { "epoch": 1.6305633931009376, "grad_norm": 44.25, "learning_rate": 2.7417647063951936e-06, "loss": 1.5073493719100952, "step": 8958 }, { "epoch": 1.6309274597251298, "grad_norm": 13.875, "learning_rate": 2.741004989897788e-06, "loss": 1.303006887435913, "step": 8960 }, { "epoch": 1.631291526349322, "grad_norm": 5.46875, "learning_rate": 2.7402453114068504e-06, "loss": 1.0294671058654785, "step": 8962 }, { "epoch": 1.6316555929735141, "grad_norm": 6.9375, "learning_rate": 2.73948567103386e-06, "loss": 1.4539611339569092, "step": 8964 }, { "epoch": 1.6320196595977063, "grad_norm": 13.1875, "learning_rate": 2.7387260688902933e-06, "loss": 1.7967519760131836, "step": 8966 }, { "epoch": 1.6323837262218985, "grad_norm": 10.1875, "learning_rate": 2.737966505087616e-06, "loss": 1.634358286857605, "step": 8968 }, { "epoch": 1.6327477928460907, "grad_norm": 34.25, "learning_rate": 2.7372069797372925e-06, "loss": 1.7835495471954346, "step": 8970 }, { "epoch": 1.633111859470283, "grad_norm": 10.3125, "learning_rate": 2.73644749295078e-06, "loss": 1.5124640464782715, "step": 8972 }, { "epoch": 1.6334759260944753, "grad_norm": 10.3125, "learning_rate": 2.7356880448395284e-06, "loss": 1.5099689960479736, "step": 8974 }, { "epoch": 1.6338399927186675, "grad_norm": 10.125, "learning_rate": 2.7349286355149868e-06, "loss": 1.367443323135376, "step": 8976 }, { "epoch": 1.6342040593428597, "grad_norm": 4.40625, "learning_rate": 2.7341692650885927e-06, "loss": 1.472070574760437, "step": 8978 }, { "epoch": 1.634568125967052, "grad_norm": 6.875, "learning_rate": 2.733409933671782e-06, "loss": 0.9984011650085449, "step": 8980 }, { "epoch": 1.6349321925912443, "grad_norm": 99.5, "learning_rate": 2.7326506413759836e-06, "loss": 1.314826250076294, "step": 8982 }, { "epoch": 1.6352962592154365, "grad_norm": 10.5, "learning_rate": 2.7318913883126184e-06, "loss": 0.881316065788269, "step": 8984 }, { "epoch": 1.6356603258396287, "grad_norm": 8.5, "learning_rate": 2.7311321745931073e-06, "loss": 1.4579654932022095, "step": 8986 }, { "epoch": 1.636024392463821, "grad_norm": 4.3125, "learning_rate": 2.7303730003288585e-06, "loss": 1.0781248807907104, "step": 8988 }, { "epoch": 1.6363884590880131, "grad_norm": 7.625, "learning_rate": 2.729613865631281e-06, "loss": 1.2237229347229004, "step": 8990 }, { "epoch": 1.6367525257122053, "grad_norm": 16.875, "learning_rate": 2.728854770611772e-06, "loss": 1.513213872909546, "step": 8992 }, { "epoch": 1.6371165923363975, "grad_norm": 8.9375, "learning_rate": 2.7280957153817256e-06, "loss": 1.3854542970657349, "step": 8994 }, { "epoch": 1.6374806589605897, "grad_norm": 15.0, "learning_rate": 2.7273367000525324e-06, "loss": 1.1437013149261475, "step": 8996 }, { "epoch": 1.637844725584782, "grad_norm": 7.21875, "learning_rate": 2.7265777247355723e-06, "loss": 1.3488255739212036, "step": 8998 }, { "epoch": 1.638208792208974, "grad_norm": 15.25, "learning_rate": 2.725818789542224e-06, "loss": 1.4252893924713135, "step": 9000 }, { "epoch": 1.6385728588331665, "grad_norm": 11.125, "learning_rate": 2.725059894583857e-06, "loss": 1.497605800628662, "step": 9002 }, { "epoch": 1.6389369254573587, "grad_norm": 8.8125, "learning_rate": 2.7243010399718356e-06, "loss": 1.2594776153564453, "step": 9004 }, { "epoch": 1.639300992081551, "grad_norm": 8.125, "learning_rate": 2.7235422258175202e-06, "loss": 1.3826425075531006, "step": 9006 }, { "epoch": 1.6396650587057433, "grad_norm": 8.375, "learning_rate": 2.7227834522322616e-06, "loss": 1.3130407333374023, "step": 9008 }, { "epoch": 1.6400291253299355, "grad_norm": 9.375, "learning_rate": 2.7220247193274095e-06, "loss": 1.435045838356018, "step": 9010 }, { "epoch": 1.6403931919541277, "grad_norm": 14.8125, "learning_rate": 2.721266027214302e-06, "loss": 1.6278200149536133, "step": 9012 }, { "epoch": 1.6407572585783199, "grad_norm": 16.125, "learning_rate": 2.7205073760042755e-06, "loss": 1.2888107299804688, "step": 9014 }, { "epoch": 1.641121325202512, "grad_norm": 23.375, "learning_rate": 2.7197487658086596e-06, "loss": 1.8384628295898438, "step": 9016 }, { "epoch": 1.6414853918267043, "grad_norm": 8.4375, "learning_rate": 2.7189901967387755e-06, "loss": 1.5917160511016846, "step": 9018 }, { "epoch": 1.6418494584508965, "grad_norm": 10.6875, "learning_rate": 2.718231668905943e-06, "loss": 1.2195689678192139, "step": 9020 }, { "epoch": 1.6422135250750887, "grad_norm": 8.0, "learning_rate": 2.7174731824214693e-06, "loss": 1.3459898233413696, "step": 9022 }, { "epoch": 1.6425775916992809, "grad_norm": 13.3125, "learning_rate": 2.716714737396662e-06, "loss": 1.4541394710540771, "step": 9024 }, { "epoch": 1.642941658323473, "grad_norm": 10.4375, "learning_rate": 2.7159563339428186e-06, "loss": 1.2718074321746826, "step": 9026 }, { "epoch": 1.6433057249476655, "grad_norm": 8.8125, "learning_rate": 2.7151979721712316e-06, "loss": 1.1680617332458496, "step": 9028 }, { "epoch": 1.6436697915718577, "grad_norm": 12.9375, "learning_rate": 2.7144396521931892e-06, "loss": 1.4630279541015625, "step": 9030 }, { "epoch": 1.6440338581960499, "grad_norm": 16.5, "learning_rate": 2.7136813741199703e-06, "loss": 1.5427327156066895, "step": 9032 }, { "epoch": 1.644397924820242, "grad_norm": 43.5, "learning_rate": 2.7129231380628483e-06, "loss": 1.393282413482666, "step": 9034 }, { "epoch": 1.6447619914444345, "grad_norm": 8.375, "learning_rate": 2.712164944133094e-06, "loss": 1.577666997909546, "step": 9036 }, { "epoch": 1.6451260580686267, "grad_norm": 39.75, "learning_rate": 2.711406792441966e-06, "loss": 1.3574159145355225, "step": 9038 }, { "epoch": 1.6454901246928189, "grad_norm": 9.1875, "learning_rate": 2.710648683100723e-06, "loss": 1.2750723361968994, "step": 9040 }, { "epoch": 1.645854191317011, "grad_norm": 10.1875, "learning_rate": 2.709890616220613e-06, "loss": 1.46896231174469, "step": 9042 }, { "epoch": 1.6462182579412032, "grad_norm": 29.375, "learning_rate": 2.7091325919128785e-06, "loss": 1.554670810699463, "step": 9044 }, { "epoch": 1.6465823245653954, "grad_norm": 10.875, "learning_rate": 2.7083746102887575e-06, "loss": 1.6285943984985352, "step": 9046 }, { "epoch": 1.6469463911895876, "grad_norm": 4.8125, "learning_rate": 2.707616671459481e-06, "loss": 1.3509740829467773, "step": 9048 }, { "epoch": 1.6473104578137798, "grad_norm": 10.3125, "learning_rate": 2.706858775536273e-06, "loss": 1.3567811250686646, "step": 9050 }, { "epoch": 1.647674524437972, "grad_norm": 3.96875, "learning_rate": 2.7061009226303513e-06, "loss": 1.1134076118469238, "step": 9052 }, { "epoch": 1.6480385910621642, "grad_norm": 11.25, "learning_rate": 2.7053431128529286e-06, "loss": 1.0791559219360352, "step": 9054 }, { "epoch": 1.6484026576863566, "grad_norm": 10.125, "learning_rate": 2.70458534631521e-06, "loss": 0.8206127285957336, "step": 9056 }, { "epoch": 1.6487667243105488, "grad_norm": 6.4375, "learning_rate": 2.7038276231283943e-06, "loss": 1.065165638923645, "step": 9058 }, { "epoch": 1.649130790934741, "grad_norm": 19.875, "learning_rate": 2.7030699434036746e-06, "loss": 1.3392390012741089, "step": 9060 }, { "epoch": 1.6494948575589334, "grad_norm": 17.125, "learning_rate": 2.7023123072522377e-06, "loss": 1.3549519777297974, "step": 9062 }, { "epoch": 1.6498589241831256, "grad_norm": 7.125, "learning_rate": 2.7015547147852626e-06, "loss": 1.3336328268051147, "step": 9064 }, { "epoch": 1.6502229908073178, "grad_norm": 54.25, "learning_rate": 2.7007971661139244e-06, "loss": 1.0506854057312012, "step": 9066 }, { "epoch": 1.65058705743151, "grad_norm": 8.75, "learning_rate": 2.7000396613493886e-06, "loss": 0.9831037521362305, "step": 9068 }, { "epoch": 1.6509511240557022, "grad_norm": 16.25, "learning_rate": 2.6992822006028185e-06, "loss": 1.522639274597168, "step": 9070 }, { "epoch": 1.6513151906798944, "grad_norm": 10.5625, "learning_rate": 2.6985247839853656e-06, "loss": 1.4508020877838135, "step": 9072 }, { "epoch": 1.6516792573040866, "grad_norm": 26.125, "learning_rate": 2.697767411608179e-06, "loss": 1.564084529876709, "step": 9074 }, { "epoch": 1.6520433239282788, "grad_norm": 17.375, "learning_rate": 2.6970100835824e-06, "loss": 1.777204990386963, "step": 9076 }, { "epoch": 1.652407390552471, "grad_norm": 8.1875, "learning_rate": 2.6962528000191622e-06, "loss": 1.308849573135376, "step": 9078 }, { "epoch": 1.6527714571766632, "grad_norm": 10.75, "learning_rate": 2.6954955610295963e-06, "loss": 1.4076833724975586, "step": 9080 }, { "epoch": 1.6531355238008556, "grad_norm": 14.375, "learning_rate": 2.694738366724823e-06, "loss": 1.5952646732330322, "step": 9082 }, { "epoch": 1.6534995904250478, "grad_norm": 14.0625, "learning_rate": 2.6939812172159565e-06, "loss": 1.5751872062683105, "step": 9084 }, { "epoch": 1.65386365704924, "grad_norm": 9.1875, "learning_rate": 2.6932241126141066e-06, "loss": 1.7137010097503662, "step": 9086 }, { "epoch": 1.6542277236734324, "grad_norm": 21.625, "learning_rate": 2.692467053030374e-06, "loss": 1.4660654067993164, "step": 9088 }, { "epoch": 1.6545917902976246, "grad_norm": 15.75, "learning_rate": 2.6917100385758564e-06, "loss": 1.0201032161712646, "step": 9090 }, { "epoch": 1.6549558569218168, "grad_norm": 15.5625, "learning_rate": 2.6909530693616408e-06, "loss": 1.4451243877410889, "step": 9092 }, { "epoch": 1.655319923546009, "grad_norm": 9.4375, "learning_rate": 2.6901961454988092e-06, "loss": 1.1621956825256348, "step": 9094 }, { "epoch": 1.6556839901702012, "grad_norm": 8.625, "learning_rate": 2.6894392670984386e-06, "loss": 1.4459900856018066, "step": 9096 }, { "epoch": 1.6560480567943934, "grad_norm": 11.0, "learning_rate": 2.6886824342715967e-06, "loss": 1.3991408348083496, "step": 9098 }, { "epoch": 1.6564121234185856, "grad_norm": 27.875, "learning_rate": 2.687925647129346e-06, "loss": 1.618884563446045, "step": 9100 }, { "epoch": 1.6567761900427778, "grad_norm": 15.25, "learning_rate": 2.687168905782741e-06, "loss": 1.824678659439087, "step": 9102 }, { "epoch": 1.65714025666697, "grad_norm": 21.375, "learning_rate": 2.6864122103428313e-06, "loss": 1.3834012746810913, "step": 9104 }, { "epoch": 1.6575043232911622, "grad_norm": 4.0625, "learning_rate": 2.68565556092066e-06, "loss": 1.0795729160308838, "step": 9106 }, { "epoch": 1.6578683899153543, "grad_norm": 27.375, "learning_rate": 2.684898957627261e-06, "loss": 0.8226364850997925, "step": 9108 }, { "epoch": 1.6582324565395468, "grad_norm": 16.75, "learning_rate": 2.6841424005736627e-06, "loss": 1.4209195375442505, "step": 9110 }, { "epoch": 1.658596523163739, "grad_norm": 13.1875, "learning_rate": 2.6833858898708875e-06, "loss": 1.658259630203247, "step": 9112 }, { "epoch": 1.6589605897879312, "grad_norm": 18.625, "learning_rate": 2.682629425629949e-06, "loss": 1.596691608428955, "step": 9114 }, { "epoch": 1.6593246564121236, "grad_norm": 36.0, "learning_rate": 2.6818730079618564e-06, "loss": 0.21741102635860443, "step": 9116 }, { "epoch": 1.6596887230363158, "grad_norm": 92.0, "learning_rate": 2.6811166369776105e-06, "loss": 1.3444486856460571, "step": 9118 }, { "epoch": 1.660052789660508, "grad_norm": 18.875, "learning_rate": 2.680360312788206e-06, "loss": 1.788914442062378, "step": 9120 }, { "epoch": 1.6604168562847001, "grad_norm": 12.875, "learning_rate": 2.6796040355046305e-06, "loss": 1.463478684425354, "step": 9122 }, { "epoch": 1.6607809229088923, "grad_norm": 11.375, "learning_rate": 2.6788478052378635e-06, "loss": 1.6262067556381226, "step": 9124 }, { "epoch": 1.6611449895330845, "grad_norm": 17.0, "learning_rate": 2.6780916220988806e-06, "loss": 1.5845322608947754, "step": 9126 }, { "epoch": 1.6615090561572767, "grad_norm": 19.375, "learning_rate": 2.6773354861986465e-06, "loss": 1.7036840915679932, "step": 9128 }, { "epoch": 1.661873122781469, "grad_norm": 25.25, "learning_rate": 2.6765793976481234e-06, "loss": 1.8864271640777588, "step": 9130 }, { "epoch": 1.6622371894056611, "grad_norm": 6.3125, "learning_rate": 2.6758233565582624e-06, "loss": 1.4782168865203857, "step": 9132 }, { "epoch": 1.6626012560298533, "grad_norm": 24.625, "learning_rate": 2.675067363040009e-06, "loss": 1.9831485748291016, "step": 9134 }, { "epoch": 1.6629653226540457, "grad_norm": 15.3125, "learning_rate": 2.674311417204305e-06, "loss": 1.4451037645339966, "step": 9136 }, { "epoch": 1.663329389278238, "grad_norm": 35.75, "learning_rate": 2.673555519162079e-06, "loss": 1.2403595447540283, "step": 9138 }, { "epoch": 1.6636934559024301, "grad_norm": 7.90625, "learning_rate": 2.67279966902426e-06, "loss": 0.8842126727104187, "step": 9140 }, { "epoch": 1.6640575225266225, "grad_norm": 16.25, "learning_rate": 2.672043866901761e-06, "loss": 1.546290636062622, "step": 9142 }, { "epoch": 1.6644215891508147, "grad_norm": 16.75, "learning_rate": 2.6712881129054962e-06, "loss": 2.038055658340454, "step": 9144 }, { "epoch": 1.664785655775007, "grad_norm": 8.75, "learning_rate": 2.6705324071463694e-06, "loss": 1.495240330696106, "step": 9146 }, { "epoch": 1.6651497223991991, "grad_norm": 9.4375, "learning_rate": 2.6697767497352755e-06, "loss": 1.4728375673294067, "step": 9148 }, { "epoch": 1.6655137890233913, "grad_norm": 22.0, "learning_rate": 2.6690211407831067e-06, "loss": 1.430677056312561, "step": 9150 }, { "epoch": 1.6658778556475835, "grad_norm": 16.5, "learning_rate": 2.6682655804007427e-06, "loss": 1.9896063804626465, "step": 9152 }, { "epoch": 1.6662419222717757, "grad_norm": 8.6875, "learning_rate": 2.6675100686990605e-06, "loss": 1.651367425918579, "step": 9154 }, { "epoch": 1.666605988895968, "grad_norm": 3.9375, "learning_rate": 2.66675460578893e-06, "loss": 0.9940225481987, "step": 9156 }, { "epoch": 1.66697005552016, "grad_norm": 9.9375, "learning_rate": 2.665999191781209e-06, "loss": 1.243117332458496, "step": 9158 }, { "epoch": 1.6673341221443523, "grad_norm": 8.5625, "learning_rate": 2.6652438267867537e-06, "loss": 1.3259273767471313, "step": 9160 }, { "epoch": 1.6676981887685447, "grad_norm": 13.9375, "learning_rate": 2.6644885109164097e-06, "loss": 1.8624930381774902, "step": 9162 }, { "epoch": 1.668062255392737, "grad_norm": 18.125, "learning_rate": 2.663733244281017e-06, "loss": 1.9162217378616333, "step": 9164 }, { "epoch": 1.668426322016929, "grad_norm": 17.0, "learning_rate": 2.6629780269914083e-06, "loss": 1.2164101600646973, "step": 9166 }, { "epoch": 1.6687903886411213, "grad_norm": 9.6875, "learning_rate": 2.6622228591584076e-06, "loss": 1.4263803958892822, "step": 9168 }, { "epoch": 1.6691544552653137, "grad_norm": 10.75, "learning_rate": 2.6614677408928347e-06, "loss": 1.2728183269500732, "step": 9170 }, { "epoch": 1.669518521889506, "grad_norm": 13.0, "learning_rate": 2.6607126723054977e-06, "loss": 1.4746191501617432, "step": 9172 }, { "epoch": 1.669882588513698, "grad_norm": 15.9375, "learning_rate": 2.659957653507201e-06, "loss": 1.4660627841949463, "step": 9174 }, { "epoch": 1.6702466551378903, "grad_norm": 21.375, "learning_rate": 2.6592026846087417e-06, "loss": 1.599808692932129, "step": 9176 }, { "epoch": 1.6706107217620825, "grad_norm": 19.125, "learning_rate": 2.658447765720906e-06, "loss": 1.6903605461120605, "step": 9178 }, { "epoch": 1.6709747883862747, "grad_norm": 20.625, "learning_rate": 2.657692896954478e-06, "loss": 2.237009048461914, "step": 9180 }, { "epoch": 1.6713388550104669, "grad_norm": 17.625, "learning_rate": 2.6569380784202293e-06, "loss": 1.7403486967086792, "step": 9182 }, { "epoch": 1.671702921634659, "grad_norm": 23.625, "learning_rate": 2.656183310228927e-06, "loss": 1.7829509973526, "step": 9184 }, { "epoch": 1.6720669882588513, "grad_norm": 10.625, "learning_rate": 2.6554285924913308e-06, "loss": 0.8804969191551208, "step": 9186 }, { "epoch": 1.6724310548830434, "grad_norm": 10.5625, "learning_rate": 2.654673925318192e-06, "loss": 1.4425172805786133, "step": 9188 }, { "epoch": 1.6727951215072359, "grad_norm": 9.0, "learning_rate": 2.653919308820256e-06, "loss": 0.6721599102020264, "step": 9190 }, { "epoch": 1.673159188131428, "grad_norm": 13.625, "learning_rate": 2.653164743108258e-06, "loss": 1.5481771230697632, "step": 9192 }, { "epoch": 1.6735232547556202, "grad_norm": 11.125, "learning_rate": 2.652410228292928e-06, "loss": 1.62593412399292, "step": 9194 }, { "epoch": 1.6738873213798127, "grad_norm": 26.125, "learning_rate": 2.6516557644849895e-06, "loss": 1.1630219221115112, "step": 9196 }, { "epoch": 1.6742513880040049, "grad_norm": 16.5, "learning_rate": 2.6509013517951554e-06, "loss": 0.5983873605728149, "step": 9198 }, { "epoch": 1.674615454628197, "grad_norm": 18.375, "learning_rate": 2.650146990334133e-06, "loss": 1.5465179681777954, "step": 9200 }, { "epoch": 1.6749795212523892, "grad_norm": 18.5, "learning_rate": 2.6493926802126223e-06, "loss": 1.7888509035110474, "step": 9202 }, { "epoch": 1.6753435878765814, "grad_norm": 10.5625, "learning_rate": 2.648638421541314e-06, "loss": 1.5550678968429565, "step": 9204 }, { "epoch": 1.6757076545007736, "grad_norm": 11.0625, "learning_rate": 2.6478842144308948e-06, "loss": 1.0708715915679932, "step": 9206 }, { "epoch": 1.6760717211249658, "grad_norm": 11.5625, "learning_rate": 2.6471300589920386e-06, "loss": 1.1484529972076416, "step": 9208 }, { "epoch": 1.676435787749158, "grad_norm": 14.8125, "learning_rate": 2.6463759553354173e-06, "loss": 1.0184969902038574, "step": 9210 }, { "epoch": 1.6767998543733502, "grad_norm": 12.8125, "learning_rate": 2.6456219035716914e-06, "loss": 1.2476813793182373, "step": 9212 }, { "epoch": 1.6771639209975424, "grad_norm": 31.625, "learning_rate": 2.644867903811514e-06, "loss": 1.5933899879455566, "step": 9214 }, { "epoch": 1.6775279876217348, "grad_norm": 55.75, "learning_rate": 2.644113956165534e-06, "loss": 2.2150018215179443, "step": 9216 }, { "epoch": 1.677892054245927, "grad_norm": 3.84375, "learning_rate": 2.6433600607443865e-06, "loss": 1.0567104816436768, "step": 9218 }, { "epoch": 1.6782561208701192, "grad_norm": 17.25, "learning_rate": 2.6426062176587065e-06, "loss": 1.4568065404891968, "step": 9220 }, { "epoch": 1.6786201874943114, "grad_norm": 16.375, "learning_rate": 2.6418524270191154e-06, "loss": 1.435537576675415, "step": 9222 }, { "epoch": 1.6789842541185038, "grad_norm": 12.0, "learning_rate": 2.641098688936229e-06, "loss": 1.5779021978378296, "step": 9224 }, { "epoch": 1.679348320742696, "grad_norm": 8.9375, "learning_rate": 2.6403450035206557e-06, "loss": 1.4415068626403809, "step": 9226 }, { "epoch": 1.6797123873668882, "grad_norm": 5.84375, "learning_rate": 2.6395913708829945e-06, "loss": 1.1263445615768433, "step": 9228 }, { "epoch": 1.6800764539910804, "grad_norm": 9.375, "learning_rate": 2.63883779113384e-06, "loss": 1.2108830213546753, "step": 9230 }, { "epoch": 1.6804405206152726, "grad_norm": 5.71875, "learning_rate": 2.638084264383777e-06, "loss": 0.6980651021003723, "step": 9232 }, { "epoch": 1.6808045872394648, "grad_norm": 21.375, "learning_rate": 2.6373307907433792e-06, "loss": 1.3352385759353638, "step": 9234 }, { "epoch": 1.681168653863657, "grad_norm": 15.25, "learning_rate": 2.63657737032322e-06, "loss": 0.7894084453582764, "step": 9236 }, { "epoch": 1.6815327204878492, "grad_norm": 26.375, "learning_rate": 2.635824003233858e-06, "loss": 1.5009653568267822, "step": 9238 }, { "epoch": 1.6818967871120414, "grad_norm": 8.0625, "learning_rate": 2.635070689585848e-06, "loss": 1.0865978002548218, "step": 9240 }, { "epoch": 1.6822608537362336, "grad_norm": 13.4375, "learning_rate": 2.6343174294897345e-06, "loss": 1.5583518743515015, "step": 9242 }, { "epoch": 1.682624920360426, "grad_norm": 5.3125, "learning_rate": 2.6335642230560566e-06, "loss": 1.0222206115722656, "step": 9244 }, { "epoch": 1.6829889869846182, "grad_norm": 6.78125, "learning_rate": 2.6328110703953447e-06, "loss": 1.3737409114837646, "step": 9246 }, { "epoch": 1.6833530536088104, "grad_norm": 10.75, "learning_rate": 2.632057971618119e-06, "loss": 1.403762936592102, "step": 9248 }, { "epoch": 1.6837171202330028, "grad_norm": 8.3125, "learning_rate": 2.6313049268348966e-06, "loss": 1.3826375007629395, "step": 9250 }, { "epoch": 1.684081186857195, "grad_norm": 11.125, "learning_rate": 2.6305519361561806e-06, "loss": 1.4757513999938965, "step": 9252 }, { "epoch": 1.6844452534813872, "grad_norm": 9.9375, "learning_rate": 2.6297989996924707e-06, "loss": 1.2882804870605469, "step": 9254 }, { "epoch": 1.6848093201055794, "grad_norm": 18.5, "learning_rate": 2.629046117554258e-06, "loss": 1.0880687236785889, "step": 9256 }, { "epoch": 1.6851733867297716, "grad_norm": 16.5, "learning_rate": 2.628293289852023e-06, "loss": 1.344069004058838, "step": 9258 }, { "epoch": 1.6855374533539638, "grad_norm": 8.5, "learning_rate": 2.627540516696243e-06, "loss": 1.216900110244751, "step": 9260 }, { "epoch": 1.685901519978156, "grad_norm": 16.375, "learning_rate": 2.6267877981973815e-06, "loss": 1.1618045568466187, "step": 9262 }, { "epoch": 1.6862655866023482, "grad_norm": 12.5, "learning_rate": 2.6260351344658987e-06, "loss": 1.771470546722412, "step": 9264 }, { "epoch": 1.6866296532265403, "grad_norm": 9.125, "learning_rate": 2.625282525612244e-06, "loss": 1.7320276498794556, "step": 9266 }, { "epoch": 1.6869937198507325, "grad_norm": 5.1875, "learning_rate": 2.62452997174686e-06, "loss": 1.2539374828338623, "step": 9268 }, { "epoch": 1.687357786474925, "grad_norm": 9.5, "learning_rate": 2.6237774729801812e-06, "loss": 1.0105149745941162, "step": 9270 }, { "epoch": 1.6877218530991172, "grad_norm": 10.8125, "learning_rate": 2.623025029422633e-06, "loss": 1.4932713508605957, "step": 9272 }, { "epoch": 1.6880859197233093, "grad_norm": 23.0, "learning_rate": 2.622272641184635e-06, "loss": 1.8004236221313477, "step": 9274 }, { "epoch": 1.6884499863475015, "grad_norm": 10.9375, "learning_rate": 2.621520308376596e-06, "loss": 1.4048701524734497, "step": 9276 }, { "epoch": 1.688814052971694, "grad_norm": 14.375, "learning_rate": 2.6207680311089167e-06, "loss": 1.559190273284912, "step": 9278 }, { "epoch": 1.6891781195958862, "grad_norm": 8.5625, "learning_rate": 2.6200158094919936e-06, "loss": 1.4601926803588867, "step": 9280 }, { "epoch": 1.6895421862200783, "grad_norm": 10.75, "learning_rate": 2.6192636436362094e-06, "loss": 1.6643003225326538, "step": 9282 }, { "epoch": 1.6899062528442705, "grad_norm": 10.3125, "learning_rate": 2.618511533651943e-06, "loss": 1.4071576595306396, "step": 9284 }, { "epoch": 1.6902703194684627, "grad_norm": 6.90625, "learning_rate": 2.617759479649563e-06, "loss": 1.356034278869629, "step": 9286 }, { "epoch": 1.690634386092655, "grad_norm": 8.1875, "learning_rate": 2.6170074817394297e-06, "loss": 1.2255876064300537, "step": 9288 }, { "epoch": 1.6909984527168471, "grad_norm": 33.5, "learning_rate": 2.6162555400318966e-06, "loss": 1.416519045829773, "step": 9290 }, { "epoch": 1.6913625193410393, "grad_norm": 3.515625, "learning_rate": 2.615503654637308e-06, "loss": 1.1522860527038574, "step": 9292 }, { "epoch": 1.6917265859652315, "grad_norm": 17.5, "learning_rate": 2.614751825666e-06, "loss": 1.2634915113449097, "step": 9294 }, { "epoch": 1.6920906525894237, "grad_norm": 6.59375, "learning_rate": 2.6140000532283003e-06, "loss": 1.0256493091583252, "step": 9296 }, { "epoch": 1.6924547192136161, "grad_norm": 13.9375, "learning_rate": 2.6132483374345276e-06, "loss": 1.3080412149429321, "step": 9298 }, { "epoch": 1.6928187858378083, "grad_norm": 11.75, "learning_rate": 2.6124966783949956e-06, "loss": 1.8101611137390137, "step": 9300 }, { "epoch": 1.6931828524620005, "grad_norm": 17.875, "learning_rate": 2.611745076220005e-06, "loss": 1.2362911701202393, "step": 9302 }, { "epoch": 1.693546919086193, "grad_norm": 12.5625, "learning_rate": 2.61099353101985e-06, "loss": 0.8984555006027222, "step": 9304 }, { "epoch": 1.6939109857103851, "grad_norm": 11.125, "learning_rate": 2.6102420429048186e-06, "loss": 1.5998139381408691, "step": 9306 }, { "epoch": 1.6942750523345773, "grad_norm": 3.671875, "learning_rate": 2.609490611985187e-06, "loss": 1.3438420295715332, "step": 9308 }, { "epoch": 1.6946391189587695, "grad_norm": 7.53125, "learning_rate": 2.6087392383712274e-06, "loss": 1.505103588104248, "step": 9310 }, { "epoch": 1.6950031855829617, "grad_norm": 31.25, "learning_rate": 2.6079879221731974e-06, "loss": 1.414860486984253, "step": 9312 }, { "epoch": 1.695367252207154, "grad_norm": 13.875, "learning_rate": 2.6072366635013515e-06, "loss": 1.612790584564209, "step": 9314 }, { "epoch": 1.695731318831346, "grad_norm": 31.5, "learning_rate": 2.606485462465934e-06, "loss": 1.4038561582565308, "step": 9316 }, { "epoch": 1.6960953854555383, "grad_norm": 16.625, "learning_rate": 2.605734319177179e-06, "loss": 1.6603057384490967, "step": 9318 }, { "epoch": 1.6964594520797305, "grad_norm": 14.1875, "learning_rate": 2.6049832337453163e-06, "loss": 1.4171526432037354, "step": 9320 }, { "epoch": 1.6968235187039227, "grad_norm": 8.8125, "learning_rate": 2.604232206280562e-06, "loss": 1.1852669715881348, "step": 9322 }, { "epoch": 1.697187585328115, "grad_norm": 47.5, "learning_rate": 2.603481236893127e-06, "loss": 0.8682548999786377, "step": 9324 }, { "epoch": 1.6975516519523073, "grad_norm": 46.25, "learning_rate": 2.602730325693216e-06, "loss": 1.2902064323425293, "step": 9326 }, { "epoch": 1.6979157185764995, "grad_norm": 11.0, "learning_rate": 2.6019794727910173e-06, "loss": 1.3586435317993164, "step": 9328 }, { "epoch": 1.6982797852006917, "grad_norm": 21.625, "learning_rate": 2.601228678296719e-06, "loss": 1.1663780212402344, "step": 9330 }, { "epoch": 1.698643851824884, "grad_norm": 11.8125, "learning_rate": 2.6004779423204963e-06, "loss": 0.8382279276847839, "step": 9332 }, { "epoch": 1.6990079184490763, "grad_norm": 42.0, "learning_rate": 2.5997272649725155e-06, "loss": 0.7721830606460571, "step": 9334 }, { "epoch": 1.6993719850732685, "grad_norm": 7.875, "learning_rate": 2.5989766463629373e-06, "loss": 1.210772156715393, "step": 9336 }, { "epoch": 1.6997360516974607, "grad_norm": 9.0625, "learning_rate": 2.59822608660191e-06, "loss": 1.6795170307159424, "step": 9338 }, { "epoch": 1.7001001183216529, "grad_norm": 8.0, "learning_rate": 2.597475585799577e-06, "loss": 1.5050535202026367, "step": 9340 }, { "epoch": 1.700464184945845, "grad_norm": 7.6875, "learning_rate": 2.5967251440660705e-06, "loss": 1.2744718790054321, "step": 9342 }, { "epoch": 1.7008282515700373, "grad_norm": 10.8125, "learning_rate": 2.5959747615115143e-06, "loss": 1.2136510610580444, "step": 9344 }, { "epoch": 1.7011923181942294, "grad_norm": 14.1875, "learning_rate": 2.5952244382460255e-06, "loss": 1.1773263216018677, "step": 9346 }, { "epoch": 1.7015563848184216, "grad_norm": 7.09375, "learning_rate": 2.594474174379709e-06, "loss": 1.3566792011260986, "step": 9348 }, { "epoch": 1.7019204514426138, "grad_norm": 11.0625, "learning_rate": 2.5937239700226647e-06, "loss": 1.6224448680877686, "step": 9350 }, { "epoch": 1.7022845180668063, "grad_norm": 19.125, "learning_rate": 2.5929738252849807e-06, "loss": 1.5998003482818604, "step": 9352 }, { "epoch": 1.7026485846909984, "grad_norm": 23.125, "learning_rate": 2.592223740276738e-06, "loss": 1.6076273918151855, "step": 9354 }, { "epoch": 1.7030126513151906, "grad_norm": 17.0, "learning_rate": 2.5914737151080095e-06, "loss": 1.2455108165740967, "step": 9356 }, { "epoch": 1.703376717939383, "grad_norm": 7.5625, "learning_rate": 2.590723749888857e-06, "loss": 1.497173547744751, "step": 9358 }, { "epoch": 1.7037407845635753, "grad_norm": 16.25, "learning_rate": 2.5899738447293366e-06, "loss": 1.5745115280151367, "step": 9360 }, { "epoch": 1.7041048511877674, "grad_norm": 13.4375, "learning_rate": 2.5892239997394924e-06, "loss": 1.5484111309051514, "step": 9362 }, { "epoch": 1.7044689178119596, "grad_norm": 9.875, "learning_rate": 2.5884742150293607e-06, "loss": 1.6625993251800537, "step": 9364 }, { "epoch": 1.7048329844361518, "grad_norm": 4.96875, "learning_rate": 2.5877244907089716e-06, "loss": 1.0240740776062012, "step": 9366 }, { "epoch": 1.705197051060344, "grad_norm": 8.6875, "learning_rate": 2.5869748268883414e-06, "loss": 1.629879117012024, "step": 9368 }, { "epoch": 1.7055611176845362, "grad_norm": 12.625, "learning_rate": 2.586225223677483e-06, "loss": 1.4819724559783936, "step": 9370 }, { "epoch": 1.7059251843087284, "grad_norm": 12.625, "learning_rate": 2.5854756811863945e-06, "loss": 1.4234089851379395, "step": 9372 }, { "epoch": 1.7062892509329206, "grad_norm": 6.9375, "learning_rate": 2.5847261995250705e-06, "loss": 1.3711363077163696, "step": 9374 }, { "epoch": 1.7066533175571128, "grad_norm": 7.90625, "learning_rate": 2.583976778803494e-06, "loss": 1.3665838241577148, "step": 9376 }, { "epoch": 1.7070173841813052, "grad_norm": 9.5, "learning_rate": 2.583227419131639e-06, "loss": 1.2841482162475586, "step": 9378 }, { "epoch": 1.7073814508054974, "grad_norm": 16.5, "learning_rate": 2.5824781206194717e-06, "loss": 1.358459711074829, "step": 9380 }, { "epoch": 1.7077455174296896, "grad_norm": 15.75, "learning_rate": 2.581728883376947e-06, "loss": 1.4769535064697266, "step": 9382 }, { "epoch": 1.708109584053882, "grad_norm": 18.625, "learning_rate": 2.580979707514014e-06, "loss": 1.9856996536254883, "step": 9384 }, { "epoch": 1.7084736506780742, "grad_norm": 10.875, "learning_rate": 2.580230593140612e-06, "loss": 1.3805100917816162, "step": 9386 }, { "epoch": 1.7088377173022664, "grad_norm": 13.6875, "learning_rate": 2.5794815403666674e-06, "loss": 1.180823564529419, "step": 9388 }, { "epoch": 1.7092017839264586, "grad_norm": 14.0625, "learning_rate": 2.5787325493021042e-06, "loss": 1.2458652257919312, "step": 9390 }, { "epoch": 1.7095658505506508, "grad_norm": 16.0, "learning_rate": 2.577983620056831e-06, "loss": 1.3731491565704346, "step": 9392 }, { "epoch": 1.709929917174843, "grad_norm": 11.5625, "learning_rate": 2.5772347527407514e-06, "loss": 1.512205958366394, "step": 9394 }, { "epoch": 1.7102939837990352, "grad_norm": 13.0, "learning_rate": 2.576485947463759e-06, "loss": 1.0887117385864258, "step": 9396 }, { "epoch": 1.7106580504232274, "grad_norm": 9.1875, "learning_rate": 2.5757372043357376e-06, "loss": 0.7916741371154785, "step": 9398 }, { "epoch": 1.7110221170474196, "grad_norm": 17.0, "learning_rate": 2.5749885234665617e-06, "loss": 1.1433402299880981, "step": 9400 }, { "epoch": 1.7113861836716118, "grad_norm": 19.375, "learning_rate": 2.5742399049660976e-06, "loss": 1.6151204109191895, "step": 9402 }, { "epoch": 1.711750250295804, "grad_norm": 10.125, "learning_rate": 2.5734913489442016e-06, "loss": 1.2714388370513916, "step": 9404 }, { "epoch": 1.7121143169199964, "grad_norm": 7.21875, "learning_rate": 2.5727428555107224e-06, "loss": 1.5417263507843018, "step": 9406 }, { "epoch": 1.7124783835441886, "grad_norm": 9.0, "learning_rate": 2.571994424775497e-06, "loss": 1.2543859481811523, "step": 9408 }, { "epoch": 1.7128424501683808, "grad_norm": 16.875, "learning_rate": 2.5712460568483554e-06, "loss": 1.5546578168869019, "step": 9410 }, { "epoch": 1.7132065167925732, "grad_norm": 6.21875, "learning_rate": 2.5704977518391176e-06, "loss": 1.0436283349990845, "step": 9412 }, { "epoch": 1.7135705834167654, "grad_norm": 10.0, "learning_rate": 2.569749509857594e-06, "loss": 1.3642622232437134, "step": 9414 }, { "epoch": 1.7139346500409576, "grad_norm": 19.5, "learning_rate": 2.5690013310135866e-06, "loss": 0.9530158042907715, "step": 9416 }, { "epoch": 1.7142987166651498, "grad_norm": 31.5, "learning_rate": 2.568253215416886e-06, "loss": 1.4778410196304321, "step": 9418 }, { "epoch": 1.714662783289342, "grad_norm": 9.625, "learning_rate": 2.5675051631772774e-06, "loss": 1.286505937576294, "step": 9420 }, { "epoch": 1.7150268499135342, "grad_norm": 12.375, "learning_rate": 2.5667571744045328e-06, "loss": 1.4485591650009155, "step": 9422 }, { "epoch": 1.7153909165377264, "grad_norm": 9.75, "learning_rate": 2.5660092492084163e-06, "loss": 1.3602252006530762, "step": 9424 }, { "epoch": 1.7157549831619185, "grad_norm": 13.125, "learning_rate": 2.5652613876986853e-06, "loss": 0.9761204719543457, "step": 9426 }, { "epoch": 1.7161190497861107, "grad_norm": 26.375, "learning_rate": 2.5645135899850826e-06, "loss": 1.7280157804489136, "step": 9428 }, { "epoch": 1.716483116410303, "grad_norm": 10.5, "learning_rate": 2.5637658561773472e-06, "loss": 1.4641187191009521, "step": 9430 }, { "epoch": 1.7168471830344953, "grad_norm": 10.1875, "learning_rate": 2.5630181863852034e-06, "loss": 1.4083130359649658, "step": 9432 }, { "epoch": 1.7172112496586875, "grad_norm": 14.0, "learning_rate": 2.562270580718369e-06, "loss": 0.9213528037071228, "step": 9434 }, { "epoch": 1.7175753162828797, "grad_norm": 11.5, "learning_rate": 2.5615230392865544e-06, "loss": 1.0007522106170654, "step": 9436 }, { "epoch": 1.7179393829070722, "grad_norm": 38.5, "learning_rate": 2.560775562199456e-06, "loss": 0.6947571039199829, "step": 9438 }, { "epoch": 1.7183034495312643, "grad_norm": 17.75, "learning_rate": 2.5600281495667646e-06, "loss": 1.6564505100250244, "step": 9440 }, { "epoch": 1.7186675161554565, "grad_norm": 25.625, "learning_rate": 2.5592808014981586e-06, "loss": 1.7123980522155762, "step": 9442 }, { "epoch": 1.7190315827796487, "grad_norm": 7.5, "learning_rate": 2.5585335181033088e-06, "loss": 1.358048439025879, "step": 9444 }, { "epoch": 1.719395649403841, "grad_norm": 6.0625, "learning_rate": 2.5577862994918765e-06, "loss": 1.3348028659820557, "step": 9446 }, { "epoch": 1.7197597160280331, "grad_norm": 9.5625, "learning_rate": 2.557039145773512e-06, "loss": 1.3989973068237305, "step": 9448 }, { "epoch": 1.7201237826522253, "grad_norm": 13.625, "learning_rate": 2.5562920570578586e-06, "loss": 1.7857215404510498, "step": 9450 }, { "epoch": 1.7204878492764175, "grad_norm": 3.828125, "learning_rate": 2.5555450334545472e-06, "loss": 1.1638060808181763, "step": 9452 }, { "epoch": 1.7208519159006097, "grad_norm": 6.65625, "learning_rate": 2.5547980750732006e-06, "loss": 1.1869091987609863, "step": 9454 }, { "epoch": 1.721215982524802, "grad_norm": 14.6875, "learning_rate": 2.5540511820234327e-06, "loss": 1.2031751871109009, "step": 9456 }, { "epoch": 1.7215800491489943, "grad_norm": 18.25, "learning_rate": 2.5533043544148463e-06, "loss": 1.6081607341766357, "step": 9458 }, { "epoch": 1.7219441157731865, "grad_norm": 12.875, "learning_rate": 2.5525575923570356e-06, "loss": 1.149942398071289, "step": 9460 }, { "epoch": 1.7223081823973787, "grad_norm": 16.625, "learning_rate": 2.551810895959585e-06, "loss": 1.8807177543640137, "step": 9462 }, { "epoch": 1.722672249021571, "grad_norm": 10.75, "learning_rate": 2.551064265332068e-06, "loss": 1.5904488563537598, "step": 9464 }, { "epoch": 1.7230363156457633, "grad_norm": 12.375, "learning_rate": 2.5503177005840527e-06, "loss": 1.200134038925171, "step": 9466 }, { "epoch": 1.7234003822699555, "grad_norm": 9.75, "learning_rate": 2.5495712018250907e-06, "loss": 0.9712511301040649, "step": 9468 }, { "epoch": 1.7237644488941477, "grad_norm": 6.53125, "learning_rate": 2.5488247691647306e-06, "loss": 1.2511146068572998, "step": 9470 }, { "epoch": 1.72412851551834, "grad_norm": 15.3125, "learning_rate": 2.548078402712507e-06, "loss": 1.2763681411743164, "step": 9472 }, { "epoch": 1.724492582142532, "grad_norm": 57.5, "learning_rate": 2.5473321025779453e-06, "loss": 1.3130264282226562, "step": 9474 }, { "epoch": 1.7248566487667243, "grad_norm": 11.5625, "learning_rate": 2.5465858688705647e-06, "loss": 1.5271480083465576, "step": 9476 }, { "epoch": 1.7252207153909165, "grad_norm": 4.21875, "learning_rate": 2.5458397016998688e-06, "loss": 1.0769906044006348, "step": 9478 }, { "epoch": 1.7255847820151087, "grad_norm": 19.625, "learning_rate": 2.545093601175357e-06, "loss": 1.4715313911437988, "step": 9480 }, { "epoch": 1.7259488486393009, "grad_norm": 6.8125, "learning_rate": 2.5443475674065155e-06, "loss": 1.36512291431427, "step": 9482 }, { "epoch": 1.726312915263493, "grad_norm": 9.375, "learning_rate": 2.543601600502821e-06, "loss": 1.8886725902557373, "step": 9484 }, { "epoch": 1.7266769818876855, "grad_norm": 7.96875, "learning_rate": 2.542855700573743e-06, "loss": 1.5973446369171143, "step": 9486 }, { "epoch": 1.7270410485118777, "grad_norm": 11.625, "learning_rate": 2.542109867728738e-06, "loss": 1.3817940950393677, "step": 9488 }, { "epoch": 1.7274051151360699, "grad_norm": 10.6875, "learning_rate": 2.541364102077255e-06, "loss": 1.3823630809783936, "step": 9490 }, { "epoch": 1.7277691817602623, "grad_norm": 16.25, "learning_rate": 2.5406184037287306e-06, "loss": 1.6294407844543457, "step": 9492 }, { "epoch": 1.7281332483844545, "grad_norm": 13.8125, "learning_rate": 2.5398727727925932e-06, "loss": 1.61515212059021, "step": 9494 }, { "epoch": 1.7284973150086467, "grad_norm": 11.5625, "learning_rate": 2.5391272093782628e-06, "loss": 1.671416997909546, "step": 9496 }, { "epoch": 1.7288613816328389, "grad_norm": 33.75, "learning_rate": 2.5383817135951454e-06, "loss": 1.786433219909668, "step": 9498 }, { "epoch": 1.729225448257031, "grad_norm": 9.0625, "learning_rate": 2.5376362855526423e-06, "loss": 1.5212745666503906, "step": 9500 }, { "epoch": 1.7295895148812233, "grad_norm": 16.5, "learning_rate": 2.536890925360139e-06, "loss": 1.1949679851531982, "step": 9502 }, { "epoch": 1.7299535815054154, "grad_norm": 12.8125, "learning_rate": 2.5361456331270164e-06, "loss": 1.3068662881851196, "step": 9504 }, { "epoch": 1.7303176481296076, "grad_norm": 22.125, "learning_rate": 2.535400408962643e-06, "loss": 1.3767890930175781, "step": 9506 }, { "epoch": 1.7306817147537998, "grad_norm": 10.625, "learning_rate": 2.5346552529763744e-06, "loss": 1.7470083236694336, "step": 9508 }, { "epoch": 1.731045781377992, "grad_norm": 21.5, "learning_rate": 2.5339101652775637e-06, "loss": 1.6628854274749756, "step": 9510 }, { "epoch": 1.7314098480021844, "grad_norm": 27.5, "learning_rate": 2.5331651459755467e-06, "loss": 0.6716596484184265, "step": 9512 }, { "epoch": 1.7317739146263766, "grad_norm": 17.75, "learning_rate": 2.5324201951796525e-06, "loss": 0.9974995255470276, "step": 9514 }, { "epoch": 1.7321379812505688, "grad_norm": 26.875, "learning_rate": 2.5316753129992e-06, "loss": 1.5912859439849854, "step": 9516 }, { "epoch": 1.732502047874761, "grad_norm": 13.9375, "learning_rate": 2.530930499543497e-06, "loss": 2.0026633739471436, "step": 9518 }, { "epoch": 1.7328661144989534, "grad_norm": 4.625, "learning_rate": 2.530185754921842e-06, "loss": 1.2697510719299316, "step": 9520 }, { "epoch": 1.7332301811231456, "grad_norm": 9.625, "learning_rate": 2.5294410792435244e-06, "loss": 1.0889990329742432, "step": 9522 }, { "epoch": 1.7335942477473378, "grad_norm": 57.0, "learning_rate": 2.528696472617821e-06, "loss": 1.425118088722229, "step": 9524 }, { "epoch": 1.73395831437153, "grad_norm": 7.40625, "learning_rate": 2.5279519351540003e-06, "loss": 0.8747223615646362, "step": 9526 }, { "epoch": 1.7343223809957222, "grad_norm": 7.53125, "learning_rate": 2.5272074669613188e-06, "loss": 1.295595645904541, "step": 9528 }, { "epoch": 1.7346864476199144, "grad_norm": 5.3125, "learning_rate": 2.5264630681490276e-06, "loss": 0.5483484268188477, "step": 9530 }, { "epoch": 1.7350505142441066, "grad_norm": 12.625, "learning_rate": 2.5257187388263605e-06, "loss": 1.5035117864608765, "step": 9532 }, { "epoch": 1.7354145808682988, "grad_norm": 11.5625, "learning_rate": 2.5249744791025467e-06, "loss": 1.470358967781067, "step": 9534 }, { "epoch": 1.735778647492491, "grad_norm": 10.4375, "learning_rate": 2.5242302890868035e-06, "loss": 1.357577919960022, "step": 9536 }, { "epoch": 1.7361427141166832, "grad_norm": 5.78125, "learning_rate": 2.5234861688883367e-06, "loss": 1.4563628435134888, "step": 9538 }, { "epoch": 1.7365067807408756, "grad_norm": 8.4375, "learning_rate": 2.5227421186163436e-06, "loss": 1.495448350906372, "step": 9540 }, { "epoch": 1.7368708473650678, "grad_norm": 15.75, "learning_rate": 2.5219981383800107e-06, "loss": 1.5220292806625366, "step": 9542 }, { "epoch": 1.73723491398926, "grad_norm": 13.375, "learning_rate": 2.5212542282885133e-06, "loss": 1.4442135095596313, "step": 9544 }, { "epoch": 1.7375989806134524, "grad_norm": 7.90625, "learning_rate": 2.5205103884510186e-06, "loss": 1.253954529762268, "step": 9546 }, { "epoch": 1.7379630472376446, "grad_norm": 11.1875, "learning_rate": 2.51976661897668e-06, "loss": 1.4745149612426758, "step": 9548 }, { "epoch": 1.7383271138618368, "grad_norm": 28.25, "learning_rate": 2.5190229199746454e-06, "loss": 2.002610683441162, "step": 9550 }, { "epoch": 1.738691180486029, "grad_norm": 6.625, "learning_rate": 2.5182792915540464e-06, "loss": 1.028231143951416, "step": 9552 }, { "epoch": 1.7390552471102212, "grad_norm": 12.8125, "learning_rate": 2.51753573382401e-06, "loss": 1.2153304815292358, "step": 9554 }, { "epoch": 1.7394193137344134, "grad_norm": 28.25, "learning_rate": 2.51679224689365e-06, "loss": 1.9716033935546875, "step": 9556 }, { "epoch": 1.7397833803586056, "grad_norm": 10.9375, "learning_rate": 2.5160488308720675e-06, "loss": 1.5472264289855957, "step": 9558 }, { "epoch": 1.7401474469827978, "grad_norm": 7.5, "learning_rate": 2.51530548586836e-06, "loss": 1.529146432876587, "step": 9560 }, { "epoch": 1.74051151360699, "grad_norm": 14.4375, "learning_rate": 2.5145622119916073e-06, "loss": 1.5244109630584717, "step": 9562 }, { "epoch": 1.7408755802311822, "grad_norm": 6.78125, "learning_rate": 2.513819009350882e-06, "loss": 1.4480321407318115, "step": 9564 }, { "epoch": 1.7412396468553746, "grad_norm": 33.25, "learning_rate": 2.513075878055248e-06, "loss": 1.1801588535308838, "step": 9566 }, { "epoch": 1.7416037134795668, "grad_norm": 19.375, "learning_rate": 2.5123328182137546e-06, "loss": 1.3722394704818726, "step": 9568 }, { "epoch": 1.741967780103759, "grad_norm": 17.5, "learning_rate": 2.5115898299354446e-06, "loss": 1.5290158987045288, "step": 9570 }, { "epoch": 1.7423318467279512, "grad_norm": 10.3125, "learning_rate": 2.5108469133293467e-06, "loss": 1.6164394617080688, "step": 9572 }, { "epoch": 1.7426959133521436, "grad_norm": 10.6875, "learning_rate": 2.5101040685044825e-06, "loss": 1.5796513557434082, "step": 9574 }, { "epoch": 1.7430599799763358, "grad_norm": 10.5625, "learning_rate": 2.509361295569861e-06, "loss": 1.4934625625610352, "step": 9576 }, { "epoch": 1.743424046600528, "grad_norm": 5.3125, "learning_rate": 2.5086185946344805e-06, "loss": 0.552492618560791, "step": 9578 }, { "epoch": 1.7437881132247202, "grad_norm": 11.8125, "learning_rate": 2.5078759658073305e-06, "loss": 1.4284861087799072, "step": 9580 }, { "epoch": 1.7441521798489124, "grad_norm": 8.875, "learning_rate": 2.5071334091973875e-06, "loss": 1.740067720413208, "step": 9582 }, { "epoch": 1.7445162464731045, "grad_norm": 8.1875, "learning_rate": 2.50639092491362e-06, "loss": 1.3642146587371826, "step": 9584 }, { "epoch": 1.7448803130972967, "grad_norm": 30.125, "learning_rate": 2.5056485130649834e-06, "loss": 1.5055596828460693, "step": 9586 }, { "epoch": 1.745244379721489, "grad_norm": 18.0, "learning_rate": 2.504906173760423e-06, "loss": 2.0214505195617676, "step": 9588 }, { "epoch": 1.7456084463456811, "grad_norm": 9.25, "learning_rate": 2.5041639071088773e-06, "loss": 1.335474967956543, "step": 9590 }, { "epoch": 1.7459725129698733, "grad_norm": 8.125, "learning_rate": 2.503421713219267e-06, "loss": 1.606168508529663, "step": 9592 }, { "epoch": 1.7463365795940657, "grad_norm": 8.3125, "learning_rate": 2.5026795922005086e-06, "loss": 1.4051029682159424, "step": 9594 }, { "epoch": 1.746700646218258, "grad_norm": 17.5, "learning_rate": 2.5019375441615046e-06, "loss": 1.4665770530700684, "step": 9596 }, { "epoch": 1.7470647128424501, "grad_norm": 9.3125, "learning_rate": 2.5011955692111477e-06, "loss": 1.4853754043579102, "step": 9598 }, { "epoch": 1.7474287794666425, "grad_norm": 10.0625, "learning_rate": 2.500453667458319e-06, "loss": 1.4237127304077148, "step": 9600 }, { "epoch": 1.7477928460908347, "grad_norm": 7.21875, "learning_rate": 2.499711839011891e-06, "loss": 1.3613488674163818, "step": 9602 }, { "epoch": 1.748156912715027, "grad_norm": 10.3125, "learning_rate": 2.498970083980722e-06, "loss": 0.9239940643310547, "step": 9604 }, { "epoch": 1.7485209793392191, "grad_norm": 14.9375, "learning_rate": 2.4982284024736638e-06, "loss": 1.1943309307098389, "step": 9606 }, { "epoch": 1.7488850459634113, "grad_norm": 7.09375, "learning_rate": 2.4974867945995528e-06, "loss": 0.851227879524231, "step": 9608 }, { "epoch": 1.7492491125876035, "grad_norm": 11.75, "learning_rate": 2.496745260467219e-06, "loss": 1.4715644121170044, "step": 9610 }, { "epoch": 1.7496131792117957, "grad_norm": 20.875, "learning_rate": 2.4960038001854788e-06, "loss": 1.628612995147705, "step": 9612 }, { "epoch": 1.749977245835988, "grad_norm": 10.6875, "learning_rate": 2.4952624138631376e-06, "loss": 1.5021353960037231, "step": 9614 }, { "epoch": 1.75034131246018, "grad_norm": 13.0625, "learning_rate": 2.4945211016089928e-06, "loss": 1.4636298418045044, "step": 9616 }, { "epoch": 1.7507053790843723, "grad_norm": 7.59375, "learning_rate": 2.4937798635318266e-06, "loss": 1.2379364967346191, "step": 9618 }, { "epoch": 1.7510694457085647, "grad_norm": 12.8125, "learning_rate": 2.4930386997404148e-06, "loss": 1.788224458694458, "step": 9620 }, { "epoch": 1.751433512332757, "grad_norm": 16.75, "learning_rate": 2.492297610343519e-06, "loss": 1.8670352697372437, "step": 9622 }, { "epoch": 1.751797578956949, "grad_norm": 7.8125, "learning_rate": 2.491556595449891e-06, "loss": 1.2936989068984985, "step": 9624 }, { "epoch": 1.7521616455811415, "grad_norm": 5.71875, "learning_rate": 2.490815655168273e-06, "loss": 0.8864408135414124, "step": 9626 }, { "epoch": 1.7525257122053337, "grad_norm": 7.125, "learning_rate": 2.490074789607393e-06, "loss": 1.0749104022979736, "step": 9628 }, { "epoch": 1.752889778829526, "grad_norm": 28.5, "learning_rate": 2.4893339988759717e-06, "loss": 1.4294183254241943, "step": 9630 }, { "epoch": 1.753253845453718, "grad_norm": 12.25, "learning_rate": 2.4885932830827162e-06, "loss": 1.4415847063064575, "step": 9632 }, { "epoch": 1.7536179120779103, "grad_norm": 5.4375, "learning_rate": 2.4878526423363236e-06, "loss": 1.1845145225524902, "step": 9634 }, { "epoch": 1.7539819787021025, "grad_norm": 21.0, "learning_rate": 2.4871120767454814e-06, "loss": 1.7602466344833374, "step": 9636 }, { "epoch": 1.7543460453262947, "grad_norm": 15.875, "learning_rate": 2.486371586418862e-06, "loss": 1.47333824634552, "step": 9638 }, { "epoch": 1.7547101119504869, "grad_norm": 13.3125, "learning_rate": 2.4856311714651317e-06, "loss": 1.3083434104919434, "step": 9640 }, { "epoch": 1.755074178574679, "grad_norm": 14.0, "learning_rate": 2.484890831992942e-06, "loss": 1.6655809879302979, "step": 9642 }, { "epoch": 1.7554382451988713, "grad_norm": 16.0, "learning_rate": 2.4841505681109348e-06, "loss": 0.9073799848556519, "step": 9644 }, { "epoch": 1.7558023118230635, "grad_norm": 12.75, "learning_rate": 2.483410379927742e-06, "loss": 1.241572618484497, "step": 9646 }, { "epoch": 1.7561663784472559, "grad_norm": 9.75, "learning_rate": 2.482670267551982e-06, "loss": 1.8137145042419434, "step": 9648 }, { "epoch": 1.756530445071448, "grad_norm": 7.4375, "learning_rate": 2.4819302310922635e-06, "loss": 1.2279140949249268, "step": 9650 }, { "epoch": 1.7568945116956403, "grad_norm": 7.5, "learning_rate": 2.4811902706571846e-06, "loss": 1.3970588445663452, "step": 9652 }, { "epoch": 1.7572585783198327, "grad_norm": 7.84375, "learning_rate": 2.48045038635533e-06, "loss": 1.402956247329712, "step": 9654 }, { "epoch": 1.7576226449440249, "grad_norm": 16.625, "learning_rate": 2.4797105782952767e-06, "loss": 1.8207128047943115, "step": 9656 }, { "epoch": 1.757986711568217, "grad_norm": 10.8125, "learning_rate": 2.4789708465855863e-06, "loss": 1.483075737953186, "step": 9658 }, { "epoch": 1.7583507781924093, "grad_norm": 18.875, "learning_rate": 2.4782311913348135e-06, "loss": 1.5671143531799316, "step": 9660 }, { "epoch": 1.7587148448166015, "grad_norm": 49.0, "learning_rate": 2.477491612651499e-06, "loss": 1.3647823333740234, "step": 9662 }, { "epoch": 1.7590789114407936, "grad_norm": 7.15625, "learning_rate": 2.476752110644172e-06, "loss": 1.352724552154541, "step": 9664 }, { "epoch": 1.7594429780649858, "grad_norm": 14.625, "learning_rate": 2.476012685421353e-06, "loss": 1.2368907928466797, "step": 9666 }, { "epoch": 1.759807044689178, "grad_norm": 47.25, "learning_rate": 2.475273337091548e-06, "loss": 1.5982136726379395, "step": 9668 }, { "epoch": 1.7601711113133702, "grad_norm": 9.75, "learning_rate": 2.474534065763255e-06, "loss": 1.4263297319412231, "step": 9670 }, { "epoch": 1.7605351779375624, "grad_norm": 9.375, "learning_rate": 2.473794871544958e-06, "loss": 1.2839057445526123, "step": 9672 }, { "epoch": 1.7608992445617548, "grad_norm": 13.6875, "learning_rate": 2.473055754545131e-06, "loss": 1.248408555984497, "step": 9674 }, { "epoch": 1.761263311185947, "grad_norm": 16.875, "learning_rate": 2.472316714872237e-06, "loss": 1.9855908155441284, "step": 9676 }, { "epoch": 1.7616273778101392, "grad_norm": 17.625, "learning_rate": 2.4715777526347258e-06, "loss": 1.597987413406372, "step": 9678 }, { "epoch": 1.7619914444343316, "grad_norm": 17.25, "learning_rate": 2.470838867941039e-06, "loss": 1.511553168296814, "step": 9680 }, { "epoch": 1.7623555110585238, "grad_norm": 6.96875, "learning_rate": 2.470100060899603e-06, "loss": 1.247733473777771, "step": 9682 }, { "epoch": 1.762719577682716, "grad_norm": 35.25, "learning_rate": 2.469361331618835e-06, "loss": 1.3600777387619019, "step": 9684 }, { "epoch": 1.7630836443069082, "grad_norm": 6.53125, "learning_rate": 2.4686226802071424e-06, "loss": 1.0719618797302246, "step": 9686 }, { "epoch": 1.7634477109311004, "grad_norm": 12.5, "learning_rate": 2.467884106772917e-06, "loss": 1.487484097480774, "step": 9688 }, { "epoch": 1.7638117775552926, "grad_norm": 19.25, "learning_rate": 2.467145611424543e-06, "loss": 1.0855759382247925, "step": 9690 }, { "epoch": 1.7641758441794848, "grad_norm": 11.0625, "learning_rate": 2.4664071942703914e-06, "loss": 1.6698211431503296, "step": 9692 }, { "epoch": 1.764539910803677, "grad_norm": 15.0, "learning_rate": 2.4656688554188203e-06, "loss": 1.6015561819076538, "step": 9694 }, { "epoch": 1.7649039774278692, "grad_norm": 13.875, "learning_rate": 2.464930594978181e-06, "loss": 1.7836737632751465, "step": 9696 }, { "epoch": 1.7652680440520614, "grad_norm": 18.125, "learning_rate": 2.4641924130568066e-06, "loss": 1.5052692890167236, "step": 9698 }, { "epoch": 1.7656321106762538, "grad_norm": 8.75, "learning_rate": 2.4634543097630255e-06, "loss": 1.4619030952453613, "step": 9700 }, { "epoch": 1.765996177300446, "grad_norm": 6.59375, "learning_rate": 2.4627162852051495e-06, "loss": 1.549086332321167, "step": 9702 }, { "epoch": 1.7663602439246382, "grad_norm": 7.75, "learning_rate": 2.461978339491481e-06, "loss": 1.185326099395752, "step": 9704 }, { "epoch": 1.7667243105488304, "grad_norm": 11.0625, "learning_rate": 2.4612404727303115e-06, "loss": 1.426266074180603, "step": 9706 }, { "epoch": 1.7670883771730228, "grad_norm": 7.625, "learning_rate": 2.460502685029918e-06, "loss": 1.2171050310134888, "step": 9708 }, { "epoch": 1.767452443797215, "grad_norm": 8.125, "learning_rate": 2.45976497649857e-06, "loss": 1.066184163093567, "step": 9710 }, { "epoch": 1.7678165104214072, "grad_norm": 9.5, "learning_rate": 2.4590273472445226e-06, "loss": 1.266125202178955, "step": 9712 }, { "epoch": 1.7681805770455994, "grad_norm": 5.84375, "learning_rate": 2.4582897973760187e-06, "loss": 1.1672508716583252, "step": 9714 }, { "epoch": 1.7685446436697916, "grad_norm": 149.0, "learning_rate": 2.4575523270012925e-06, "loss": 1.6852155923843384, "step": 9716 }, { "epoch": 1.7689087102939838, "grad_norm": 16.75, "learning_rate": 2.4568149362285633e-06, "loss": 1.973315715789795, "step": 9718 }, { "epoch": 1.769272776918176, "grad_norm": 5.65625, "learning_rate": 2.456077625166041e-06, "loss": 1.35611093044281, "step": 9720 }, { "epoch": 1.7696368435423682, "grad_norm": 19.0, "learning_rate": 2.4553403939219233e-06, "loss": 1.5911699533462524, "step": 9722 }, { "epoch": 1.7700009101665604, "grad_norm": 26.0, "learning_rate": 2.4546032426043947e-06, "loss": 1.450231909751892, "step": 9724 }, { "epoch": 1.7703649767907526, "grad_norm": 11.625, "learning_rate": 2.4538661713216312e-06, "loss": 1.0855478048324585, "step": 9726 }, { "epoch": 1.770729043414945, "grad_norm": 21.25, "learning_rate": 2.4531291801817926e-06, "loss": 0.5967104434967041, "step": 9728 }, { "epoch": 1.7710931100391372, "grad_norm": 63.25, "learning_rate": 2.4523922692930313e-06, "loss": 1.5665773153305054, "step": 9730 }, { "epoch": 1.7714571766633294, "grad_norm": 11.5, "learning_rate": 2.451655438763485e-06, "loss": 1.5051593780517578, "step": 9732 }, { "epoch": 1.7718212432875218, "grad_norm": 5.5625, "learning_rate": 2.45091868870128e-06, "loss": 1.6245388984680176, "step": 9734 }, { "epoch": 1.772185309911714, "grad_norm": 5.9375, "learning_rate": 2.4501820192145335e-06, "loss": 1.2335116863250732, "step": 9736 }, { "epoch": 1.7725493765359062, "grad_norm": 8.375, "learning_rate": 2.4494454304113464e-06, "loss": 1.3282036781311035, "step": 9738 }, { "epoch": 1.7729134431600984, "grad_norm": 9.4375, "learning_rate": 2.448708922399812e-06, "loss": 1.277222752571106, "step": 9740 }, { "epoch": 1.7732775097842906, "grad_norm": 28.625, "learning_rate": 2.447972495288009e-06, "loss": 1.4769645929336548, "step": 9742 }, { "epoch": 1.7736415764084827, "grad_norm": 14.4375, "learning_rate": 2.4472361491840046e-06, "loss": 1.7494463920593262, "step": 9744 }, { "epoch": 1.774005643032675, "grad_norm": 9.0625, "learning_rate": 2.4464998841958554e-06, "loss": 1.5159399509429932, "step": 9746 }, { "epoch": 1.7743697096568671, "grad_norm": 15.75, "learning_rate": 2.4457637004316048e-06, "loss": 1.6344311237335205, "step": 9748 }, { "epoch": 1.7747337762810593, "grad_norm": 13.4375, "learning_rate": 2.4450275979992854e-06, "loss": 1.628173828125, "step": 9750 }, { "epoch": 1.7750978429052515, "grad_norm": 9.75, "learning_rate": 2.444291577006917e-06, "loss": 1.2222493886947632, "step": 9752 }, { "epoch": 1.775461909529444, "grad_norm": 11.25, "learning_rate": 2.443555637562507e-06, "loss": 1.3597251176834106, "step": 9754 }, { "epoch": 1.7758259761536361, "grad_norm": 16.375, "learning_rate": 2.4428197797740526e-06, "loss": 1.582828164100647, "step": 9756 }, { "epoch": 1.7761900427778283, "grad_norm": 16.625, "learning_rate": 2.4420840037495373e-06, "loss": 1.9756643772125244, "step": 9758 }, { "epoch": 1.7765541094020205, "grad_norm": 9.375, "learning_rate": 2.441348309596934e-06, "loss": 1.3612494468688965, "step": 9760 }, { "epoch": 1.776918176026213, "grad_norm": 8.3125, "learning_rate": 2.440612697424202e-06, "loss": 1.1914498805999756, "step": 9762 }, { "epoch": 1.7772822426504051, "grad_norm": 8.5, "learning_rate": 2.4398771673392884e-06, "loss": 1.3873649835586548, "step": 9764 }, { "epoch": 1.7776463092745973, "grad_norm": 10.375, "learning_rate": 2.4391417194501323e-06, "loss": 1.4707486629486084, "step": 9766 }, { "epoch": 1.7780103758987895, "grad_norm": 16.5, "learning_rate": 2.4384063538646545e-06, "loss": 1.2610704898834229, "step": 9768 }, { "epoch": 1.7783744425229817, "grad_norm": 6.28125, "learning_rate": 2.437671070690769e-06, "loss": 0.9276196956634521, "step": 9770 }, { "epoch": 1.778738509147174, "grad_norm": 10.6875, "learning_rate": 2.436935870036375e-06, "loss": 1.3663438558578491, "step": 9772 }, { "epoch": 1.779102575771366, "grad_norm": 5.90625, "learning_rate": 2.4362007520093595e-06, "loss": 1.3276851177215576, "step": 9774 }, { "epoch": 1.7794666423955583, "grad_norm": 23.5, "learning_rate": 2.4354657167176e-06, "loss": 1.2276180982589722, "step": 9776 }, { "epoch": 1.7798307090197505, "grad_norm": 61.0, "learning_rate": 2.434730764268958e-06, "loss": 1.7314021587371826, "step": 9778 }, { "epoch": 1.7801947756439427, "grad_norm": 14.3125, "learning_rate": 2.4339958947712854e-06, "loss": 1.318850040435791, "step": 9780 }, { "epoch": 1.780558842268135, "grad_norm": 10.5, "learning_rate": 2.433261108332422e-06, "loss": 1.0651581287384033, "step": 9782 }, { "epoch": 1.7809229088923273, "grad_norm": 6.34375, "learning_rate": 2.432526405060193e-06, "loss": 1.230223536491394, "step": 9784 }, { "epoch": 1.7812869755165195, "grad_norm": 11.5, "learning_rate": 2.4317917850624152e-06, "loss": 1.2609082460403442, "step": 9786 }, { "epoch": 1.781651042140712, "grad_norm": 13.9375, "learning_rate": 2.4310572484468897e-06, "loss": 1.7883930206298828, "step": 9788 }, { "epoch": 1.782015108764904, "grad_norm": 5.03125, "learning_rate": 2.430322795321408e-06, "loss": 1.3512423038482666, "step": 9790 }, { "epoch": 1.7823791753890963, "grad_norm": 6.78125, "learning_rate": 2.429588425793747e-06, "loss": 1.3327968120574951, "step": 9792 }, { "epoch": 1.7827432420132885, "grad_norm": 10.625, "learning_rate": 2.4288541399716725e-06, "loss": 1.5858317613601685, "step": 9794 }, { "epoch": 1.7831073086374807, "grad_norm": 10.0, "learning_rate": 2.428119937962939e-06, "loss": 1.4162780046463013, "step": 9796 }, { "epoch": 1.7834713752616729, "grad_norm": 10.875, "learning_rate": 2.4273858198752863e-06, "loss": 1.5521361827850342, "step": 9798 }, { "epoch": 1.783835441885865, "grad_norm": 10.1875, "learning_rate": 2.4266517858164444e-06, "loss": 1.471111536026001, "step": 9800 }, { "epoch": 1.7841995085100573, "grad_norm": 15.6875, "learning_rate": 2.425917835894129e-06, "loss": 1.2437063455581665, "step": 9802 }, { "epoch": 1.7845635751342495, "grad_norm": 22.75, "learning_rate": 2.425183970216045e-06, "loss": 1.3602137565612793, "step": 9804 }, { "epoch": 1.7849276417584417, "grad_norm": 7.25, "learning_rate": 2.424450188889884e-06, "loss": 1.1773463487625122, "step": 9806 }, { "epoch": 1.785291708382634, "grad_norm": 31.25, "learning_rate": 2.4237164920233246e-06, "loss": 1.4031540155410767, "step": 9808 }, { "epoch": 1.7856557750068263, "grad_norm": 2.203125, "learning_rate": 2.422982879724035e-06, "loss": 1.119649887084961, "step": 9810 }, { "epoch": 1.7860198416310185, "grad_norm": 9.4375, "learning_rate": 2.422249352099669e-06, "loss": 1.3279435634613037, "step": 9812 }, { "epoch": 1.7863839082552107, "grad_norm": 12.375, "learning_rate": 2.421515909257869e-06, "loss": 1.4814023971557617, "step": 9814 }, { "epoch": 1.786747974879403, "grad_norm": 13.3125, "learning_rate": 2.4207825513062656e-06, "loss": 1.6729483604431152, "step": 9816 }, { "epoch": 1.7871120415035953, "grad_norm": 36.75, "learning_rate": 2.420049278352475e-06, "loss": 1.786515474319458, "step": 9818 }, { "epoch": 1.7874761081277875, "grad_norm": 5.21875, "learning_rate": 2.419316090504103e-06, "loss": 0.9854660630226135, "step": 9820 }, { "epoch": 1.7878401747519796, "grad_norm": 9.1875, "learning_rate": 2.4185829878687405e-06, "loss": 1.2063982486724854, "step": 9822 }, { "epoch": 1.7882042413761718, "grad_norm": 8.0, "learning_rate": 2.417849970553969e-06, "loss": 1.247206449508667, "step": 9824 }, { "epoch": 1.788568308000364, "grad_norm": 49.25, "learning_rate": 2.417117038667355e-06, "loss": 0.9150762557983398, "step": 9826 }, { "epoch": 1.7889323746245562, "grad_norm": 9.25, "learning_rate": 2.416384192316453e-06, "loss": 1.0062452554702759, "step": 9828 }, { "epoch": 1.7892964412487484, "grad_norm": 6.125, "learning_rate": 2.4156514316088063e-06, "loss": 1.250044345855713, "step": 9830 }, { "epoch": 1.7896605078729406, "grad_norm": 12.0, "learning_rate": 2.414918756651943e-06, "loss": 1.5411306619644165, "step": 9832 }, { "epoch": 1.7900245744971328, "grad_norm": 20.125, "learning_rate": 2.414186167553381e-06, "loss": 0.9801827669143677, "step": 9834 }, { "epoch": 1.7903886411213252, "grad_norm": 20.5, "learning_rate": 2.4134536644206254e-06, "loss": 1.5909456014633179, "step": 9836 }, { "epoch": 1.7907527077455174, "grad_norm": 58.0, "learning_rate": 2.412721247361167e-06, "loss": 1.3072903156280518, "step": 9838 }, { "epoch": 1.7911167743697096, "grad_norm": 10.625, "learning_rate": 2.411988916482486e-06, "loss": 1.0310885906219482, "step": 9840 }, { "epoch": 1.791480840993902, "grad_norm": 24.125, "learning_rate": 2.4112566718920482e-06, "loss": 1.7582591772079468, "step": 9842 }, { "epoch": 1.7918449076180942, "grad_norm": 9.625, "learning_rate": 2.4105245136973075e-06, "loss": 0.9733285903930664, "step": 9844 }, { "epoch": 1.7922089742422864, "grad_norm": 10.875, "learning_rate": 2.4097924420057066e-06, "loss": 1.4968267679214478, "step": 9846 }, { "epoch": 1.7925730408664786, "grad_norm": 19.875, "learning_rate": 2.409060456924672e-06, "loss": 1.4720216989517212, "step": 9848 }, { "epoch": 1.7929371074906708, "grad_norm": 7.75, "learning_rate": 2.4083285585616213e-06, "loss": 1.351499319076538, "step": 9850 }, { "epoch": 1.793301174114863, "grad_norm": 6.78125, "learning_rate": 2.4075967470239556e-06, "loss": 1.5258337259292603, "step": 9852 }, { "epoch": 1.7936652407390552, "grad_norm": 7.65625, "learning_rate": 2.406865022419067e-06, "loss": 1.4940929412841797, "step": 9854 }, { "epoch": 1.7940293073632474, "grad_norm": 6.34375, "learning_rate": 2.4061333848543332e-06, "loss": 1.3019057512283325, "step": 9856 }, { "epoch": 1.7943933739874396, "grad_norm": 13.8125, "learning_rate": 2.4054018344371172e-06, "loss": 1.3263444900512695, "step": 9858 }, { "epoch": 1.7947574406116318, "grad_norm": 33.25, "learning_rate": 2.404670371274774e-06, "loss": 1.4296473264694214, "step": 9860 }, { "epoch": 1.7951215072358242, "grad_norm": 5.28125, "learning_rate": 2.4039389954746396e-06, "loss": 1.4287168979644775, "step": 9862 }, { "epoch": 1.7954855738600164, "grad_norm": 7.53125, "learning_rate": 2.4032077071440424e-06, "loss": 1.3336765766143799, "step": 9864 }, { "epoch": 1.7958496404842086, "grad_norm": 6.15625, "learning_rate": 2.4024765063902962e-06, "loss": 1.3832186460494995, "step": 9866 }, { "epoch": 1.796213707108401, "grad_norm": 4.3125, "learning_rate": 2.4017453933207003e-06, "loss": 1.2775369882583618, "step": 9868 }, { "epoch": 1.7965777737325932, "grad_norm": 10.3125, "learning_rate": 2.4010143680425443e-06, "loss": 1.537932276725769, "step": 9870 }, { "epoch": 1.7969418403567854, "grad_norm": 11.4375, "learning_rate": 2.4002834306631014e-06, "loss": 1.3176320791244507, "step": 9872 }, { "epoch": 1.7973059069809776, "grad_norm": 10.125, "learning_rate": 2.3995525812896346e-06, "loss": 1.5509226322174072, "step": 9874 }, { "epoch": 1.7976699736051698, "grad_norm": 12.4375, "learning_rate": 2.3988218200293943e-06, "loss": 1.438368320465088, "step": 9876 }, { "epoch": 1.798034040229362, "grad_norm": 6.90625, "learning_rate": 2.3980911469896142e-06, "loss": 1.3350715637207031, "step": 9878 }, { "epoch": 1.7983981068535542, "grad_norm": 24.25, "learning_rate": 2.39736056227752e-06, "loss": 1.4058773517608643, "step": 9880 }, { "epoch": 1.7987621734777464, "grad_norm": 26.625, "learning_rate": 2.3966300660003202e-06, "loss": 1.571915626525879, "step": 9882 }, { "epoch": 1.7991262401019386, "grad_norm": 9.125, "learning_rate": 2.3958996582652133e-06, "loss": 1.2185813188552856, "step": 9884 }, { "epoch": 1.7994903067261308, "grad_norm": 11.375, "learning_rate": 2.3951693391793836e-06, "loss": 0.7160068154335022, "step": 9886 }, { "epoch": 1.799854373350323, "grad_norm": 13.5, "learning_rate": 2.3944391088500017e-06, "loss": 1.264702558517456, "step": 9888 }, { "epoch": 1.8002184399745154, "grad_norm": 13.5625, "learning_rate": 2.3937089673842274e-06, "loss": 1.4804730415344238, "step": 9890 }, { "epoch": 1.8005825065987076, "grad_norm": 7.53125, "learning_rate": 2.3929789148892035e-06, "loss": 1.2566052675247192, "step": 9892 }, { "epoch": 1.8009465732228997, "grad_norm": 51.5, "learning_rate": 2.392248951472065e-06, "loss": 1.2755497694015503, "step": 9894 }, { "epoch": 1.8013106398470922, "grad_norm": 13.125, "learning_rate": 2.39151907723993e-06, "loss": 1.5451502799987793, "step": 9896 }, { "epoch": 1.8016747064712844, "grad_norm": 16.875, "learning_rate": 2.3907892922999036e-06, "loss": 1.3404134511947632, "step": 9898 }, { "epoch": 1.8020387730954766, "grad_norm": 34.5, "learning_rate": 2.39005959675908e-06, "loss": 1.2840261459350586, "step": 9900 }, { "epoch": 1.8024028397196687, "grad_norm": 11.6875, "learning_rate": 2.3893299907245383e-06, "loss": 0.9123265147209167, "step": 9902 }, { "epoch": 1.802766906343861, "grad_norm": 9.875, "learning_rate": 2.3886004743033463e-06, "loss": 1.7312496900558472, "step": 9904 }, { "epoch": 1.8031309729680531, "grad_norm": 8.6875, "learning_rate": 2.387871047602557e-06, "loss": 1.0656354427337646, "step": 9906 }, { "epoch": 1.8034950395922453, "grad_norm": 5.21875, "learning_rate": 2.38714171072921e-06, "loss": 0.7418041825294495, "step": 9908 }, { "epoch": 1.8038591062164375, "grad_norm": 14.0, "learning_rate": 2.386412463790334e-06, "loss": 1.2070902585983276, "step": 9910 }, { "epoch": 1.8042231728406297, "grad_norm": 12.125, "learning_rate": 2.385683306892942e-06, "loss": 1.2074546813964844, "step": 9912 }, { "epoch": 1.804587239464822, "grad_norm": 3.875, "learning_rate": 2.3849542401440346e-06, "loss": 1.1747174263000488, "step": 9914 }, { "epoch": 1.8049513060890143, "grad_norm": 9.625, "learning_rate": 2.384225263650601e-06, "loss": 1.1381653547286987, "step": 9916 }, { "epoch": 1.8053153727132065, "grad_norm": 6.34375, "learning_rate": 2.3834963775196132e-06, "loss": 1.5001487731933594, "step": 9918 }, { "epoch": 1.8056794393373987, "grad_norm": 6.28125, "learning_rate": 2.382767581858035e-06, "loss": 1.3417357206344604, "step": 9920 }, { "epoch": 1.8060435059615911, "grad_norm": 4.65625, "learning_rate": 2.382038876772811e-06, "loss": 1.2414604425430298, "step": 9922 }, { "epoch": 1.8064075725857833, "grad_norm": 17.625, "learning_rate": 2.381310262370878e-06, "loss": 1.8292914628982544, "step": 9924 }, { "epoch": 1.8067716392099755, "grad_norm": 5.9375, "learning_rate": 2.3805817387591577e-06, "loss": 1.3193349838256836, "step": 9926 }, { "epoch": 1.8071357058341677, "grad_norm": 8.3125, "learning_rate": 2.379853306044556e-06, "loss": 1.2253795862197876, "step": 9928 }, { "epoch": 1.80749977245836, "grad_norm": 7.25, "learning_rate": 2.379124964333969e-06, "loss": 1.3973757028579712, "step": 9930 }, { "epoch": 1.807863839082552, "grad_norm": 7.78125, "learning_rate": 2.3783967137342766e-06, "loss": 1.3232834339141846, "step": 9932 }, { "epoch": 1.8082279057067443, "grad_norm": 3.15625, "learning_rate": 2.3776685543523477e-06, "loss": 1.0558030605316162, "step": 9934 }, { "epoch": 1.8085919723309365, "grad_norm": 16.0, "learning_rate": 2.3769404862950366e-06, "loss": 1.6298116445541382, "step": 9936 }, { "epoch": 1.8089560389551287, "grad_norm": 36.5, "learning_rate": 2.3762125096691833e-06, "loss": 1.709897756576538, "step": 9938 }, { "epoch": 1.8093201055793209, "grad_norm": 11.0, "learning_rate": 2.375484624581617e-06, "loss": 1.2543293237686157, "step": 9940 }, { "epoch": 1.8096841722035133, "grad_norm": 8.375, "learning_rate": 2.3747568311391505e-06, "loss": 1.5632082223892212, "step": 9942 }, { "epoch": 1.8100482388277055, "grad_norm": 4.8125, "learning_rate": 2.374029129448586e-06, "loss": 0.942573070526123, "step": 9944 }, { "epoch": 1.8104123054518977, "grad_norm": 20.75, "learning_rate": 2.3733015196167093e-06, "loss": 1.0287446975708008, "step": 9946 }, { "epoch": 1.8107763720760899, "grad_norm": 17.25, "learning_rate": 2.3725740017502946e-06, "loss": 0.7639522552490234, "step": 9948 }, { "epoch": 1.8111404387002823, "grad_norm": 4.5625, "learning_rate": 2.3718465759561032e-06, "loss": 0.999504804611206, "step": 9950 }, { "epoch": 1.8115045053244745, "grad_norm": 20.75, "learning_rate": 2.37111924234088e-06, "loss": 1.3263179063796997, "step": 9952 }, { "epoch": 1.8118685719486667, "grad_norm": 37.5, "learning_rate": 2.3703920010113603e-06, "loss": 2.006227731704712, "step": 9954 }, { "epoch": 1.8122326385728589, "grad_norm": 19.0, "learning_rate": 2.3696648520742627e-06, "loss": 1.997079610824585, "step": 9956 }, { "epoch": 1.812596705197051, "grad_norm": 11.6875, "learning_rate": 2.368937795636293e-06, "loss": 2.0477285385131836, "step": 9958 }, { "epoch": 1.8129607718212433, "grad_norm": 12.875, "learning_rate": 2.3682108318041448e-06, "loss": 1.5167710781097412, "step": 9960 }, { "epoch": 1.8133248384454355, "grad_norm": 10.1875, "learning_rate": 2.3674839606844963e-06, "loss": 1.478399634361267, "step": 9962 }, { "epoch": 1.8136889050696277, "grad_norm": 13.5, "learning_rate": 2.3667571823840132e-06, "loss": 1.423717975616455, "step": 9964 }, { "epoch": 1.8140529716938198, "grad_norm": 12.125, "learning_rate": 2.366030497009348e-06, "loss": 1.5092569589614868, "step": 9966 }, { "epoch": 1.814417038318012, "grad_norm": 11.9375, "learning_rate": 2.3653039046671373e-06, "loss": 1.5333917140960693, "step": 9968 }, { "epoch": 1.8147811049422045, "grad_norm": 11.75, "learning_rate": 2.364577405464007e-06, "loss": 1.1136995553970337, "step": 9970 }, { "epoch": 1.8151451715663967, "grad_norm": 12.25, "learning_rate": 2.3638509995065665e-06, "loss": 1.3786582946777344, "step": 9972 }, { "epoch": 1.8155092381905888, "grad_norm": 7.0625, "learning_rate": 2.3631246869014144e-06, "loss": 1.8730974197387695, "step": 9974 }, { "epoch": 1.8158733048147813, "grad_norm": 8.0625, "learning_rate": 2.3623984677551338e-06, "loss": 1.3522603511810303, "step": 9976 }, { "epoch": 1.8162373714389735, "grad_norm": 7.875, "learning_rate": 2.3616723421742927e-06, "loss": 1.6729565858840942, "step": 9978 }, { "epoch": 1.8166014380631657, "grad_norm": 17.0, "learning_rate": 2.36094631026545e-06, "loss": 1.3685040473937988, "step": 9980 }, { "epoch": 1.8169655046873578, "grad_norm": 26.875, "learning_rate": 2.3602203721351455e-06, "loss": 1.1316989660263062, "step": 9982 }, { "epoch": 1.81732957131155, "grad_norm": 12.0, "learning_rate": 2.3594945278899087e-06, "loss": 0.6563942432403564, "step": 9984 }, { "epoch": 1.8176936379357422, "grad_norm": 19.5, "learning_rate": 2.3587687776362546e-06, "loss": 1.5108327865600586, "step": 9986 }, { "epoch": 1.8180577045599344, "grad_norm": 16.625, "learning_rate": 2.3580431214806833e-06, "loss": 1.586306095123291, "step": 9988 }, { "epoch": 1.8184217711841266, "grad_norm": 13.5, "learning_rate": 2.357317559529683e-06, "loss": 1.6256462335586548, "step": 9990 }, { "epoch": 1.8187858378083188, "grad_norm": 12.75, "learning_rate": 2.356592091889725e-06, "loss": 1.7333831787109375, "step": 9992 }, { "epoch": 1.819149904432511, "grad_norm": 15.6875, "learning_rate": 2.3558667186672705e-06, "loss": 1.5826849937438965, "step": 9994 }, { "epoch": 1.8195139710567034, "grad_norm": 11.125, "learning_rate": 2.3551414399687658e-06, "loss": 1.3787710666656494, "step": 9996 }, { "epoch": 1.8198780376808956, "grad_norm": 15.125, "learning_rate": 2.3544162559006396e-06, "loss": 1.0588726997375488, "step": 9998 }, { "epoch": 1.8202421043050878, "grad_norm": 13.5, "learning_rate": 2.353691166569313e-06, "loss": 1.4980621337890625, "step": 10000 }, { "epoch": 1.82060617092928, "grad_norm": 16.75, "learning_rate": 2.352966172081187e-06, "loss": 1.6686599254608154, "step": 10002 }, { "epoch": 1.8209702375534724, "grad_norm": 16.625, "learning_rate": 2.3522412725426537e-06, "loss": 0.5254675149917603, "step": 10004 }, { "epoch": 1.8213343041776646, "grad_norm": 10.3125, "learning_rate": 2.3515164680600892e-06, "loss": 1.5052647590637207, "step": 10006 }, { "epoch": 1.8216983708018568, "grad_norm": 15.875, "learning_rate": 2.350791758739854e-06, "loss": 1.2988427877426147, "step": 10008 }, { "epoch": 1.822062437426049, "grad_norm": 12.5, "learning_rate": 2.3500671446882985e-06, "loss": 1.3656269311904907, "step": 10010 }, { "epoch": 1.8224265040502412, "grad_norm": 9.875, "learning_rate": 2.349342626011754e-06, "loss": 1.4077694416046143, "step": 10012 }, { "epoch": 1.8227905706744334, "grad_norm": 71.5, "learning_rate": 2.3486182028165427e-06, "loss": 1.3234163522720337, "step": 10014 }, { "epoch": 1.8231546372986256, "grad_norm": 13.75, "learning_rate": 2.347893875208971e-06, "loss": 0.47953706979751587, "step": 10016 }, { "epoch": 1.8235187039228178, "grad_norm": 21.125, "learning_rate": 2.3471696432953293e-06, "loss": 1.265723705291748, "step": 10018 }, { "epoch": 1.82388277054701, "grad_norm": 19.75, "learning_rate": 2.346445507181898e-06, "loss": 1.247279405593872, "step": 10020 }, { "epoch": 1.8242468371712022, "grad_norm": 10.6875, "learning_rate": 2.3457214669749387e-06, "loss": 1.6593971252441406, "step": 10022 }, { "epoch": 1.8246109037953946, "grad_norm": 10.0625, "learning_rate": 2.344997522780703e-06, "loss": 1.4940249919891357, "step": 10024 }, { "epoch": 1.8249749704195868, "grad_norm": 7.21875, "learning_rate": 2.344273674705427e-06, "loss": 1.1693711280822754, "step": 10026 }, { "epoch": 1.825339037043779, "grad_norm": 10.0, "learning_rate": 2.3435499228553306e-06, "loss": 1.3392581939697266, "step": 10028 }, { "epoch": 1.8257031036679714, "grad_norm": 11.375, "learning_rate": 2.342826267336624e-06, "loss": 1.159989595413208, "step": 10030 }, { "epoch": 1.8260671702921636, "grad_norm": 12.3125, "learning_rate": 2.3421027082554982e-06, "loss": 1.4001920223236084, "step": 10032 }, { "epoch": 1.8264312369163558, "grad_norm": 8.625, "learning_rate": 2.3413792457181337e-06, "loss": 1.4928545951843262, "step": 10034 }, { "epoch": 1.826795303540548, "grad_norm": 9.125, "learning_rate": 2.340655879830697e-06, "loss": 1.57978093624115, "step": 10036 }, { "epoch": 1.8271593701647402, "grad_norm": 6.625, "learning_rate": 2.339932610699337e-06, "loss": 1.290604829788208, "step": 10038 }, { "epoch": 1.8275234367889324, "grad_norm": 13.0, "learning_rate": 2.3392094384301916e-06, "loss": 1.3449780941009521, "step": 10040 }, { "epoch": 1.8278875034131246, "grad_norm": 7.28125, "learning_rate": 2.338486363129383e-06, "loss": 1.16850745677948, "step": 10042 }, { "epoch": 1.8282515700373168, "grad_norm": 7.46875, "learning_rate": 2.3377633849030193e-06, "loss": 0.9517574310302734, "step": 10044 }, { "epoch": 1.828615636661509, "grad_norm": 25.625, "learning_rate": 2.337040503857196e-06, "loss": 1.2145745754241943, "step": 10046 }, { "epoch": 1.8289797032857011, "grad_norm": 20.5, "learning_rate": 2.336317720097991e-06, "loss": 1.7920341491699219, "step": 10048 }, { "epoch": 1.8293437699098936, "grad_norm": 13.75, "learning_rate": 2.335595033731472e-06, "loss": 1.4677098989486694, "step": 10050 }, { "epoch": 1.8297078365340858, "grad_norm": 13.1875, "learning_rate": 2.3348724448636884e-06, "loss": 1.4022963047027588, "step": 10052 }, { "epoch": 1.830071903158278, "grad_norm": 12.75, "learning_rate": 2.3341499536006777e-06, "loss": 1.454895257949829, "step": 10054 }, { "epoch": 1.8304359697824701, "grad_norm": 8.625, "learning_rate": 2.3334275600484643e-06, "loss": 1.4594972133636475, "step": 10056 }, { "epoch": 1.8308000364066626, "grad_norm": 7.84375, "learning_rate": 2.3327052643130534e-06, "loss": 0.9805500507354736, "step": 10058 }, { "epoch": 1.8311641030308548, "grad_norm": 27.125, "learning_rate": 2.331983066500442e-06, "loss": 1.3447250127792358, "step": 10060 }, { "epoch": 1.831528169655047, "grad_norm": 11.75, "learning_rate": 2.3312609667166073e-06, "loss": 1.7461843490600586, "step": 10062 }, { "epoch": 1.8318922362792391, "grad_norm": 16.625, "learning_rate": 2.3305389650675163e-06, "loss": 1.3648128509521484, "step": 10064 }, { "epoch": 1.8322563029034313, "grad_norm": 12.0625, "learning_rate": 2.329817061659119e-06, "loss": 1.3718647956848145, "step": 10066 }, { "epoch": 1.8326203695276235, "grad_norm": 6.96875, "learning_rate": 2.3290952565973514e-06, "loss": 1.483525276184082, "step": 10068 }, { "epoch": 1.8329844361518157, "grad_norm": 9.0625, "learning_rate": 2.3283735499881365e-06, "loss": 1.2079241275787354, "step": 10070 }, { "epoch": 1.833348502776008, "grad_norm": 16.25, "learning_rate": 2.3276519419373807e-06, "loss": 1.3581733703613281, "step": 10072 }, { "epoch": 1.8337125694002, "grad_norm": 19.375, "learning_rate": 2.326930432550978e-06, "loss": 1.9249236583709717, "step": 10074 }, { "epoch": 1.8340766360243923, "grad_norm": 43.0, "learning_rate": 2.326209021934807e-06, "loss": 1.4857887029647827, "step": 10076 }, { "epoch": 1.8344407026485847, "grad_norm": 17.25, "learning_rate": 2.3254877101947304e-06, "loss": 1.573176622390747, "step": 10078 }, { "epoch": 1.834804769272777, "grad_norm": 13.4375, "learning_rate": 2.3247664974366e-06, "loss": 1.2796710729599, "step": 10080 }, { "epoch": 1.835168835896969, "grad_norm": 14.0, "learning_rate": 2.324045383766248e-06, "loss": 1.8859400749206543, "step": 10082 }, { "epoch": 1.8355329025211615, "grad_norm": 17.375, "learning_rate": 2.323324369289498e-06, "loss": 1.8698440790176392, "step": 10084 }, { "epoch": 1.8358969691453537, "grad_norm": 6.5, "learning_rate": 2.3226034541121544e-06, "loss": 1.0743274688720703, "step": 10086 }, { "epoch": 1.836261035769546, "grad_norm": 4.0, "learning_rate": 2.321882638340008e-06, "loss": 1.0709354877471924, "step": 10088 }, { "epoch": 1.836625102393738, "grad_norm": 6.4375, "learning_rate": 2.321161922078837e-06, "loss": 1.0000280141830444, "step": 10090 }, { "epoch": 1.8369891690179303, "grad_norm": 15.8125, "learning_rate": 2.3204413054344018e-06, "loss": 1.4246320724487305, "step": 10092 }, { "epoch": 1.8373532356421225, "grad_norm": 14.0625, "learning_rate": 2.3197207885124516e-06, "loss": 1.9766209125518799, "step": 10094 }, { "epoch": 1.8377173022663147, "grad_norm": 21.125, "learning_rate": 2.319000371418719e-06, "loss": 1.990584373474121, "step": 10096 }, { "epoch": 1.8380813688905069, "grad_norm": 6.5625, "learning_rate": 2.318280054258921e-06, "loss": 1.5160785913467407, "step": 10098 }, { "epoch": 1.838445435514699, "grad_norm": 11.3125, "learning_rate": 2.317559837138764e-06, "loss": 1.3069117069244385, "step": 10100 }, { "epoch": 1.8388095021388913, "grad_norm": 17.875, "learning_rate": 2.3168397201639336e-06, "loss": 1.3874542713165283, "step": 10102 }, { "epoch": 1.8391735687630837, "grad_norm": 24.625, "learning_rate": 2.316119703440106e-06, "loss": 1.5388832092285156, "step": 10104 }, { "epoch": 1.8395376353872759, "grad_norm": 19.875, "learning_rate": 2.3153997870729415e-06, "loss": 1.7023801803588867, "step": 10106 }, { "epoch": 1.839901702011468, "grad_norm": 6.9375, "learning_rate": 2.314679971168082e-06, "loss": 1.0762262344360352, "step": 10108 }, { "epoch": 1.8402657686356603, "grad_norm": 3.3125, "learning_rate": 2.3139602558311614e-06, "loss": 1.1073142290115356, "step": 10110 }, { "epoch": 1.8406298352598527, "grad_norm": 35.75, "learning_rate": 2.313240641167791e-06, "loss": 1.1669108867645264, "step": 10112 }, { "epoch": 1.8409939018840449, "grad_norm": 14.5, "learning_rate": 2.3125211272835747e-06, "loss": 1.7666399478912354, "step": 10114 }, { "epoch": 1.841357968508237, "grad_norm": 8.875, "learning_rate": 2.3118017142840967e-06, "loss": 1.5065183639526367, "step": 10116 }, { "epoch": 1.8417220351324293, "grad_norm": 9.0, "learning_rate": 2.3110824022749275e-06, "loss": 1.3965014219284058, "step": 10118 }, { "epoch": 1.8420861017566215, "grad_norm": 9.9375, "learning_rate": 2.310363191361624e-06, "loss": 1.7696468830108643, "step": 10120 }, { "epoch": 1.8424501683808137, "grad_norm": 15.5, "learning_rate": 2.3096440816497276e-06, "loss": 1.6102962493896484, "step": 10122 }, { "epoch": 1.8428142350050059, "grad_norm": 14.0, "learning_rate": 2.3089250732447644e-06, "loss": 1.6875274181365967, "step": 10124 }, { "epoch": 1.843178301629198, "grad_norm": 15.5, "learning_rate": 2.3082061662522463e-06, "loss": 2.058544635772705, "step": 10126 }, { "epoch": 1.8435423682533902, "grad_norm": 17.5, "learning_rate": 2.3074873607776692e-06, "loss": 1.361794352531433, "step": 10128 }, { "epoch": 1.8439064348775824, "grad_norm": 26.875, "learning_rate": 2.306768656926516e-06, "loss": 0.9231115579605103, "step": 10130 }, { "epoch": 1.8442705015017749, "grad_norm": 11.5, "learning_rate": 2.3060500548042516e-06, "loss": 1.4587790966033936, "step": 10132 }, { "epoch": 1.844634568125967, "grad_norm": 8.6875, "learning_rate": 2.30533155451633e-06, "loss": 1.2917771339416504, "step": 10134 }, { "epoch": 1.8449986347501592, "grad_norm": 13.75, "learning_rate": 2.3046131561681888e-06, "loss": 1.3458640575408936, "step": 10136 }, { "epoch": 1.8453627013743517, "grad_norm": 7.90625, "learning_rate": 2.303894859865247e-06, "loss": 1.4280368089675903, "step": 10138 }, { "epoch": 1.8457267679985438, "grad_norm": 20.125, "learning_rate": 2.303176665712915e-06, "loss": 1.568877935409546, "step": 10140 }, { "epoch": 1.846090834622736, "grad_norm": 19.75, "learning_rate": 2.3024585738165816e-06, "loss": 1.6109087467193604, "step": 10142 }, { "epoch": 1.8464549012469282, "grad_norm": 46.0, "learning_rate": 2.3017405842816263e-06, "loss": 0.5939738750457764, "step": 10144 }, { "epoch": 1.8468189678711204, "grad_norm": 35.5, "learning_rate": 2.3010226972134114e-06, "loss": 1.4714713096618652, "step": 10146 }, { "epoch": 1.8471830344953126, "grad_norm": 12.8125, "learning_rate": 2.300304912717282e-06, "loss": 1.2469115257263184, "step": 10148 }, { "epoch": 1.8475471011195048, "grad_norm": 6.09375, "learning_rate": 2.2995872308985715e-06, "loss": 1.3877387046813965, "step": 10150 }, { "epoch": 1.847911167743697, "grad_norm": 6.46875, "learning_rate": 2.298869651862596e-06, "loss": 1.2895342111587524, "step": 10152 }, { "epoch": 1.8482752343678892, "grad_norm": 13.5625, "learning_rate": 2.2981521757146575e-06, "loss": 1.5223307609558105, "step": 10154 }, { "epoch": 1.8486393009920814, "grad_norm": 19.25, "learning_rate": 2.297434802560044e-06, "loss": 1.8295148611068726, "step": 10156 }, { "epoch": 1.8490033676162738, "grad_norm": 5.59375, "learning_rate": 2.296717532504025e-06, "loss": 0.838250458240509, "step": 10158 }, { "epoch": 1.849367434240466, "grad_norm": 6.875, "learning_rate": 2.296000365651858e-06, "loss": 0.9912216067314148, "step": 10160 }, { "epoch": 1.8497315008646582, "grad_norm": 13.9375, "learning_rate": 2.295283302108784e-06, "loss": 1.6604490280151367, "step": 10162 }, { "epoch": 1.8500955674888506, "grad_norm": 23.5, "learning_rate": 2.29456634198003e-06, "loss": 1.6898630857467651, "step": 10164 }, { "epoch": 1.8504596341130428, "grad_norm": 13.0, "learning_rate": 2.2938494853708067e-06, "loss": 1.5062068700790405, "step": 10166 }, { "epoch": 1.850823700737235, "grad_norm": 18.5, "learning_rate": 2.2931327323863087e-06, "loss": 1.5017207860946655, "step": 10168 }, { "epoch": 1.8511877673614272, "grad_norm": 18.875, "learning_rate": 2.2924160831317186e-06, "loss": 1.7805989980697632, "step": 10170 }, { "epoch": 1.8515518339856194, "grad_norm": 20.5, "learning_rate": 2.2916995377121996e-06, "loss": 1.3220857381820679, "step": 10172 }, { "epoch": 1.8519159006098116, "grad_norm": 9.5, "learning_rate": 2.2909830962329034e-06, "loss": 1.3464634418487549, "step": 10174 }, { "epoch": 1.8522799672340038, "grad_norm": 10.4375, "learning_rate": 2.2902667587989653e-06, "loss": 1.4577744007110596, "step": 10176 }, { "epoch": 1.852644033858196, "grad_norm": 7.53125, "learning_rate": 2.2895505255155026e-06, "loss": 1.3538519144058228, "step": 10178 }, { "epoch": 1.8530081004823882, "grad_norm": 3.3125, "learning_rate": 2.2888343964876223e-06, "loss": 1.1247426271438599, "step": 10180 }, { "epoch": 1.8533721671065804, "grad_norm": 8.0, "learning_rate": 2.2881183718204115e-06, "loss": 1.0560004711151123, "step": 10182 }, { "epoch": 1.8537362337307728, "grad_norm": 216.0, "learning_rate": 2.2874024516189448e-06, "loss": 1.4581248760223389, "step": 10184 }, { "epoch": 1.854100300354965, "grad_norm": 11.875, "learning_rate": 2.2866866359882807e-06, "loss": 1.7488996982574463, "step": 10186 }, { "epoch": 1.8544643669791572, "grad_norm": 16.375, "learning_rate": 2.2859709250334617e-06, "loss": 2.0033645629882812, "step": 10188 }, { "epoch": 1.8548284336033494, "grad_norm": 19.5, "learning_rate": 2.285255318859516e-06, "loss": 1.7307451963424683, "step": 10190 }, { "epoch": 1.8551925002275418, "grad_norm": 8.6875, "learning_rate": 2.2845398175714552e-06, "loss": 1.427422285079956, "step": 10192 }, { "epoch": 1.855556566851734, "grad_norm": 10.5625, "learning_rate": 2.2838244212742765e-06, "loss": 1.3619296550750732, "step": 10194 }, { "epoch": 1.8559206334759262, "grad_norm": 8.6875, "learning_rate": 2.2831091300729622e-06, "loss": 1.5230233669281006, "step": 10196 }, { "epoch": 1.8562847001001184, "grad_norm": 7.875, "learning_rate": 2.2823939440724772e-06, "loss": 1.3712950944900513, "step": 10198 }, { "epoch": 1.8566487667243106, "grad_norm": 12.625, "learning_rate": 2.2816788633777735e-06, "loss": 1.0905096530914307, "step": 10200 }, { "epoch": 1.8570128333485028, "grad_norm": 11.625, "learning_rate": 2.2809638880937845e-06, "loss": 1.4866703748703003, "step": 10202 }, { "epoch": 1.857376899972695, "grad_norm": 122.5, "learning_rate": 2.2802490183254317e-06, "loss": 1.3468842506408691, "step": 10204 }, { "epoch": 1.8577409665968871, "grad_norm": 51.5, "learning_rate": 2.2795342541776185e-06, "loss": 0.9001464247703552, "step": 10206 }, { "epoch": 1.8581050332210793, "grad_norm": 5.6875, "learning_rate": 2.278819595755233e-06, "loss": 0.4550447463989258, "step": 10208 }, { "epoch": 1.8584690998452715, "grad_norm": 8.875, "learning_rate": 2.27810504316315e-06, "loss": 1.7200883626937866, "step": 10210 }, { "epoch": 1.858833166469464, "grad_norm": 7.125, "learning_rate": 2.2773905965062256e-06, "loss": 1.3557761907577515, "step": 10212 }, { "epoch": 1.8591972330936561, "grad_norm": 13.5625, "learning_rate": 2.276676255889303e-06, "loss": 1.967002272605896, "step": 10214 }, { "epoch": 1.8595612997178483, "grad_norm": 16.75, "learning_rate": 2.275962021417209e-06, "loss": 1.9087586402893066, "step": 10216 }, { "epoch": 1.8599253663420408, "grad_norm": 10.6875, "learning_rate": 2.2752478931947534e-06, "loss": 1.3895682096481323, "step": 10218 }, { "epoch": 1.860289432966233, "grad_norm": 14.375, "learning_rate": 2.274533871326733e-06, "loss": 1.5569223165512085, "step": 10220 }, { "epoch": 1.8606534995904251, "grad_norm": 13.875, "learning_rate": 2.273819955917926e-06, "loss": 1.4245853424072266, "step": 10222 }, { "epoch": 1.8610175662146173, "grad_norm": 9.625, "learning_rate": 2.273106147073098e-06, "loss": 1.2391117811203003, "step": 10224 }, { "epoch": 1.8613816328388095, "grad_norm": 9.5, "learning_rate": 2.272392444896997e-06, "loss": 1.4110950231552124, "step": 10226 }, { "epoch": 1.8617456994630017, "grad_norm": 16.375, "learning_rate": 2.271678849494356e-06, "loss": 1.5389165878295898, "step": 10228 }, { "epoch": 1.862109766087194, "grad_norm": 12.0625, "learning_rate": 2.2709653609698926e-06, "loss": 1.4933632612228394, "step": 10230 }, { "epoch": 1.8624738327113861, "grad_norm": 9.0625, "learning_rate": 2.2702519794283074e-06, "loss": 1.2820851802825928, "step": 10232 }, { "epoch": 1.8628378993355783, "grad_norm": 10.9375, "learning_rate": 2.2695387049742868e-06, "loss": 1.0977133512496948, "step": 10234 }, { "epoch": 1.8632019659597705, "grad_norm": 26.0, "learning_rate": 2.2688255377125016e-06, "loss": 0.6658727526664734, "step": 10236 }, { "epoch": 1.863566032583963, "grad_norm": 7.59375, "learning_rate": 2.2681124777476046e-06, "loss": 1.6029568910598755, "step": 10238 }, { "epoch": 1.8639300992081551, "grad_norm": 12.0, "learning_rate": 2.2673995251842364e-06, "loss": 1.5376559495925903, "step": 10240 }, { "epoch": 1.8642941658323473, "grad_norm": 18.25, "learning_rate": 2.266686680127018e-06, "loss": 1.4276180267333984, "step": 10242 }, { "epoch": 1.8646582324565395, "grad_norm": 13.3125, "learning_rate": 2.2659739426805576e-06, "loss": 1.2914968729019165, "step": 10244 }, { "epoch": 1.865022299080732, "grad_norm": 12.8125, "learning_rate": 2.2652613129494473e-06, "loss": 1.6507633924484253, "step": 10246 }, { "epoch": 1.865386365704924, "grad_norm": 25.0, "learning_rate": 2.2645487910382606e-06, "loss": 1.4687492847442627, "step": 10248 }, { "epoch": 1.8657504323291163, "grad_norm": 17.375, "learning_rate": 2.263836377051559e-06, "loss": 1.4469367265701294, "step": 10250 }, { "epoch": 1.8661144989533085, "grad_norm": 7.59375, "learning_rate": 2.263124071093885e-06, "loss": 1.1711663007736206, "step": 10252 }, { "epoch": 1.8664785655775007, "grad_norm": 12.25, "learning_rate": 2.2624118732697676e-06, "loss": 1.2464871406555176, "step": 10254 }, { "epoch": 1.8668426322016929, "grad_norm": 7.34375, "learning_rate": 2.2616997836837187e-06, "loss": 1.5155750513076782, "step": 10256 }, { "epoch": 1.867206698825885, "grad_norm": 7.53125, "learning_rate": 2.2609878024402344e-06, "loss": 1.5323436260223389, "step": 10258 }, { "epoch": 1.8675707654500773, "grad_norm": 7.875, "learning_rate": 2.2602759296437955e-06, "loss": 1.3353511095046997, "step": 10260 }, { "epoch": 1.8679348320742695, "grad_norm": 11.375, "learning_rate": 2.2595641653988656e-06, "loss": 1.2968274354934692, "step": 10262 }, { "epoch": 1.8682988986984617, "grad_norm": 13.8125, "learning_rate": 2.2588525098098936e-06, "loss": 1.7588025331497192, "step": 10264 }, { "epoch": 1.868662965322654, "grad_norm": 7.5625, "learning_rate": 2.258140962981313e-06, "loss": 1.2066104412078857, "step": 10266 }, { "epoch": 1.8690270319468463, "grad_norm": 7.46875, "learning_rate": 2.2574295250175383e-06, "loss": 1.4478979110717773, "step": 10268 }, { "epoch": 1.8693910985710385, "grad_norm": 8.6875, "learning_rate": 2.2567181960229727e-06, "loss": 1.2111611366271973, "step": 10270 }, { "epoch": 1.8697551651952309, "grad_norm": 29.0, "learning_rate": 2.2560069761019984e-06, "loss": 1.3516755104064941, "step": 10272 }, { "epoch": 1.870119231819423, "grad_norm": 9.25, "learning_rate": 2.255295865358986e-06, "loss": 1.218153953552246, "step": 10274 }, { "epoch": 1.8704832984436153, "grad_norm": 8.25, "learning_rate": 2.254584863898287e-06, "loss": 1.4296220541000366, "step": 10276 }, { "epoch": 1.8708473650678075, "grad_norm": 10.0625, "learning_rate": 2.2538739718242383e-06, "loss": 1.4985926151275635, "step": 10278 }, { "epoch": 1.8712114316919997, "grad_norm": 19.25, "learning_rate": 2.253163189241161e-06, "loss": 1.4593578577041626, "step": 10280 }, { "epoch": 1.8715754983161919, "grad_norm": 9.4375, "learning_rate": 2.2524525162533583e-06, "loss": 1.3749850988388062, "step": 10282 }, { "epoch": 1.871939564940384, "grad_norm": 9.6875, "learning_rate": 2.2517419529651196e-06, "loss": 1.3718054294586182, "step": 10284 }, { "epoch": 1.8723036315645762, "grad_norm": 10.0, "learning_rate": 2.251031499480717e-06, "loss": 1.5082831382751465, "step": 10286 }, { "epoch": 1.8726676981887684, "grad_norm": 6.8125, "learning_rate": 2.2503211559044068e-06, "loss": 1.3963520526885986, "step": 10288 }, { "epoch": 1.8730317648129606, "grad_norm": 11.125, "learning_rate": 2.2496109223404285e-06, "loss": 2.1480770111083984, "step": 10290 }, { "epoch": 1.873395831437153, "grad_norm": 13.0, "learning_rate": 2.2489007988930056e-06, "loss": 1.1892786026000977, "step": 10292 }, { "epoch": 1.8737598980613452, "grad_norm": 19.0, "learning_rate": 2.2481907856663475e-06, "loss": 1.8853135108947754, "step": 10294 }, { "epoch": 1.8741239646855374, "grad_norm": 26.625, "learning_rate": 2.2474808827646446e-06, "loss": 1.6226311922073364, "step": 10296 }, { "epoch": 1.8744880313097296, "grad_norm": 5.25, "learning_rate": 2.2467710902920724e-06, "loss": 1.3160977363586426, "step": 10298 }, { "epoch": 1.874852097933922, "grad_norm": 8.375, "learning_rate": 2.2460614083527902e-06, "loss": 1.3614863157272339, "step": 10300 }, { "epoch": 1.8752161645581142, "grad_norm": 17.0, "learning_rate": 2.2453518370509404e-06, "loss": 1.3437060117721558, "step": 10302 }, { "epoch": 1.8755802311823064, "grad_norm": 14.6875, "learning_rate": 2.2446423764906502e-06, "loss": 0.9286806583404541, "step": 10304 }, { "epoch": 1.8759442978064986, "grad_norm": 6.71875, "learning_rate": 2.243933026776031e-06, "loss": 1.4103400707244873, "step": 10306 }, { "epoch": 1.8763083644306908, "grad_norm": 11.6875, "learning_rate": 2.2432237880111748e-06, "loss": 1.3581068515777588, "step": 10308 }, { "epoch": 1.876672431054883, "grad_norm": 101.0, "learning_rate": 2.2425146603001617e-06, "loss": 1.7171987295150757, "step": 10310 }, { "epoch": 1.8770364976790752, "grad_norm": 23.125, "learning_rate": 2.241805643747052e-06, "loss": 1.5244677066802979, "step": 10312 }, { "epoch": 1.8774005643032674, "grad_norm": 60.0, "learning_rate": 2.241096738455891e-06, "loss": 1.9016815423965454, "step": 10314 }, { "epoch": 1.8777646309274596, "grad_norm": 12.3125, "learning_rate": 2.240387944530709e-06, "loss": 1.7663288116455078, "step": 10316 }, { "epoch": 1.8781286975516518, "grad_norm": 13.0625, "learning_rate": 2.239679262075517e-06, "loss": 1.251397967338562, "step": 10318 }, { "epoch": 1.8784927641758442, "grad_norm": 43.0, "learning_rate": 2.2389706911943125e-06, "loss": 1.0551949739456177, "step": 10320 }, { "epoch": 1.8788568308000364, "grad_norm": 15.0, "learning_rate": 2.2382622319910744e-06, "loss": 1.332777976989746, "step": 10322 }, { "epoch": 1.8792208974242286, "grad_norm": 49.75, "learning_rate": 2.237553884569767e-06, "loss": 1.8567054271697998, "step": 10324 }, { "epoch": 1.879584964048421, "grad_norm": 8.75, "learning_rate": 2.2368456490343372e-06, "loss": 0.8983043432235718, "step": 10326 }, { "epoch": 1.8799490306726132, "grad_norm": 6.65625, "learning_rate": 2.236137525488715e-06, "loss": 1.4657609462738037, "step": 10328 }, { "epoch": 1.8803130972968054, "grad_norm": 6.1875, "learning_rate": 2.235429514036816e-06, "loss": 0.9314013719558716, "step": 10330 }, { "epoch": 1.8806771639209976, "grad_norm": 5.40625, "learning_rate": 2.2347216147825367e-06, "loss": 1.109434962272644, "step": 10332 }, { "epoch": 1.8810412305451898, "grad_norm": 8.125, "learning_rate": 2.234013827829759e-06, "loss": 1.2846033573150635, "step": 10334 }, { "epoch": 1.881405297169382, "grad_norm": 20.25, "learning_rate": 2.2333061532823487e-06, "loss": 1.4278689622879028, "step": 10336 }, { "epoch": 1.8817693637935742, "grad_norm": 13.625, "learning_rate": 2.232598591244152e-06, "loss": 0.9231512546539307, "step": 10338 }, { "epoch": 1.8821334304177664, "grad_norm": 11.25, "learning_rate": 2.231891141819003e-06, "loss": 1.3879550695419312, "step": 10340 }, { "epoch": 1.8824974970419586, "grad_norm": 8.9375, "learning_rate": 2.2311838051107156e-06, "loss": 0.8556983470916748, "step": 10342 }, { "epoch": 1.8828615636661508, "grad_norm": 65.5, "learning_rate": 2.230476581223089e-06, "loss": 1.2869782447814941, "step": 10344 }, { "epoch": 1.8832256302903432, "grad_norm": 18.5, "learning_rate": 2.2297694702599064e-06, "loss": 1.6536414623260498, "step": 10346 }, { "epoch": 1.8835896969145354, "grad_norm": 16.5, "learning_rate": 2.2290624723249314e-06, "loss": 0.7474377155303955, "step": 10348 }, { "epoch": 1.8839537635387276, "grad_norm": 13.4375, "learning_rate": 2.228355587521915e-06, "loss": 1.7495023012161255, "step": 10350 }, { "epoch": 1.8843178301629198, "grad_norm": 13.5, "learning_rate": 2.2276488159545886e-06, "loss": 1.574780821800232, "step": 10352 }, { "epoch": 1.8846818967871122, "grad_norm": 16.875, "learning_rate": 2.226942157726668e-06, "loss": 1.9637646675109863, "step": 10354 }, { "epoch": 1.8850459634113044, "grad_norm": 10.375, "learning_rate": 2.226235612941854e-06, "loss": 1.7782838344573975, "step": 10356 }, { "epoch": 1.8854100300354966, "grad_norm": 8.5, "learning_rate": 2.2255291817038268e-06, "loss": 1.26701021194458, "step": 10358 }, { "epoch": 1.8857740966596888, "grad_norm": 7.15625, "learning_rate": 2.2248228641162542e-06, "loss": 0.7866450548171997, "step": 10360 }, { "epoch": 1.886138163283881, "grad_norm": 14.25, "learning_rate": 2.2241166602827844e-06, "loss": 0.9672653675079346, "step": 10362 }, { "epoch": 1.8865022299080731, "grad_norm": 11.0, "learning_rate": 2.22341057030705e-06, "loss": 1.0276377201080322, "step": 10364 }, { "epoch": 1.8868662965322653, "grad_norm": 15.125, "learning_rate": 2.2227045942926684e-06, "loss": 1.340790033340454, "step": 10366 }, { "epoch": 1.8872303631564575, "grad_norm": 11.8125, "learning_rate": 2.221998732343236e-06, "loss": 1.4866089820861816, "step": 10368 }, { "epoch": 1.8875944297806497, "grad_norm": 19.75, "learning_rate": 2.221292984562338e-06, "loss": 1.4822797775268555, "step": 10370 }, { "epoch": 1.887958496404842, "grad_norm": 21.125, "learning_rate": 2.220587351053538e-06, "loss": 1.6251622438430786, "step": 10372 }, { "epoch": 1.8883225630290343, "grad_norm": 5.03125, "learning_rate": 2.2198818319203855e-06, "loss": 1.21848726272583, "step": 10374 }, { "epoch": 1.8886866296532265, "grad_norm": 16.125, "learning_rate": 2.219176427266413e-06, "loss": 1.2360891103744507, "step": 10376 }, { "epoch": 1.8890506962774187, "grad_norm": 10.8125, "learning_rate": 2.218471137195135e-06, "loss": 1.1219102144241333, "step": 10378 }, { "epoch": 1.8894147629016111, "grad_norm": 5.21875, "learning_rate": 2.217765961810051e-06, "loss": 1.5207395553588867, "step": 10380 }, { "epoch": 1.8897788295258033, "grad_norm": 8.1875, "learning_rate": 2.217060901214641e-06, "loss": 1.1101880073547363, "step": 10382 }, { "epoch": 1.8901428961499955, "grad_norm": 10.625, "learning_rate": 2.2163559555123716e-06, "loss": 1.5389623641967773, "step": 10384 }, { "epoch": 1.8905069627741877, "grad_norm": 22.5, "learning_rate": 2.2156511248066896e-06, "loss": 1.6152387857437134, "step": 10386 }, { "epoch": 1.89087102939838, "grad_norm": 12.875, "learning_rate": 2.214946409201026e-06, "loss": 1.2975308895111084, "step": 10388 }, { "epoch": 1.8912350960225721, "grad_norm": 11.0625, "learning_rate": 2.214241808798796e-06, "loss": 0.8062430024147034, "step": 10390 }, { "epoch": 1.8915991626467643, "grad_norm": 14.5, "learning_rate": 2.2135373237033957e-06, "loss": 0.6579453945159912, "step": 10392 }, { "epoch": 1.8919632292709565, "grad_norm": 5.8125, "learning_rate": 2.212832954018206e-06, "loss": 0.9215497374534607, "step": 10394 }, { "epoch": 1.8923272958951487, "grad_norm": 8.75, "learning_rate": 2.2121286998465904e-06, "loss": 1.3262220621109009, "step": 10396 }, { "epoch": 1.892691362519341, "grad_norm": 33.0, "learning_rate": 2.2114245612918945e-06, "loss": 1.3361772298812866, "step": 10398 }, { "epoch": 1.8930554291435333, "grad_norm": 19.25, "learning_rate": 2.210720538457449e-06, "loss": 1.1203155517578125, "step": 10400 }, { "epoch": 1.8934194957677255, "grad_norm": 27.125, "learning_rate": 2.2100166314465654e-06, "loss": 1.2038145065307617, "step": 10402 }, { "epoch": 1.8937835623919177, "grad_norm": 21.625, "learning_rate": 2.20931284036254e-06, "loss": 1.8017784357070923, "step": 10404 }, { "epoch": 1.8941476290161101, "grad_norm": 7.3125, "learning_rate": 2.208609165308651e-06, "loss": 1.215720295906067, "step": 10406 }, { "epoch": 1.8945116956403023, "grad_norm": 11.5625, "learning_rate": 2.2079056063881595e-06, "loss": 1.5422110557556152, "step": 10408 }, { "epoch": 1.8948757622644945, "grad_norm": 5.5, "learning_rate": 2.20720216370431e-06, "loss": 1.4257352352142334, "step": 10410 }, { "epoch": 1.8952398288886867, "grad_norm": 21.875, "learning_rate": 2.2064988373603302e-06, "loss": 1.1668429374694824, "step": 10412 }, { "epoch": 1.895603895512879, "grad_norm": 27.125, "learning_rate": 2.2057956274594303e-06, "loss": 1.9746252298355103, "step": 10414 }, { "epoch": 1.895967962137071, "grad_norm": 20.875, "learning_rate": 2.205092534104804e-06, "loss": 1.4797194004058838, "step": 10416 }, { "epoch": 1.8963320287612633, "grad_norm": 15.875, "learning_rate": 2.2043895573996256e-06, "loss": 1.1040180921554565, "step": 10418 }, { "epoch": 1.8966960953854555, "grad_norm": 18.125, "learning_rate": 2.2036866974470565e-06, "loss": 1.563361644744873, "step": 10420 }, { "epoch": 1.8970601620096477, "grad_norm": 8.625, "learning_rate": 2.202983954350236e-06, "loss": 1.3463839292526245, "step": 10422 }, { "epoch": 1.8974242286338399, "grad_norm": 18.625, "learning_rate": 2.2022813282122915e-06, "loss": 1.7155592441558838, "step": 10424 }, { "epoch": 1.897788295258032, "grad_norm": 13.1875, "learning_rate": 2.2015788191363293e-06, "loss": 1.3703267574310303, "step": 10426 }, { "epoch": 1.8981523618822245, "grad_norm": 13.4375, "learning_rate": 2.2008764272254384e-06, "loss": 1.5700005292892456, "step": 10428 }, { "epoch": 1.8985164285064167, "grad_norm": 14.5625, "learning_rate": 2.2001741525826943e-06, "loss": 1.172662377357483, "step": 10430 }, { "epoch": 1.8988804951306089, "grad_norm": 31.125, "learning_rate": 2.1994719953111516e-06, "loss": 1.2414630651474, "step": 10432 }, { "epoch": 1.8992445617548013, "grad_norm": 12.125, "learning_rate": 2.198769955513849e-06, "loss": 1.6593002080917358, "step": 10434 }, { "epoch": 1.8996086283789935, "grad_norm": 20.625, "learning_rate": 2.1980680332938088e-06, "loss": 1.8022005558013916, "step": 10436 }, { "epoch": 1.8999726950031857, "grad_norm": 9.8125, "learning_rate": 2.1973662287540345e-06, "loss": 1.1300904750823975, "step": 10438 }, { "epoch": 1.9003367616273779, "grad_norm": 8.3125, "learning_rate": 2.196664541997514e-06, "loss": 1.2492272853851318, "step": 10440 }, { "epoch": 1.90070082825157, "grad_norm": 7.6875, "learning_rate": 2.195962973127215e-06, "loss": 1.329573392868042, "step": 10442 }, { "epoch": 1.9010648948757622, "grad_norm": 9.1875, "learning_rate": 2.1952615222460922e-06, "loss": 1.345508337020874, "step": 10444 }, { "epoch": 1.9014289614999544, "grad_norm": 28.375, "learning_rate": 2.19456018945708e-06, "loss": 1.9136571884155273, "step": 10446 }, { "epoch": 1.9017930281241466, "grad_norm": 7.875, "learning_rate": 2.1938589748630947e-06, "loss": 0.9954184293746948, "step": 10448 }, { "epoch": 1.9021570947483388, "grad_norm": 5.5, "learning_rate": 2.193157878567039e-06, "loss": 1.0466398000717163, "step": 10450 }, { "epoch": 1.902521161372531, "grad_norm": 8.375, "learning_rate": 2.192456900671794e-06, "loss": 1.422209620475769, "step": 10452 }, { "epoch": 1.9028852279967234, "grad_norm": 9.25, "learning_rate": 2.191756041280226e-06, "loss": 1.5524622201919556, "step": 10454 }, { "epoch": 1.9032492946209156, "grad_norm": 9.8125, "learning_rate": 2.191055300495184e-06, "loss": 1.1897838115692139, "step": 10456 }, { "epoch": 1.9036133612451078, "grad_norm": 25.0, "learning_rate": 2.190354678419498e-06, "loss": 1.3939895629882812, "step": 10458 }, { "epoch": 1.9039774278693002, "grad_norm": 17.25, "learning_rate": 2.1896541751559823e-06, "loss": 1.312684178352356, "step": 10460 }, { "epoch": 1.9043414944934924, "grad_norm": 10.5625, "learning_rate": 2.188953790807431e-06, "loss": 0.9836719036102295, "step": 10462 }, { "epoch": 1.9047055611176846, "grad_norm": 12.125, "learning_rate": 2.188253525476625e-06, "loss": 1.1918948888778687, "step": 10464 }, { "epoch": 1.9050696277418768, "grad_norm": 13.125, "learning_rate": 2.187553379266325e-06, "loss": 1.5050095319747925, "step": 10466 }, { "epoch": 1.905433694366069, "grad_norm": 9.0, "learning_rate": 2.186853352279273e-06, "loss": 1.5979856252670288, "step": 10468 }, { "epoch": 1.9057977609902612, "grad_norm": 20.125, "learning_rate": 2.186153444618197e-06, "loss": 1.4531993865966797, "step": 10470 }, { "epoch": 1.9061618276144534, "grad_norm": 16.125, "learning_rate": 2.1854536563858037e-06, "loss": 1.4323182106018066, "step": 10472 }, { "epoch": 1.9065258942386456, "grad_norm": 12.25, "learning_rate": 2.1847539876847858e-06, "loss": 1.371595859527588, "step": 10474 }, { "epoch": 1.9068899608628378, "grad_norm": 61.0, "learning_rate": 2.1840544386178166e-06, "loss": 1.3252613544464111, "step": 10476 }, { "epoch": 1.90725402748703, "grad_norm": 33.5, "learning_rate": 2.183355009287551e-06, "loss": 2.0604937076568604, "step": 10478 }, { "epoch": 1.9076180941112224, "grad_norm": 7.1875, "learning_rate": 2.182655699796629e-06, "loss": 1.1572775840759277, "step": 10480 }, { "epoch": 1.9079821607354146, "grad_norm": 15.375, "learning_rate": 2.1819565102476703e-06, "loss": 1.7954611778259277, "step": 10482 }, { "epoch": 1.9083462273596068, "grad_norm": 20.375, "learning_rate": 2.1812574407432786e-06, "loss": 1.4730114936828613, "step": 10484 }, { "epoch": 1.908710293983799, "grad_norm": 36.25, "learning_rate": 2.1805584913860397e-06, "loss": 1.9809212684631348, "step": 10486 }, { "epoch": 1.9090743606079914, "grad_norm": 21.625, "learning_rate": 2.17985966227852e-06, "loss": 1.4753215312957764, "step": 10488 }, { "epoch": 1.9094384272321836, "grad_norm": 9.875, "learning_rate": 2.1791609535232728e-06, "loss": 1.40386164188385, "step": 10490 }, { "epoch": 1.9098024938563758, "grad_norm": 11.4375, "learning_rate": 2.178462365222828e-06, "loss": 0.9462644457817078, "step": 10492 }, { "epoch": 1.910166560480568, "grad_norm": 17.125, "learning_rate": 2.1777638974797022e-06, "loss": 1.0285927057266235, "step": 10494 }, { "epoch": 1.9105306271047602, "grad_norm": 16.125, "learning_rate": 2.1770655503963924e-06, "loss": 0.5643746256828308, "step": 10496 }, { "epoch": 1.9108946937289524, "grad_norm": 12.125, "learning_rate": 2.176367324075377e-06, "loss": 1.461489200592041, "step": 10498 }, { "epoch": 1.9112587603531446, "grad_norm": 23.375, "learning_rate": 2.17566921861912e-06, "loss": 1.9206092357635498, "step": 10500 }, { "epoch": 1.9116228269773368, "grad_norm": 9.3125, "learning_rate": 2.174971234130064e-06, "loss": 1.2126343250274658, "step": 10502 }, { "epoch": 1.911986893601529, "grad_norm": 6.90625, "learning_rate": 2.1742733707106355e-06, "loss": 1.4992780685424805, "step": 10504 }, { "epoch": 1.9123509602257212, "grad_norm": 7.1875, "learning_rate": 2.1735756284632444e-06, "loss": 1.0821495056152344, "step": 10506 }, { "epoch": 1.9127150268499136, "grad_norm": 10.0, "learning_rate": 2.1728780074902795e-06, "loss": 1.4519574642181396, "step": 10508 }, { "epoch": 1.9130790934741058, "grad_norm": 5.84375, "learning_rate": 2.172180507894116e-06, "loss": 1.2001405954360962, "step": 10510 }, { "epoch": 1.913443160098298, "grad_norm": 9.8125, "learning_rate": 2.1714831297771074e-06, "loss": 1.3419532775878906, "step": 10512 }, { "epoch": 1.9138072267224904, "grad_norm": 7.21875, "learning_rate": 2.170785873241592e-06, "loss": 1.4213727712631226, "step": 10514 }, { "epoch": 1.9141712933466826, "grad_norm": 25.0, "learning_rate": 2.17008873838989e-06, "loss": 1.494529366493225, "step": 10516 }, { "epoch": 1.9145353599708748, "grad_norm": 9.4375, "learning_rate": 2.169391725324301e-06, "loss": 1.3124151229858398, "step": 10518 }, { "epoch": 1.914899426595067, "grad_norm": 6.9375, "learning_rate": 2.1686948341471108e-06, "loss": 1.2098684310913086, "step": 10520 }, { "epoch": 1.9152634932192591, "grad_norm": 22.0, "learning_rate": 2.167998064960584e-06, "loss": 1.5387494564056396, "step": 10522 }, { "epoch": 1.9156275598434513, "grad_norm": 29.625, "learning_rate": 2.16730141786697e-06, "loss": 1.5989289283752441, "step": 10524 }, { "epoch": 1.9159916264676435, "grad_norm": 43.25, "learning_rate": 2.1666048929684993e-06, "loss": 1.5735671520233154, "step": 10526 }, { "epoch": 1.9163556930918357, "grad_norm": 14.0, "learning_rate": 2.165908490367381e-06, "loss": 1.5775954723358154, "step": 10528 }, { "epoch": 1.916719759716028, "grad_norm": 9.4375, "learning_rate": 2.165212210165813e-06, "loss": 1.4131669998168945, "step": 10530 }, { "epoch": 1.9170838263402201, "grad_norm": 10.625, "learning_rate": 2.16451605246597e-06, "loss": 1.5526275634765625, "step": 10532 }, { "epoch": 1.9174478929644125, "grad_norm": 19.5, "learning_rate": 2.163820017370009e-06, "loss": 1.430741548538208, "step": 10534 }, { "epoch": 1.9178119595886047, "grad_norm": 6.28125, "learning_rate": 2.1631241049800733e-06, "loss": 1.2797216176986694, "step": 10536 }, { "epoch": 1.918176026212797, "grad_norm": 7.84375, "learning_rate": 2.1624283153982822e-06, "loss": 0.9575626254081726, "step": 10538 }, { "epoch": 1.9185400928369891, "grad_norm": 18.875, "learning_rate": 2.161732648726743e-06, "loss": 1.9104572534561157, "step": 10540 }, { "epoch": 1.9189041594611815, "grad_norm": 9.0625, "learning_rate": 2.16103710506754e-06, "loss": 1.2149488925933838, "step": 10542 }, { "epoch": 1.9192682260853737, "grad_norm": 4.03125, "learning_rate": 2.1603416845227403e-06, "loss": 0.8648663759231567, "step": 10544 }, { "epoch": 1.919632292709566, "grad_norm": 7.46875, "learning_rate": 2.1596463871943977e-06, "loss": 1.5795938968658447, "step": 10546 }, { "epoch": 1.9199963593337581, "grad_norm": 7.15625, "learning_rate": 2.1589512131845405e-06, "loss": 0.9581153392791748, "step": 10548 }, { "epoch": 1.9203604259579503, "grad_norm": 11.0625, "learning_rate": 2.1582561625951857e-06, "loss": 1.3011606931686401, "step": 10550 }, { "epoch": 1.9207244925821425, "grad_norm": 7.34375, "learning_rate": 2.1575612355283266e-06, "loss": 1.2429723739624023, "step": 10552 }, { "epoch": 1.9210885592063347, "grad_norm": 9.0625, "learning_rate": 2.156866432085942e-06, "loss": 1.2560807466506958, "step": 10554 }, { "epoch": 1.921452625830527, "grad_norm": 3.3125, "learning_rate": 2.1561717523699927e-06, "loss": 1.2178103923797607, "step": 10556 }, { "epoch": 1.921816692454719, "grad_norm": 20.5, "learning_rate": 2.1554771964824177e-06, "loss": 1.1888086795806885, "step": 10558 }, { "epoch": 1.9221807590789113, "grad_norm": 9.9375, "learning_rate": 2.154782764525143e-06, "loss": 1.4054150581359863, "step": 10560 }, { "epoch": 1.9225448257031037, "grad_norm": 25.875, "learning_rate": 2.154088456600071e-06, "loss": 1.4726853370666504, "step": 10562 }, { "epoch": 1.922908892327296, "grad_norm": 11.9375, "learning_rate": 2.1533942728090897e-06, "loss": 1.5992368459701538, "step": 10564 }, { "epoch": 1.923272958951488, "grad_norm": 13.0, "learning_rate": 2.152700213254069e-06, "loss": 0.5055093765258789, "step": 10566 }, { "epoch": 1.9236370255756805, "grad_norm": 17.375, "learning_rate": 2.152006278036857e-06, "loss": 1.3911727666854858, "step": 10568 }, { "epoch": 1.9240010921998727, "grad_norm": 8.9375, "learning_rate": 2.1513124672592887e-06, "loss": 1.0272668600082397, "step": 10570 }, { "epoch": 1.924365158824065, "grad_norm": 4.78125, "learning_rate": 2.1506187810231753e-06, "loss": 0.9814562797546387, "step": 10572 }, { "epoch": 1.924729225448257, "grad_norm": 8.4375, "learning_rate": 2.1499252194303127e-06, "loss": 1.3142948150634766, "step": 10574 }, { "epoch": 1.9250932920724493, "grad_norm": 7.0625, "learning_rate": 2.149231782582481e-06, "loss": 1.340053915977478, "step": 10576 }, { "epoch": 1.9254573586966415, "grad_norm": 4.65625, "learning_rate": 2.1485384705814354e-06, "loss": 1.0525617599487305, "step": 10578 }, { "epoch": 1.9258214253208337, "grad_norm": 12.375, "learning_rate": 2.1478452835289205e-06, "loss": 1.1405552625656128, "step": 10580 }, { "epoch": 1.9261854919450259, "grad_norm": 11.25, "learning_rate": 2.1471522215266556e-06, "loss": 1.4168860912322998, "step": 10582 }, { "epoch": 1.926549558569218, "grad_norm": 8.125, "learning_rate": 2.1464592846763454e-06, "loss": 1.247086763381958, "step": 10584 }, { "epoch": 1.9269136251934103, "grad_norm": 16.25, "learning_rate": 2.1457664730796775e-06, "loss": 1.7664971351623535, "step": 10586 }, { "epoch": 1.9272776918176027, "grad_norm": 26.625, "learning_rate": 2.1450737868383166e-06, "loss": 1.989170789718628, "step": 10588 }, { "epoch": 1.9276417584417949, "grad_norm": 5.40625, "learning_rate": 2.1443812260539137e-06, "loss": 1.3812612295150757, "step": 10590 }, { "epoch": 1.928005825065987, "grad_norm": 5.9375, "learning_rate": 2.143688790828098e-06, "loss": 1.1844573020935059, "step": 10592 }, { "epoch": 1.9283698916901792, "grad_norm": 3.71875, "learning_rate": 2.142996481262481e-06, "loss": 1.097573161125183, "step": 10594 }, { "epoch": 1.9287339583143717, "grad_norm": 12.125, "learning_rate": 2.142304297458659e-06, "loss": 1.2319780588150024, "step": 10596 }, { "epoch": 1.9290980249385639, "grad_norm": 13.0625, "learning_rate": 2.1416122395182037e-06, "loss": 1.3655378818511963, "step": 10598 }, { "epoch": 1.929462091562756, "grad_norm": 14.125, "learning_rate": 2.140920307542675e-06, "loss": 1.8357151746749878, "step": 10600 }, { "epoch": 1.9298261581869482, "grad_norm": 8.5625, "learning_rate": 2.140228501633609e-06, "loss": 1.3636448383331299, "step": 10602 }, { "epoch": 1.9301902248111404, "grad_norm": 4.5625, "learning_rate": 2.139536821892525e-06, "loss": 1.3478671312332153, "step": 10604 }, { "epoch": 1.9305542914353326, "grad_norm": 5.21875, "learning_rate": 2.138845268420927e-06, "loss": 1.1939629316329956, "step": 10606 }, { "epoch": 1.9309183580595248, "grad_norm": 9.5, "learning_rate": 2.138153841320294e-06, "loss": 0.9429299235343933, "step": 10608 }, { "epoch": 1.931282424683717, "grad_norm": 13.375, "learning_rate": 2.1374625406920945e-06, "loss": 1.3931149244308472, "step": 10610 }, { "epoch": 1.9316464913079092, "grad_norm": 13.9375, "learning_rate": 2.1367713666377698e-06, "loss": 1.9956785440444946, "step": 10612 }, { "epoch": 1.9320105579321014, "grad_norm": 5.6875, "learning_rate": 2.1360803192587483e-06, "loss": 1.1798756122589111, "step": 10614 }, { "epoch": 1.9323746245562938, "grad_norm": 9.1875, "learning_rate": 2.13538939865644e-06, "loss": 1.2579030990600586, "step": 10616 }, { "epoch": 1.932738691180486, "grad_norm": 8.125, "learning_rate": 2.1346986049322327e-06, "loss": 1.6402931213378906, "step": 10618 }, { "epoch": 1.9331027578046782, "grad_norm": 8.3125, "learning_rate": 2.1340079381874997e-06, "loss": 1.1270995140075684, "step": 10620 }, { "epoch": 1.9334668244288706, "grad_norm": 2.984375, "learning_rate": 2.133317398523591e-06, "loss": 1.2936630249023438, "step": 10622 }, { "epoch": 1.9338308910530628, "grad_norm": 8.5625, "learning_rate": 2.1326269860418413e-06, "loss": 0.9579342603683472, "step": 10624 }, { "epoch": 1.934194957677255, "grad_norm": 11.1875, "learning_rate": 2.1319367008435677e-06, "loss": 1.431419849395752, "step": 10626 }, { "epoch": 1.9345590243014472, "grad_norm": 10.375, "learning_rate": 2.1312465430300633e-06, "loss": 1.0643624067306519, "step": 10628 }, { "epoch": 1.9349230909256394, "grad_norm": 14.3125, "learning_rate": 2.1305565127026102e-06, "loss": 1.524848222732544, "step": 10630 }, { "epoch": 1.9352871575498316, "grad_norm": 27.5, "learning_rate": 2.1298666099624645e-06, "loss": 1.893900752067566, "step": 10632 }, { "epoch": 1.9356512241740238, "grad_norm": 19.75, "learning_rate": 2.129176834910866e-06, "loss": 1.8068509101867676, "step": 10634 }, { "epoch": 1.936015290798216, "grad_norm": 12.9375, "learning_rate": 2.1284871876490397e-06, "loss": 1.4855549335479736, "step": 10636 }, { "epoch": 1.9363793574224082, "grad_norm": 11.625, "learning_rate": 2.1277976682781853e-06, "loss": 1.6553020477294922, "step": 10638 }, { "epoch": 1.9367434240466004, "grad_norm": 6.59375, "learning_rate": 2.12710827689949e-06, "loss": 1.4351539611816406, "step": 10640 }, { "epoch": 1.9371074906707928, "grad_norm": 10.0625, "learning_rate": 2.1264190136141166e-06, "loss": 0.9558985233306885, "step": 10642 }, { "epoch": 1.937471557294985, "grad_norm": 2.65625, "learning_rate": 2.125729878523212e-06, "loss": 0.9934003949165344, "step": 10644 }, { "epoch": 1.9378356239191772, "grad_norm": 8.1875, "learning_rate": 2.125040871727906e-06, "loss": 1.3211816549301147, "step": 10646 }, { "epoch": 1.9381996905433696, "grad_norm": 26.75, "learning_rate": 2.1243519933293046e-06, "loss": 1.3147072792053223, "step": 10648 }, { "epoch": 1.9385637571675618, "grad_norm": 28.625, "learning_rate": 2.123663243428501e-06, "loss": 1.7976093292236328, "step": 10650 }, { "epoch": 1.938927823791754, "grad_norm": 7.25, "learning_rate": 2.1229746221265644e-06, "loss": 1.3431675434112549, "step": 10652 }, { "epoch": 1.9392918904159462, "grad_norm": 11.3125, "learning_rate": 2.1222861295245467e-06, "loss": 1.3423734903335571, "step": 10654 }, { "epoch": 1.9396559570401384, "grad_norm": 13.5, "learning_rate": 2.1215977657234843e-06, "loss": 1.385918140411377, "step": 10656 }, { "epoch": 1.9400200236643306, "grad_norm": 12.0, "learning_rate": 2.1209095308243883e-06, "loss": 1.2211955785751343, "step": 10658 }, { "epoch": 1.9403840902885228, "grad_norm": 9.5625, "learning_rate": 2.1202214249282573e-06, "loss": 1.0299816131591797, "step": 10660 }, { "epoch": 1.940748156912715, "grad_norm": 34.0, "learning_rate": 2.1195334481360665e-06, "loss": 1.522233486175537, "step": 10662 }, { "epoch": 1.9411122235369072, "grad_norm": 76.5, "learning_rate": 2.1188456005487725e-06, "loss": 1.7297430038452148, "step": 10664 }, { "epoch": 1.9414762901610993, "grad_norm": 12.1875, "learning_rate": 2.1181578822673175e-06, "loss": 1.4662880897521973, "step": 10666 }, { "epoch": 1.9418403567852915, "grad_norm": 9.9375, "learning_rate": 2.117470293392618e-06, "loss": 1.5376778841018677, "step": 10668 }, { "epoch": 1.942204423409484, "grad_norm": 9.3125, "learning_rate": 2.116782834025578e-06, "loss": 1.182296633720398, "step": 10670 }, { "epoch": 1.9425684900336762, "grad_norm": 8.9375, "learning_rate": 2.116095504267077e-06, "loss": 1.425065040588379, "step": 10672 }, { "epoch": 1.9429325566578683, "grad_norm": 14.8125, "learning_rate": 2.1154083042179773e-06, "loss": 1.4091671705245972, "step": 10674 }, { "epoch": 1.9432966232820608, "grad_norm": 7.0625, "learning_rate": 2.1147212339791257e-06, "loss": 1.423144817352295, "step": 10676 }, { "epoch": 1.943660689906253, "grad_norm": 14.375, "learning_rate": 2.114034293651344e-06, "loss": 1.092933177947998, "step": 10678 }, { "epoch": 1.9440247565304452, "grad_norm": 16.25, "learning_rate": 2.113347483335441e-06, "loss": 0.911866307258606, "step": 10680 }, { "epoch": 1.9443888231546373, "grad_norm": 12.875, "learning_rate": 2.1126608031322006e-06, "loss": 1.382908582687378, "step": 10682 }, { "epoch": 1.9447528897788295, "grad_norm": 9.375, "learning_rate": 2.11197425314239e-06, "loss": 1.3330384492874146, "step": 10684 }, { "epoch": 1.9451169564030217, "grad_norm": 15.0625, "learning_rate": 2.1112878334667607e-06, "loss": 1.2274844646453857, "step": 10686 }, { "epoch": 1.945481023027214, "grad_norm": 11.5625, "learning_rate": 2.110601544206039e-06, "loss": 1.5134776830673218, "step": 10688 }, { "epoch": 1.9458450896514061, "grad_norm": 84.5, "learning_rate": 2.1099153854609377e-06, "loss": 1.4281120300292969, "step": 10690 }, { "epoch": 1.9462091562755983, "grad_norm": 16.25, "learning_rate": 2.1092293573321455e-06, "loss": 1.6313104629516602, "step": 10692 }, { "epoch": 1.9465732228997905, "grad_norm": 13.4375, "learning_rate": 2.1085434599203343e-06, "loss": 1.9710571765899658, "step": 10694 }, { "epoch": 1.946937289523983, "grad_norm": 21.0, "learning_rate": 2.1078576933261593e-06, "loss": 1.228401780128479, "step": 10696 }, { "epoch": 1.9473013561481751, "grad_norm": 21.5, "learning_rate": 2.1071720576502503e-06, "loss": 1.3314635753631592, "step": 10698 }, { "epoch": 1.9476654227723673, "grad_norm": 8.6875, "learning_rate": 2.1064865529932253e-06, "loss": 1.859405517578125, "step": 10700 }, { "epoch": 1.9480294893965597, "grad_norm": 7.4375, "learning_rate": 2.105801179455677e-06, "loss": 1.380920648574829, "step": 10702 }, { "epoch": 1.948393556020752, "grad_norm": 4.5625, "learning_rate": 2.105115937138181e-06, "loss": 0.9663572907447815, "step": 10704 }, { "epoch": 1.9487576226449441, "grad_norm": 14.6875, "learning_rate": 2.104430826141296e-06, "loss": 1.6403248310089111, "step": 10706 }, { "epoch": 1.9491216892691363, "grad_norm": 8.625, "learning_rate": 2.1037458465655562e-06, "loss": 1.5831127166748047, "step": 10708 }, { "epoch": 1.9494857558933285, "grad_norm": 16.0, "learning_rate": 2.103060998511483e-06, "loss": 1.4402159452438354, "step": 10710 }, { "epoch": 1.9498498225175207, "grad_norm": 17.875, "learning_rate": 2.102376282079572e-06, "loss": 1.1778827905654907, "step": 10712 }, { "epoch": 1.950213889141713, "grad_norm": 20.0, "learning_rate": 2.1016916973703033e-06, "loss": 0.9038905501365662, "step": 10714 }, { "epoch": 1.950577955765905, "grad_norm": 9.5, "learning_rate": 2.1010072444841394e-06, "loss": 1.0613863468170166, "step": 10716 }, { "epoch": 1.9509420223900973, "grad_norm": 4.0625, "learning_rate": 2.100322923521517e-06, "loss": 0.9329920411109924, "step": 10718 }, { "epoch": 1.9513060890142895, "grad_norm": 23.5, "learning_rate": 2.099638734582862e-06, "loss": 1.1665724515914917, "step": 10720 }, { "epoch": 1.951670155638482, "grad_norm": 11.0, "learning_rate": 2.098954677768572e-06, "loss": 1.405213713645935, "step": 10722 }, { "epoch": 1.952034222262674, "grad_norm": 11.375, "learning_rate": 2.098270753179031e-06, "loss": 1.4372384548187256, "step": 10724 }, { "epoch": 1.9523982888868663, "grad_norm": 6.5625, "learning_rate": 2.097586960914604e-06, "loss": 1.3001532554626465, "step": 10726 }, { "epoch": 1.9527623555110585, "grad_norm": 18.375, "learning_rate": 2.096903301075632e-06, "loss": 1.5140721797943115, "step": 10728 }, { "epoch": 1.953126422135251, "grad_norm": 16.625, "learning_rate": 2.0962197737624423e-06, "loss": 2.1442830562591553, "step": 10730 }, { "epoch": 1.953490488759443, "grad_norm": 13.375, "learning_rate": 2.095536379075337e-06, "loss": 1.4570618867874146, "step": 10732 }, { "epoch": 1.9538545553836353, "grad_norm": 9.6875, "learning_rate": 2.094853117114602e-06, "loss": 1.5648422241210938, "step": 10734 }, { "epoch": 1.9542186220078275, "grad_norm": 9.75, "learning_rate": 2.0941699879805057e-06, "loss": 1.4410163164138794, "step": 10736 }, { "epoch": 1.9545826886320197, "grad_norm": 12.625, "learning_rate": 2.09348699177329e-06, "loss": 1.5370848178863525, "step": 10738 }, { "epoch": 1.9549467552562119, "grad_norm": 13.9375, "learning_rate": 2.092804128593187e-06, "loss": 1.5675652027130127, "step": 10740 }, { "epoch": 1.955310821880404, "grad_norm": 11.8125, "learning_rate": 2.092121398540401e-06, "loss": 1.2988542318344116, "step": 10742 }, { "epoch": 1.9556748885045963, "grad_norm": 9.75, "learning_rate": 2.091438801715119e-06, "loss": 0.9742361307144165, "step": 10744 }, { "epoch": 1.9560389551287884, "grad_norm": 9.6875, "learning_rate": 2.0907563382175127e-06, "loss": 1.7126858234405518, "step": 10746 }, { "epoch": 1.9564030217529806, "grad_norm": 28.75, "learning_rate": 2.090074008147727e-06, "loss": 1.7257542610168457, "step": 10748 }, { "epoch": 1.956767088377173, "grad_norm": 23.25, "learning_rate": 2.0893918116058953e-06, "loss": 1.5862066745758057, "step": 10750 }, { "epoch": 1.9571311550013653, "grad_norm": 16.625, "learning_rate": 2.0887097486921235e-06, "loss": 1.7677816152572632, "step": 10752 }, { "epoch": 1.9574952216255574, "grad_norm": 15.375, "learning_rate": 2.0880278195065024e-06, "loss": 1.473554253578186, "step": 10754 }, { "epoch": 1.9578592882497499, "grad_norm": 8.3125, "learning_rate": 2.087346024149104e-06, "loss": 1.0296990871429443, "step": 10756 }, { "epoch": 1.958223354873942, "grad_norm": 13.625, "learning_rate": 2.0866643627199767e-06, "loss": 1.5855239629745483, "step": 10758 }, { "epoch": 1.9585874214981343, "grad_norm": 11.5, "learning_rate": 2.0859828353191545e-06, "loss": 1.4761435985565186, "step": 10760 }, { "epoch": 1.9589514881223264, "grad_norm": 8.3125, "learning_rate": 2.0853014420466462e-06, "loss": 1.4762241840362549, "step": 10762 }, { "epoch": 1.9593155547465186, "grad_norm": 6.53125, "learning_rate": 2.0846201830024437e-06, "loss": 1.0595502853393555, "step": 10764 }, { "epoch": 1.9596796213707108, "grad_norm": 8.375, "learning_rate": 2.0839390582865204e-06, "loss": 1.0314304828643799, "step": 10766 }, { "epoch": 1.960043687994903, "grad_norm": 26.75, "learning_rate": 2.083258067998827e-06, "loss": 1.520331621170044, "step": 10768 }, { "epoch": 1.9604077546190952, "grad_norm": 25.125, "learning_rate": 2.0825772122392984e-06, "loss": 1.411595106124878, "step": 10770 }, { "epoch": 1.9607718212432874, "grad_norm": 14.875, "learning_rate": 2.0818964911078442e-06, "loss": 1.3655226230621338, "step": 10772 }, { "epoch": 1.9611358878674796, "grad_norm": 12.375, "learning_rate": 2.0812159047043593e-06, "loss": 1.4935168027877808, "step": 10774 }, { "epoch": 1.961499954491672, "grad_norm": 9.75, "learning_rate": 2.0805354531287185e-06, "loss": 1.5060609579086304, "step": 10776 }, { "epoch": 1.9618640211158642, "grad_norm": 6.78125, "learning_rate": 2.0798551364807717e-06, "loss": 1.092186689376831, "step": 10778 }, { "epoch": 1.9622280877400564, "grad_norm": 56.25, "learning_rate": 2.0791749548603567e-06, "loss": 1.1833786964416504, "step": 10780 }, { "epoch": 1.9625921543642486, "grad_norm": 3.875, "learning_rate": 2.078494908367284e-06, "loss": 1.3987946510314941, "step": 10782 }, { "epoch": 1.962956220988441, "grad_norm": 13.0625, "learning_rate": 2.0778149971013483e-06, "loss": 0.859051525592804, "step": 10784 }, { "epoch": 1.9633202876126332, "grad_norm": 20.125, "learning_rate": 2.0771352211623264e-06, "loss": 1.0024619102478027, "step": 10786 }, { "epoch": 1.9636843542368254, "grad_norm": 15.5, "learning_rate": 2.0764555806499688e-06, "loss": 1.5861179828643799, "step": 10788 }, { "epoch": 1.9640484208610176, "grad_norm": 21.375, "learning_rate": 2.075776075664013e-06, "loss": 1.8818206787109375, "step": 10790 }, { "epoch": 1.9644124874852098, "grad_norm": 7.03125, "learning_rate": 2.075096706304173e-06, "loss": 0.9894894361495972, "step": 10792 }, { "epoch": 1.964776554109402, "grad_norm": 10.4375, "learning_rate": 2.0744174726701414e-06, "loss": 1.0505038499832153, "step": 10794 }, { "epoch": 1.9651406207335942, "grad_norm": 6.0625, "learning_rate": 2.0737383748615962e-06, "loss": 1.4778542518615723, "step": 10796 }, { "epoch": 1.9655046873577864, "grad_norm": 22.0, "learning_rate": 2.0730594129781895e-06, "loss": 1.1886543035507202, "step": 10798 }, { "epoch": 1.9658687539819786, "grad_norm": 10.0, "learning_rate": 2.072380587119559e-06, "loss": 1.3207370042800903, "step": 10800 }, { "epoch": 1.9662328206061708, "grad_norm": 30.625, "learning_rate": 2.0717018973853166e-06, "loss": 1.598076343536377, "step": 10802 }, { "epoch": 1.9665968872303632, "grad_norm": 10.75, "learning_rate": 2.0710233438750585e-06, "loss": 1.732502818107605, "step": 10804 }, { "epoch": 1.9669609538545554, "grad_norm": 10.0625, "learning_rate": 2.0703449266883613e-06, "loss": 1.1132270097732544, "step": 10806 }, { "epoch": 1.9673250204787476, "grad_norm": 14.0, "learning_rate": 2.0696666459247773e-06, "loss": 1.4258068799972534, "step": 10808 }, { "epoch": 1.96768908710294, "grad_norm": 15.125, "learning_rate": 2.0689885016838437e-06, "loss": 1.3199408054351807, "step": 10810 }, { "epoch": 1.9680531537271322, "grad_norm": 19.625, "learning_rate": 2.068310494065074e-06, "loss": 1.5398144721984863, "step": 10812 }, { "epoch": 1.9684172203513244, "grad_norm": 10.25, "learning_rate": 2.067632623167962e-06, "loss": 1.44391667842865, "step": 10814 }, { "epoch": 1.9687812869755166, "grad_norm": 18.5, "learning_rate": 2.0669548890919865e-06, "loss": 1.193953275680542, "step": 10816 }, { "epoch": 1.9691453535997088, "grad_norm": 14.8125, "learning_rate": 2.0662772919365977e-06, "loss": 1.752061367034912, "step": 10818 }, { "epoch": 1.969509420223901, "grad_norm": 22.0, "learning_rate": 2.065599831801234e-06, "loss": 1.4170674085617065, "step": 10820 }, { "epoch": 1.9698734868480932, "grad_norm": 6.6875, "learning_rate": 2.0649225087853074e-06, "loss": 1.3031843900680542, "step": 10822 }, { "epoch": 1.9702375534722854, "grad_norm": 5.96875, "learning_rate": 2.064245322988212e-06, "loss": 1.3610680103302002, "step": 10824 }, { "epoch": 1.9706016200964775, "grad_norm": 14.1875, "learning_rate": 2.063568274509325e-06, "loss": 1.5324195623397827, "step": 10826 }, { "epoch": 1.9709656867206697, "grad_norm": 27.0, "learning_rate": 2.0628913634479973e-06, "loss": 1.9046146869659424, "step": 10828 }, { "epoch": 1.9713297533448622, "grad_norm": 190.0, "learning_rate": 2.0622145899035654e-06, "loss": 1.325943946838379, "step": 10830 }, { "epoch": 1.9716938199690544, "grad_norm": 15.125, "learning_rate": 2.0615379539753415e-06, "loss": 1.494063377380371, "step": 10832 }, { "epoch": 1.9720578865932465, "grad_norm": 16.5, "learning_rate": 2.0608614557626186e-06, "loss": 1.836858868598938, "step": 10834 }, { "epoch": 1.9724219532174387, "grad_norm": 28.625, "learning_rate": 2.0601850953646727e-06, "loss": 1.7695846557617188, "step": 10836 }, { "epoch": 1.9727860198416312, "grad_norm": 21.375, "learning_rate": 2.0595088728807537e-06, "loss": 1.029619812965393, "step": 10838 }, { "epoch": 1.9731500864658233, "grad_norm": 31.625, "learning_rate": 2.058832788410098e-06, "loss": 1.155238389968872, "step": 10840 }, { "epoch": 1.9735141530900155, "grad_norm": 13.75, "learning_rate": 2.058156842051915e-06, "loss": 1.5167967081069946, "step": 10842 }, { "epoch": 1.9738782197142077, "grad_norm": 5.4375, "learning_rate": 2.057481033905398e-06, "loss": 1.4010772705078125, "step": 10844 }, { "epoch": 1.9742422863384, "grad_norm": 7.28125, "learning_rate": 2.0568053640697213e-06, "loss": 1.3923753499984741, "step": 10846 }, { "epoch": 1.9746063529625921, "grad_norm": 4.25, "learning_rate": 2.0561298326440334e-06, "loss": 1.2189128398895264, "step": 10848 }, { "epoch": 1.9749704195867843, "grad_norm": 5.03125, "learning_rate": 2.055454439727469e-06, "loss": 1.0579004287719727, "step": 10850 }, { "epoch": 1.9753344862109765, "grad_norm": 7.71875, "learning_rate": 2.0547791854191365e-06, "loss": 1.241098403930664, "step": 10852 }, { "epoch": 1.9756985528351687, "grad_norm": 15.25, "learning_rate": 2.0541040698181273e-06, "loss": 1.2325248718261719, "step": 10854 }, { "epoch": 1.976062619459361, "grad_norm": 9.8125, "learning_rate": 2.053429093023514e-06, "loss": 1.075623631477356, "step": 10856 }, { "epoch": 1.9764266860835533, "grad_norm": 15.75, "learning_rate": 2.052754255134344e-06, "loss": 1.4465932846069336, "step": 10858 }, { "epoch": 1.9767907527077455, "grad_norm": 12.0, "learning_rate": 2.052079556249649e-06, "loss": 1.5425323247909546, "step": 10860 }, { "epoch": 1.9771548193319377, "grad_norm": 34.5, "learning_rate": 2.0514049964684375e-06, "loss": 1.5979787111282349, "step": 10862 }, { "epoch": 1.9775188859561301, "grad_norm": 5.40625, "learning_rate": 2.0507305758896973e-06, "loss": 1.4477760791778564, "step": 10864 }, { "epoch": 1.9778829525803223, "grad_norm": 21.0, "learning_rate": 2.0500562946124e-06, "loss": 1.1385356187820435, "step": 10866 }, { "epoch": 1.9782470192045145, "grad_norm": 24.375, "learning_rate": 2.0493821527354896e-06, "loss": 2.09773850440979, "step": 10868 }, { "epoch": 1.9786110858287067, "grad_norm": 16.125, "learning_rate": 2.0487081503578978e-06, "loss": 1.9110240936279297, "step": 10870 }, { "epoch": 1.978975152452899, "grad_norm": 16.625, "learning_rate": 2.0480342875785283e-06, "loss": 1.2511754035949707, "step": 10872 }, { "epoch": 1.979339219077091, "grad_norm": 29.0, "learning_rate": 2.0473605644962687e-06, "loss": 0.9926354885101318, "step": 10874 }, { "epoch": 1.9797032857012833, "grad_norm": 19.5, "learning_rate": 2.046686981209987e-06, "loss": 1.4487391710281372, "step": 10876 }, { "epoch": 1.9800673523254755, "grad_norm": 93.0, "learning_rate": 2.0460135378185263e-06, "loss": 1.4361932277679443, "step": 10878 }, { "epoch": 1.9804314189496677, "grad_norm": 13.5625, "learning_rate": 2.045340234420714e-06, "loss": 1.3014165163040161, "step": 10880 }, { "epoch": 1.9807954855738599, "grad_norm": 14.25, "learning_rate": 2.044667071115352e-06, "loss": 0.6043608784675598, "step": 10882 }, { "epoch": 1.9811595521980523, "grad_norm": 5.1875, "learning_rate": 2.0439940480012255e-06, "loss": 0.8798382878303528, "step": 10884 }, { "epoch": 1.9815236188222445, "grad_norm": 6.9375, "learning_rate": 2.0433211651770998e-06, "loss": 1.5133492946624756, "step": 10886 }, { "epoch": 1.9818876854464367, "grad_norm": 5.65625, "learning_rate": 2.0426484227417144e-06, "loss": 1.0015044212341309, "step": 10888 }, { "epoch": 1.9822517520706289, "grad_norm": 10.625, "learning_rate": 2.0419758207937944e-06, "loss": 1.1347880363464355, "step": 10890 }, { "epoch": 1.9826158186948213, "grad_norm": 11.25, "learning_rate": 2.0413033594320396e-06, "loss": 1.4811532497406006, "step": 10892 }, { "epoch": 1.9829798853190135, "grad_norm": 9.375, "learning_rate": 2.040631038755131e-06, "loss": 1.5913841724395752, "step": 10894 }, { "epoch": 1.9833439519432057, "grad_norm": 7.3125, "learning_rate": 2.0399588588617307e-06, "loss": 1.0283632278442383, "step": 10896 }, { "epoch": 1.9837080185673979, "grad_norm": 9.1875, "learning_rate": 2.0392868198504755e-06, "loss": 1.5134652853012085, "step": 10898 }, { "epoch": 1.98407208519159, "grad_norm": 17.625, "learning_rate": 2.038614921819988e-06, "loss": 1.526085376739502, "step": 10900 }, { "epoch": 1.9844361518157823, "grad_norm": 20.0, "learning_rate": 2.0379431648688634e-06, "loss": 1.7380069494247437, "step": 10902 }, { "epoch": 1.9848002184399745, "grad_norm": 14.8125, "learning_rate": 2.03727154909568e-06, "loss": 1.3333088159561157, "step": 10904 }, { "epoch": 1.9851642850641666, "grad_norm": 12.9375, "learning_rate": 2.0366000745989965e-06, "loss": 1.1788612604141235, "step": 10906 }, { "epoch": 1.9855283516883588, "grad_norm": 11.75, "learning_rate": 2.035928741477346e-06, "loss": 1.420978307723999, "step": 10908 }, { "epoch": 1.985892418312551, "grad_norm": 10.0625, "learning_rate": 2.035257549829248e-06, "loss": 1.2888288497924805, "step": 10910 }, { "epoch": 1.9862564849367434, "grad_norm": 19.5, "learning_rate": 2.0345864997531934e-06, "loss": 0.694785475730896, "step": 10912 }, { "epoch": 1.9866205515609356, "grad_norm": 19.875, "learning_rate": 2.0339155913476567e-06, "loss": 0.4282897710800171, "step": 10914 }, { "epoch": 1.9869846181851278, "grad_norm": 17.0, "learning_rate": 2.0332448247110937e-06, "loss": 1.7610716819763184, "step": 10916 }, { "epoch": 1.9873486848093203, "grad_norm": 10.5625, "learning_rate": 2.0325741999419328e-06, "loss": 1.1312470436096191, "step": 10918 }, { "epoch": 1.9877127514335124, "grad_norm": 8.375, "learning_rate": 2.0319037171385892e-06, "loss": 1.420812726020813, "step": 10920 }, { "epoch": 1.9880768180577046, "grad_norm": 16.875, "learning_rate": 2.0312333763994507e-06, "loss": 1.3763034343719482, "step": 10922 }, { "epoch": 1.9884408846818968, "grad_norm": 67.0, "learning_rate": 2.0305631778228877e-06, "loss": 1.2035367488861084, "step": 10924 }, { "epoch": 1.988804951306089, "grad_norm": 12.75, "learning_rate": 2.0298931215072514e-06, "loss": 1.589197039604187, "step": 10926 }, { "epoch": 1.9891690179302812, "grad_norm": 7.65625, "learning_rate": 2.0292232075508658e-06, "loss": 1.0951956510543823, "step": 10928 }, { "epoch": 1.9895330845544734, "grad_norm": 14.8125, "learning_rate": 2.028553436052042e-06, "loss": 1.479056477546692, "step": 10930 }, { "epoch": 1.9898971511786656, "grad_norm": 5.875, "learning_rate": 2.027883807109064e-06, "loss": 1.2826660871505737, "step": 10932 }, { "epoch": 1.9902612178028578, "grad_norm": 11.125, "learning_rate": 2.0272143208201965e-06, "loss": 1.260714054107666, "step": 10934 }, { "epoch": 1.99062528442705, "grad_norm": 8.875, "learning_rate": 2.026544977283687e-06, "loss": 0.886364221572876, "step": 10936 }, { "epoch": 1.9909893510512424, "grad_norm": 10.5, "learning_rate": 2.0258757765977556e-06, "loss": 1.4551880359649658, "step": 10938 }, { "epoch": 1.9913534176754346, "grad_norm": 9.3125, "learning_rate": 2.025206718860607e-06, "loss": 1.423505187034607, "step": 10940 }, { "epoch": 1.9917174842996268, "grad_norm": 6.78125, "learning_rate": 2.0245378041704224e-06, "loss": 1.3690451383590698, "step": 10942 }, { "epoch": 1.9920815509238192, "grad_norm": 7.84375, "learning_rate": 2.02386903262536e-06, "loss": 1.38144850730896, "step": 10944 }, { "epoch": 1.9924456175480114, "grad_norm": 8.0, "learning_rate": 2.023200404323563e-06, "loss": 1.5066239833831787, "step": 10946 }, { "epoch": 1.9928096841722036, "grad_norm": 18.75, "learning_rate": 2.0225319193631467e-06, "loss": 1.362168550491333, "step": 10948 }, { "epoch": 1.9931737507963958, "grad_norm": 10.4375, "learning_rate": 2.021863577842211e-06, "loss": 1.4317498207092285, "step": 10950 }, { "epoch": 1.993537817420588, "grad_norm": 16.75, "learning_rate": 2.0211953798588305e-06, "loss": 1.4061001539230347, "step": 10952 }, { "epoch": 1.9939018840447802, "grad_norm": 18.125, "learning_rate": 2.0205273255110606e-06, "loss": 1.9899253845214844, "step": 10954 }, { "epoch": 1.9942659506689724, "grad_norm": 10.125, "learning_rate": 2.019859414896938e-06, "loss": 1.497283697128296, "step": 10956 }, { "epoch": 1.9946300172931646, "grad_norm": 12.0, "learning_rate": 2.0191916481144725e-06, "loss": 1.3275463581085205, "step": 10958 }, { "epoch": 1.9949940839173568, "grad_norm": 10.5625, "learning_rate": 2.018524025261659e-06, "loss": 1.2029767036437988, "step": 10960 }, { "epoch": 1.995358150541549, "grad_norm": 3.21875, "learning_rate": 2.0178565464364667e-06, "loss": 1.2263057231903076, "step": 10962 }, { "epoch": 1.9957222171657414, "grad_norm": 9.1875, "learning_rate": 2.0171892117368453e-06, "loss": 1.4551470279693604, "step": 10964 }, { "epoch": 1.9960862837899336, "grad_norm": 12.125, "learning_rate": 2.016522021260725e-06, "loss": 1.3572474718093872, "step": 10966 }, { "epoch": 1.9964503504141258, "grad_norm": 11.0625, "learning_rate": 2.0158549751060116e-06, "loss": 0.8097766637802124, "step": 10968 }, { "epoch": 1.996814417038318, "grad_norm": 8.875, "learning_rate": 2.0151880733705935e-06, "loss": 1.4493364095687866, "step": 10970 }, { "epoch": 1.9971784836625104, "grad_norm": 6.625, "learning_rate": 2.0145213161523336e-06, "loss": 1.2721589803695679, "step": 10972 }, { "epoch": 1.9975425502867026, "grad_norm": 8.875, "learning_rate": 2.013854703549076e-06, "loss": 1.225865364074707, "step": 10974 }, { "epoch": 1.9979066169108948, "grad_norm": 12.0, "learning_rate": 2.013188235658646e-06, "loss": 1.4053356647491455, "step": 10976 }, { "epoch": 1.998270683535087, "grad_norm": 24.625, "learning_rate": 2.0125219125788416e-06, "loss": 1.3101599216461182, "step": 10978 }, { "epoch": 1.9986347501592792, "grad_norm": 8.0, "learning_rate": 2.011855734407446e-06, "loss": 1.398789405822754, "step": 10980 }, { "epoch": 1.9989988167834714, "grad_norm": 10.0, "learning_rate": 2.011189701242216e-06, "loss": 1.1941096782684326, "step": 10982 }, { "epoch": 1.9993628834076635, "grad_norm": 15.1875, "learning_rate": 2.010523813180889e-06, "loss": 1.9056274890899658, "step": 10984 }, { "epoch": 1.9997269500318557, "grad_norm": 16.0, "learning_rate": 2.0098580703211845e-06, "loss": 1.604621410369873, "step": 10986 }, { "epoch": 2.0, "grad_norm": 20.375, "learning_rate": 2.0091924727607935e-06, "loss": 1.499759316444397, "step": 10988 }, { "epoch": 2.000364066624192, "grad_norm": 2.625, "learning_rate": 2.008527020597394e-06, "loss": 1.3992056846618652, "step": 10990 }, { "epoch": 2.0007281332483844, "grad_norm": 44.0, "learning_rate": 2.0078617139286346e-06, "loss": 0.9406244158744812, "step": 10992 }, { "epoch": 2.0010921998725766, "grad_norm": 9.25, "learning_rate": 2.0071965528521476e-06, "loss": 1.4997491836547852, "step": 10994 }, { "epoch": 2.0014562664967688, "grad_norm": 5.40625, "learning_rate": 2.0065315374655443e-06, "loss": 1.037550449371338, "step": 10996 }, { "epoch": 2.001820333120961, "grad_norm": 17.875, "learning_rate": 2.00586666786641e-06, "loss": 1.4874814748764038, "step": 10998 }, { "epoch": 2.002184399745153, "grad_norm": 19.0, "learning_rate": 2.0052019441523153e-06, "loss": 0.333152174949646, "step": 11000 }, { "epoch": 2.002548466369346, "grad_norm": 7.625, "learning_rate": 2.0045373664208024e-06, "loss": 1.3715388774871826, "step": 11002 }, { "epoch": 2.002912532993538, "grad_norm": 6.15625, "learning_rate": 2.0038729347693963e-06, "loss": 1.313523769378662, "step": 11004 }, { "epoch": 2.00327659961773, "grad_norm": 10.75, "learning_rate": 2.0032086492956014e-06, "loss": 1.3113813400268555, "step": 11006 }, { "epoch": 2.0036406662419224, "grad_norm": 21.375, "learning_rate": 2.0025445100968965e-06, "loss": 1.8938591480255127, "step": 11008 }, { "epoch": 2.0040047328661146, "grad_norm": 75.5, "learning_rate": 2.0018805172707437e-06, "loss": 1.2183549404144287, "step": 11010 }, { "epoch": 2.0043687994903068, "grad_norm": 13.3125, "learning_rate": 2.0012166709145793e-06, "loss": 1.4318671226501465, "step": 11012 }, { "epoch": 2.004732866114499, "grad_norm": 11.625, "learning_rate": 2.0005529711258197e-06, "loss": 1.3979640007019043, "step": 11014 }, { "epoch": 2.005096932738691, "grad_norm": 13.4375, "learning_rate": 1.9998894180018627e-06, "loss": 1.4701021909713745, "step": 11016 }, { "epoch": 2.0054609993628834, "grad_norm": 11.5, "learning_rate": 1.999226011640079e-06, "loss": 1.5419895648956299, "step": 11018 }, { "epoch": 2.0058250659870756, "grad_norm": 10.375, "learning_rate": 1.9985627521378243e-06, "loss": 1.3981966972351074, "step": 11020 }, { "epoch": 2.0061891326112677, "grad_norm": 36.5, "learning_rate": 1.997899639592426e-06, "loss": 1.8604216575622559, "step": 11022 }, { "epoch": 2.00655319923546, "grad_norm": 4.625, "learning_rate": 1.9972366741011937e-06, "loss": 1.0971505641937256, "step": 11024 }, { "epoch": 2.006917265859652, "grad_norm": 8.5625, "learning_rate": 1.9965738557614176e-06, "loss": 1.357130765914917, "step": 11026 }, { "epoch": 2.0072813324838443, "grad_norm": 11.375, "learning_rate": 1.9959111846703594e-06, "loss": 1.4518401622772217, "step": 11028 }, { "epoch": 2.007645399108037, "grad_norm": 7.65625, "learning_rate": 1.9952486609252678e-06, "loss": 1.5343501567840576, "step": 11030 }, { "epoch": 2.008009465732229, "grad_norm": 7.90625, "learning_rate": 1.994586284623362e-06, "loss": 1.4651836156845093, "step": 11032 }, { "epoch": 2.0083735323564214, "grad_norm": 23.75, "learning_rate": 1.9939240558618444e-06, "loss": 1.3329646587371826, "step": 11034 }, { "epoch": 2.0087375989806135, "grad_norm": 9.3125, "learning_rate": 1.9932619747378953e-06, "loss": 1.3475887775421143, "step": 11036 }, { "epoch": 2.0091016656048057, "grad_norm": 18.375, "learning_rate": 1.99260004134867e-06, "loss": 1.6197303533554077, "step": 11038 }, { "epoch": 2.009465732228998, "grad_norm": 8.6875, "learning_rate": 1.9919382557913074e-06, "loss": 1.1972200870513916, "step": 11040 }, { "epoch": 2.00982979885319, "grad_norm": 9.1875, "learning_rate": 1.9912766181629196e-06, "loss": 1.1718153953552246, "step": 11042 }, { "epoch": 2.0101938654773823, "grad_norm": 4.21875, "learning_rate": 1.9906151285605993e-06, "loss": 0.9909963607788086, "step": 11044 }, { "epoch": 2.0105579321015745, "grad_norm": 11.1875, "learning_rate": 1.98995378708142e-06, "loss": 1.668025255203247, "step": 11046 }, { "epoch": 2.0109219987257667, "grad_norm": 5.96875, "learning_rate": 1.9892925938224274e-06, "loss": 1.3886585235595703, "step": 11048 }, { "epoch": 2.011286065349959, "grad_norm": 9.9375, "learning_rate": 1.9886315488806525e-06, "loss": 1.6192712783813477, "step": 11050 }, { "epoch": 2.011650131974151, "grad_norm": 13.875, "learning_rate": 1.9879706523530977e-06, "loss": 1.3619799613952637, "step": 11052 }, { "epoch": 2.0120141985983433, "grad_norm": 11.4375, "learning_rate": 1.987309904336748e-06, "loss": 1.4744445085525513, "step": 11054 }, { "epoch": 2.012378265222536, "grad_norm": 5.15625, "learning_rate": 1.9866493049285674e-06, "loss": 1.1223396062850952, "step": 11056 }, { "epoch": 2.012742331846728, "grad_norm": 4.875, "learning_rate": 1.985988854225492e-06, "loss": 0.7826835513114929, "step": 11058 }, { "epoch": 2.0131063984709203, "grad_norm": 12.5, "learning_rate": 1.985328552324446e-06, "loss": 1.536156415939331, "step": 11060 }, { "epoch": 2.0134704650951125, "grad_norm": 9.125, "learning_rate": 1.984668399322321e-06, "loss": 1.4070651531219482, "step": 11062 }, { "epoch": 2.0138345317193047, "grad_norm": 7.09375, "learning_rate": 1.984008395315993e-06, "loss": 1.3552974462509155, "step": 11064 }, { "epoch": 2.014198598343497, "grad_norm": 22.375, "learning_rate": 1.983348540402317e-06, "loss": 1.4086244106292725, "step": 11066 }, { "epoch": 2.014562664967689, "grad_norm": 8.375, "learning_rate": 1.982688834678121e-06, "loss": 1.1711201667785645, "step": 11068 }, { "epoch": 2.0149267315918813, "grad_norm": 10.1875, "learning_rate": 1.9820292782402176e-06, "loss": 1.5485522747039795, "step": 11070 }, { "epoch": 2.0152907982160735, "grad_norm": 3.546875, "learning_rate": 1.9813698711853912e-06, "loss": 1.1161587238311768, "step": 11072 }, { "epoch": 2.0156548648402657, "grad_norm": 10.75, "learning_rate": 1.980710613610407e-06, "loss": 1.3998215198516846, "step": 11074 }, { "epoch": 2.016018931464458, "grad_norm": 24.75, "learning_rate": 1.9800515056120114e-06, "loss": 1.5405805110931396, "step": 11076 }, { "epoch": 2.01638299808865, "grad_norm": 19.375, "learning_rate": 1.979392547286922e-06, "loss": 1.5268285274505615, "step": 11078 }, { "epoch": 2.0167470647128423, "grad_norm": 8.3125, "learning_rate": 1.978733738731842e-06, "loss": 1.4839603900909424, "step": 11080 }, { "epoch": 2.0171111313370345, "grad_norm": 10.0, "learning_rate": 1.978075080043446e-06, "loss": 1.6420220136642456, "step": 11082 }, { "epoch": 2.017475197961227, "grad_norm": 3.421875, "learning_rate": 1.97741657131839e-06, "loss": 0.9530258178710938, "step": 11084 }, { "epoch": 2.0178392645854193, "grad_norm": 22.5, "learning_rate": 1.976758212653309e-06, "loss": 2.084479570388794, "step": 11086 }, { "epoch": 2.0182033312096115, "grad_norm": 8.125, "learning_rate": 1.976100004144812e-06, "loss": 1.3172053098678589, "step": 11088 }, { "epoch": 2.0185673978338037, "grad_norm": 10.25, "learning_rate": 1.9754419458894924e-06, "loss": 1.398364543914795, "step": 11090 }, { "epoch": 2.018931464457996, "grad_norm": 21.625, "learning_rate": 1.974784037983913e-06, "loss": 1.422690987586975, "step": 11092 }, { "epoch": 2.019295531082188, "grad_norm": 21.875, "learning_rate": 1.974126280524621e-06, "loss": 1.3765701055526733, "step": 11094 }, { "epoch": 2.0196595977063803, "grad_norm": 24.0, "learning_rate": 1.9734686736081417e-06, "loss": 1.33046555519104, "step": 11096 }, { "epoch": 2.0200236643305725, "grad_norm": 21.25, "learning_rate": 1.972811217330972e-06, "loss": 1.3694443702697754, "step": 11098 }, { "epoch": 2.0203877309547646, "grad_norm": 9.0625, "learning_rate": 1.972153911789596e-06, "loss": 1.1963346004486084, "step": 11100 }, { "epoch": 2.020751797578957, "grad_norm": 19.75, "learning_rate": 1.9714967570804665e-06, "loss": 1.5416532754898071, "step": 11102 }, { "epoch": 2.021115864203149, "grad_norm": 14.875, "learning_rate": 1.9708397533000186e-06, "loss": 1.4636294841766357, "step": 11104 }, { "epoch": 2.0214799308273412, "grad_norm": 17.125, "learning_rate": 1.970182900544668e-06, "loss": 1.8029705286026, "step": 11106 }, { "epoch": 2.0218439974515334, "grad_norm": 7.3125, "learning_rate": 1.9695261989108017e-06, "loss": 1.0296516418457031, "step": 11108 }, { "epoch": 2.022208064075726, "grad_norm": 5.0, "learning_rate": 1.9688696484947912e-06, "loss": 1.0822944641113281, "step": 11110 }, { "epoch": 2.0225721306999183, "grad_norm": 6.0, "learning_rate": 1.9682132493929802e-06, "loss": 1.46492338180542, "step": 11112 }, { "epoch": 2.0229361973241105, "grad_norm": 8.0, "learning_rate": 1.9675570017016925e-06, "loss": 1.1143368482589722, "step": 11114 }, { "epoch": 2.0233002639483026, "grad_norm": 27.375, "learning_rate": 1.9669009055172326e-06, "loss": 1.5624620914459229, "step": 11116 }, { "epoch": 2.023664330572495, "grad_norm": 14.0625, "learning_rate": 1.966244960935876e-06, "loss": 1.489018440246582, "step": 11118 }, { "epoch": 2.024028397196687, "grad_norm": 8.6875, "learning_rate": 1.965589168053884e-06, "loss": 1.501712441444397, "step": 11120 }, { "epoch": 2.0243924638208792, "grad_norm": 39.5, "learning_rate": 1.964933526967488e-06, "loss": 1.4440007209777832, "step": 11122 }, { "epoch": 2.0247565304450714, "grad_norm": 8.4375, "learning_rate": 1.964278037772902e-06, "loss": 1.3837101459503174, "step": 11124 }, { "epoch": 2.0251205970692636, "grad_norm": 11.125, "learning_rate": 1.9636227005663177e-06, "loss": 1.5050194263458252, "step": 11126 }, { "epoch": 2.025484663693456, "grad_norm": 21.375, "learning_rate": 1.962967515443901e-06, "loss": 1.394757628440857, "step": 11128 }, { "epoch": 2.025848730317648, "grad_norm": 15.625, "learning_rate": 1.9623124825017993e-06, "loss": 0.9417558908462524, "step": 11130 }, { "epoch": 2.02621279694184, "grad_norm": 9.25, "learning_rate": 1.9616576018361355e-06, "loss": 1.6117267608642578, "step": 11132 }, { "epoch": 2.0265768635660324, "grad_norm": 21.0, "learning_rate": 1.9610028735430096e-06, "loss": 0.8419802784919739, "step": 11134 }, { "epoch": 2.026940930190225, "grad_norm": 7.40625, "learning_rate": 1.9603482977185028e-06, "loss": 0.8970146179199219, "step": 11136 }, { "epoch": 2.0273049968144172, "grad_norm": 10.0625, "learning_rate": 1.9596938744586684e-06, "loss": 0.9122978448867798, "step": 11138 }, { "epoch": 2.0276690634386094, "grad_norm": 18.25, "learning_rate": 1.959039603859543e-06, "loss": 1.7657095193862915, "step": 11140 }, { "epoch": 2.0280331300628016, "grad_norm": 8.75, "learning_rate": 1.958385486017137e-06, "loss": 1.3639525175094604, "step": 11142 }, { "epoch": 2.028397196686994, "grad_norm": 20.125, "learning_rate": 1.957731521027439e-06, "loss": 1.5718979835510254, "step": 11144 }, { "epoch": 2.028761263311186, "grad_norm": 10.8125, "learning_rate": 1.957077708986417e-06, "loss": 1.4763069152832031, "step": 11146 }, { "epoch": 2.029125329935378, "grad_norm": 12.125, "learning_rate": 1.956424049990014e-06, "loss": 1.5811841487884521, "step": 11148 }, { "epoch": 2.0294893965595704, "grad_norm": 8.8125, "learning_rate": 1.9557705441341534e-06, "loss": 1.4419233798980713, "step": 11150 }, { "epoch": 2.0298534631837626, "grad_norm": 4.65625, "learning_rate": 1.9551171915147334e-06, "loss": 1.1227138042449951, "step": 11152 }, { "epoch": 2.030217529807955, "grad_norm": 14.125, "learning_rate": 1.9544639922276294e-06, "loss": 1.5154244899749756, "step": 11154 }, { "epoch": 2.030581596432147, "grad_norm": 28.125, "learning_rate": 1.9538109463686994e-06, "loss": 0.6497099995613098, "step": 11156 }, { "epoch": 2.030945663056339, "grad_norm": 10.3125, "learning_rate": 1.9531580540337715e-06, "loss": 1.4992166757583618, "step": 11158 }, { "epoch": 2.0313097296805314, "grad_norm": 15.875, "learning_rate": 1.9525053153186583e-06, "loss": 1.3977303504943848, "step": 11160 }, { "epoch": 2.0316737963047236, "grad_norm": 4.84375, "learning_rate": 1.9518527303191444e-06, "loss": 1.0993247032165527, "step": 11162 }, { "epoch": 2.032037862928916, "grad_norm": 44.0, "learning_rate": 1.951200299130993e-06, "loss": 1.545391321182251, "step": 11164 }, { "epoch": 2.0324019295531084, "grad_norm": 15.875, "learning_rate": 1.95054802184995e-06, "loss": 1.387149453163147, "step": 11166 }, { "epoch": 2.0327659961773006, "grad_norm": 11.5625, "learning_rate": 1.9498958985717294e-06, "loss": 1.2326574325561523, "step": 11168 }, { "epoch": 2.0331300628014928, "grad_norm": 53.5, "learning_rate": 1.9492439293920317e-06, "loss": 1.9308810234069824, "step": 11170 }, { "epoch": 2.033494129425685, "grad_norm": 13.375, "learning_rate": 1.9485921144065282e-06, "loss": 1.0600999593734741, "step": 11172 }, { "epoch": 2.033858196049877, "grad_norm": 16.5, "learning_rate": 1.9479404537108704e-06, "loss": 1.4514191150665283, "step": 11174 }, { "epoch": 2.0342222626740694, "grad_norm": 2.515625, "learning_rate": 1.9472889474006883e-06, "loss": 1.0639142990112305, "step": 11176 }, { "epoch": 2.0345863292982616, "grad_norm": 17.25, "learning_rate": 1.946637595571586e-06, "loss": 1.6877634525299072, "step": 11178 }, { "epoch": 2.0349503959224537, "grad_norm": 9.3125, "learning_rate": 1.945986398319149e-06, "loss": 1.540773868560791, "step": 11180 }, { "epoch": 2.035314462546646, "grad_norm": 19.25, "learning_rate": 1.9453353557389357e-06, "loss": 0.2354702353477478, "step": 11182 }, { "epoch": 2.035678529170838, "grad_norm": 9.5, "learning_rate": 1.944684467926484e-06, "loss": 1.4709038734436035, "step": 11184 }, { "epoch": 2.0360425957950303, "grad_norm": 39.0, "learning_rate": 1.944033734977312e-06, "loss": 1.360809087753296, "step": 11186 }, { "epoch": 2.0364066624192225, "grad_norm": 19.25, "learning_rate": 1.9433831569869075e-06, "loss": 1.529122233390808, "step": 11188 }, { "epoch": 2.036770729043415, "grad_norm": 5.1875, "learning_rate": 1.942732734050744e-06, "loss": 0.7980729341506958, "step": 11190 }, { "epoch": 2.0371347956676074, "grad_norm": 7.4375, "learning_rate": 1.942082466264267e-06, "loss": 0.9450342059135437, "step": 11192 }, { "epoch": 2.0374988622917996, "grad_norm": 16.25, "learning_rate": 1.9414323537228995e-06, "loss": 1.3768373727798462, "step": 11194 }, { "epoch": 2.0378629289159917, "grad_norm": 5.3125, "learning_rate": 1.940782396522046e-06, "loss": 1.2740161418914795, "step": 11196 }, { "epoch": 2.038226995540184, "grad_norm": 9.3125, "learning_rate": 1.9401325947570816e-06, "loss": 1.0506833791732788, "step": 11198 }, { "epoch": 2.038591062164376, "grad_norm": 17.5, "learning_rate": 1.9394829485233645e-06, "loss": 1.5903105735778809, "step": 11200 }, { "epoch": 2.0389551287885683, "grad_norm": 4.625, "learning_rate": 1.9388334579162267e-06, "loss": 0.9030445218086243, "step": 11202 }, { "epoch": 2.0393191954127605, "grad_norm": 6.78125, "learning_rate": 1.9381841230309777e-06, "loss": 1.2412681579589844, "step": 11204 }, { "epoch": 2.0396832620369527, "grad_norm": 12.375, "learning_rate": 1.9375349439629065e-06, "loss": 1.6972997188568115, "step": 11206 }, { "epoch": 2.040047328661145, "grad_norm": 15.0, "learning_rate": 1.9368859208072755e-06, "loss": 1.527509331703186, "step": 11208 }, { "epoch": 2.040411395285337, "grad_norm": 3.828125, "learning_rate": 1.936237053659328e-06, "loss": 1.1031190156936646, "step": 11210 }, { "epoch": 2.0407754619095293, "grad_norm": 9.375, "learning_rate": 1.935588342614282e-06, "loss": 1.4453120231628418, "step": 11212 }, { "epoch": 2.0411395285337215, "grad_norm": 16.125, "learning_rate": 1.9349397877673313e-06, "loss": 1.4460902214050293, "step": 11214 }, { "epoch": 2.0415035951579137, "grad_norm": 11.875, "learning_rate": 1.934291389213653e-06, "loss": 1.529700517654419, "step": 11216 }, { "epoch": 2.0418676617821063, "grad_norm": 14.9375, "learning_rate": 1.933643147048392e-06, "loss": 1.2103297710418701, "step": 11218 }, { "epoch": 2.0422317284062985, "grad_norm": 7.875, "learning_rate": 1.9329950613666794e-06, "loss": 1.4142873287200928, "step": 11220 }, { "epoch": 2.0425957950304907, "grad_norm": 10.375, "learning_rate": 1.932347132263617e-06, "loss": 1.232865333557129, "step": 11222 }, { "epoch": 2.042959861654683, "grad_norm": 21.625, "learning_rate": 1.9316993598342846e-06, "loss": 1.9871118068695068, "step": 11224 }, { "epoch": 2.043323928278875, "grad_norm": 5.53125, "learning_rate": 1.931051744173744e-06, "loss": 0.8346189856529236, "step": 11226 }, { "epoch": 2.0436879949030673, "grad_norm": 11.1875, "learning_rate": 1.930404285377026e-06, "loss": 1.5212898254394531, "step": 11228 }, { "epoch": 2.0440520615272595, "grad_norm": 3.40625, "learning_rate": 1.9297569835391463e-06, "loss": 0.8817059993743896, "step": 11230 }, { "epoch": 2.0444161281514517, "grad_norm": 14.5625, "learning_rate": 1.9291098387550907e-06, "loss": 1.2817615270614624, "step": 11232 }, { "epoch": 2.044780194775644, "grad_norm": 50.5, "learning_rate": 1.9284628511198254e-06, "loss": 1.738089680671692, "step": 11234 }, { "epoch": 2.045144261399836, "grad_norm": 20.125, "learning_rate": 1.927816020728296e-06, "loss": 1.577939510345459, "step": 11236 }, { "epoch": 2.0455083280240283, "grad_norm": 5.0625, "learning_rate": 1.927169347675419e-06, "loss": 1.0617436170578003, "step": 11238 }, { "epoch": 2.0458723946482205, "grad_norm": 5.375, "learning_rate": 1.9265228320560934e-06, "loss": 1.1472480297088623, "step": 11240 }, { "epoch": 2.0462364612724127, "grad_norm": 13.0, "learning_rate": 1.9258764739651912e-06, "loss": 2.0814905166625977, "step": 11242 }, { "epoch": 2.0466005278966053, "grad_norm": 13.375, "learning_rate": 1.925230273497563e-06, "loss": 1.6026432514190674, "step": 11244 }, { "epoch": 2.0469645945207975, "grad_norm": 13.1875, "learning_rate": 1.924584230748038e-06, "loss": 0.7772610187530518, "step": 11246 }, { "epoch": 2.0473286611449897, "grad_norm": 11.125, "learning_rate": 1.9239383458114173e-06, "loss": 1.4164042472839355, "step": 11248 }, { "epoch": 2.047692727769182, "grad_norm": 21.0, "learning_rate": 1.9232926187824848e-06, "loss": 2.03822922706604, "step": 11250 }, { "epoch": 2.048056794393374, "grad_norm": 6.90625, "learning_rate": 1.9226470497559963e-06, "loss": 1.1361656188964844, "step": 11252 }, { "epoch": 2.0484208610175663, "grad_norm": 19.25, "learning_rate": 1.922001638826686e-06, "loss": 0.15727046132087708, "step": 11254 }, { "epoch": 2.0487849276417585, "grad_norm": 10.5, "learning_rate": 1.9213563860892687e-06, "loss": 1.4766499996185303, "step": 11256 }, { "epoch": 2.0491489942659507, "grad_norm": 6.34375, "learning_rate": 1.9207112916384287e-06, "loss": 0.9160822629928589, "step": 11258 }, { "epoch": 2.049513060890143, "grad_norm": 15.0625, "learning_rate": 1.9200663555688335e-06, "loss": 1.3961005210876465, "step": 11260 }, { "epoch": 2.049877127514335, "grad_norm": 48.25, "learning_rate": 1.919421577975124e-06, "loss": 0.7138979434967041, "step": 11262 }, { "epoch": 2.0502411941385272, "grad_norm": 3.15625, "learning_rate": 1.9187769589519174e-06, "loss": 0.7638934254646301, "step": 11264 }, { "epoch": 2.0506052607627194, "grad_norm": 9.0, "learning_rate": 1.9181324985938123e-06, "loss": 1.2059357166290283, "step": 11266 }, { "epoch": 2.0509693273869116, "grad_norm": 24.875, "learning_rate": 1.9174881969953766e-06, "loss": 0.5940987467765808, "step": 11268 }, { "epoch": 2.051333394011104, "grad_norm": 15.75, "learning_rate": 1.916844054251163e-06, "loss": 1.557391881942749, "step": 11270 }, { "epoch": 2.0516974606352965, "grad_norm": 34.0, "learning_rate": 1.916200070455694e-06, "loss": 1.5932555198669434, "step": 11272 }, { "epoch": 2.0520615272594886, "grad_norm": 25.125, "learning_rate": 1.9155562457034714e-06, "loss": 1.8712421655654907, "step": 11274 }, { "epoch": 2.052425593883681, "grad_norm": 4.34375, "learning_rate": 1.914912580088976e-06, "loss": 1.0210652351379395, "step": 11276 }, { "epoch": 2.052789660507873, "grad_norm": 28.5, "learning_rate": 1.914269073706661e-06, "loss": 2.2715117931365967, "step": 11278 }, { "epoch": 2.0531537271320652, "grad_norm": 20.625, "learning_rate": 1.913625726650961e-06, "loss": 2.012913465499878, "step": 11280 }, { "epoch": 2.0535177937562574, "grad_norm": 28.375, "learning_rate": 1.9129825390162817e-06, "loss": 1.8749830722808838, "step": 11282 }, { "epoch": 2.0538818603804496, "grad_norm": 5.90625, "learning_rate": 1.912339510897009e-06, "loss": 1.1219078302383423, "step": 11284 }, { "epoch": 2.054245927004642, "grad_norm": 9.4375, "learning_rate": 1.9116966423875067e-06, "loss": 1.5137276649475098, "step": 11286 }, { "epoch": 2.054609993628834, "grad_norm": 13.3125, "learning_rate": 1.91105393358211e-06, "loss": 1.8291330337524414, "step": 11288 }, { "epoch": 2.054974060253026, "grad_norm": 4.125, "learning_rate": 1.9104113845751372e-06, "loss": 1.0564454793930054, "step": 11290 }, { "epoch": 2.0553381268772184, "grad_norm": 12.0, "learning_rate": 1.9097689954608768e-06, "loss": 1.0356847047805786, "step": 11292 }, { "epoch": 2.0557021935014106, "grad_norm": 10.9375, "learning_rate": 1.9091267663335975e-06, "loss": 1.1149048805236816, "step": 11294 }, { "epoch": 2.056066260125603, "grad_norm": 10.9375, "learning_rate": 1.908484697287546e-06, "loss": 1.7005622386932373, "step": 11296 }, { "epoch": 2.0564303267497954, "grad_norm": 11.5625, "learning_rate": 1.90784278841694e-06, "loss": 1.4318026304244995, "step": 11298 }, { "epoch": 2.0567943933739876, "grad_norm": 41.5, "learning_rate": 1.90720103981598e-06, "loss": 0.7224030494689941, "step": 11300 }, { "epoch": 2.05715845999818, "grad_norm": 8.3125, "learning_rate": 1.9065594515788382e-06, "loss": 1.3860046863555908, "step": 11302 }, { "epoch": 2.057522526622372, "grad_norm": 11.6875, "learning_rate": 1.9059180237996646e-06, "loss": 1.1618001461029053, "step": 11304 }, { "epoch": 2.057886593246564, "grad_norm": 18.125, "learning_rate": 1.9052767565725887e-06, "loss": 2.2215492725372314, "step": 11306 }, { "epoch": 2.0582506598707564, "grad_norm": 26.0, "learning_rate": 1.9046356499917106e-06, "loss": 1.507474660873413, "step": 11308 }, { "epoch": 2.0586147264949486, "grad_norm": 14.875, "learning_rate": 1.9039947041511136e-06, "loss": 0.22918254137039185, "step": 11310 }, { "epoch": 2.058978793119141, "grad_norm": 74.5, "learning_rate": 1.903353919144851e-06, "loss": 1.789607048034668, "step": 11312 }, { "epoch": 2.059342859743333, "grad_norm": 19.75, "learning_rate": 1.9027132950669557e-06, "loss": 1.3877757787704468, "step": 11314 }, { "epoch": 2.059706926367525, "grad_norm": 16.875, "learning_rate": 1.902072832011439e-06, "loss": 1.781526803970337, "step": 11316 }, { "epoch": 2.0600709929917174, "grad_norm": 12.4375, "learning_rate": 1.9014325300722832e-06, "loss": 1.985640525817871, "step": 11318 }, { "epoch": 2.0604350596159096, "grad_norm": 13.8125, "learning_rate": 1.9007923893434533e-06, "loss": 1.6570340394973755, "step": 11320 }, { "epoch": 2.0607991262401018, "grad_norm": 18.875, "learning_rate": 1.9001524099188843e-06, "loss": 1.3645168542861938, "step": 11322 }, { "epoch": 2.061163192864294, "grad_norm": 8.0625, "learning_rate": 1.8995125918924915e-06, "loss": 1.377673625946045, "step": 11324 }, { "epoch": 2.0615272594884866, "grad_norm": 31.25, "learning_rate": 1.898872935358167e-06, "loss": 1.4514261484146118, "step": 11326 }, { "epoch": 2.061891326112679, "grad_norm": 11.25, "learning_rate": 1.8982334404097758e-06, "loss": 1.4237110614776611, "step": 11328 }, { "epoch": 2.062255392736871, "grad_norm": 4.28125, "learning_rate": 1.897594107141163e-06, "loss": 1.3030797243118286, "step": 11330 }, { "epoch": 2.062619459361063, "grad_norm": 12.3125, "learning_rate": 1.896954935646147e-06, "loss": 1.4342344999313354, "step": 11332 }, { "epoch": 2.0629835259852554, "grad_norm": 9.875, "learning_rate": 1.8963159260185233e-06, "loss": 1.4712165594100952, "step": 11334 }, { "epoch": 2.0633475926094476, "grad_norm": 19.375, "learning_rate": 1.8956770783520658e-06, "loss": 1.9412319660186768, "step": 11336 }, { "epoch": 2.0637116592336398, "grad_norm": 11.4375, "learning_rate": 1.8950383927405202e-06, "loss": 1.5182976722717285, "step": 11338 }, { "epoch": 2.064075725857832, "grad_norm": 20.25, "learning_rate": 1.894399869277614e-06, "loss": 1.6106157302856445, "step": 11340 }, { "epoch": 2.064439792482024, "grad_norm": 15.0, "learning_rate": 1.8937615080570448e-06, "loss": 1.080476999282837, "step": 11342 }, { "epoch": 2.0648038591062163, "grad_norm": 10.4375, "learning_rate": 1.893123309172492e-06, "loss": 1.4900929927825928, "step": 11344 }, { "epoch": 2.0651679257304085, "grad_norm": 17.0, "learning_rate": 1.8924852727176085e-06, "loss": 1.6501578092575073, "step": 11346 }, { "epoch": 2.0655319923546007, "grad_norm": 37.0, "learning_rate": 1.8918473987860214e-06, "loss": 1.373894453048706, "step": 11348 }, { "epoch": 2.065896058978793, "grad_norm": 7.40625, "learning_rate": 1.8912096874713392e-06, "loss": 1.5894296169281006, "step": 11350 }, { "epoch": 2.0662601256029856, "grad_norm": 5.28125, "learning_rate": 1.89057213886714e-06, "loss": 1.190136432647705, "step": 11352 }, { "epoch": 2.0666241922271777, "grad_norm": 14.1875, "learning_rate": 1.8899347530669843e-06, "loss": 1.1848104000091553, "step": 11354 }, { "epoch": 2.06698825885137, "grad_norm": 4.34375, "learning_rate": 1.8892975301644057e-06, "loss": 1.3796597719192505, "step": 11356 }, { "epoch": 2.067352325475562, "grad_norm": 3.84375, "learning_rate": 1.8886604702529113e-06, "loss": 0.8591499924659729, "step": 11358 }, { "epoch": 2.0677163920997543, "grad_norm": 9.8125, "learning_rate": 1.8880235734259911e-06, "loss": 1.543081521987915, "step": 11360 }, { "epoch": 2.0680804587239465, "grad_norm": 15.875, "learning_rate": 1.8873868397771031e-06, "loss": 1.3929495811462402, "step": 11362 }, { "epoch": 2.0684445253481387, "grad_norm": 8.625, "learning_rate": 1.8867502693996884e-06, "loss": 1.478927493095398, "step": 11364 }, { "epoch": 2.068808591972331, "grad_norm": 11.75, "learning_rate": 1.8861138623871605e-06, "loss": 1.5845457315444946, "step": 11366 }, { "epoch": 2.069172658596523, "grad_norm": 10.6875, "learning_rate": 1.885477618832908e-06, "loss": 1.8207017183303833, "step": 11368 }, { "epoch": 2.0695367252207153, "grad_norm": 18.875, "learning_rate": 1.8848415388302992e-06, "loss": 1.4906504154205322, "step": 11370 }, { "epoch": 2.0699007918449075, "grad_norm": 50.25, "learning_rate": 1.8842056224726742e-06, "loss": 1.6885826587677002, "step": 11372 }, { "epoch": 2.0702648584690997, "grad_norm": 8.875, "learning_rate": 1.8835698698533528e-06, "loss": 1.132664680480957, "step": 11374 }, { "epoch": 2.070628925093292, "grad_norm": 32.75, "learning_rate": 1.8829342810656293e-06, "loss": 0.6545571088790894, "step": 11376 }, { "epoch": 2.0709929917174845, "grad_norm": 11.8125, "learning_rate": 1.882298856202771e-06, "loss": 1.419360637664795, "step": 11378 }, { "epoch": 2.0713570583416767, "grad_norm": 7.875, "learning_rate": 1.8816635953580278e-06, "loss": 1.439836859703064, "step": 11380 }, { "epoch": 2.071721124965869, "grad_norm": 34.75, "learning_rate": 1.8810284986246185e-06, "loss": 1.1952608823776245, "step": 11382 }, { "epoch": 2.072085191590061, "grad_norm": 9.75, "learning_rate": 1.8803935660957427e-06, "loss": 1.596019983291626, "step": 11384 }, { "epoch": 2.0724492582142533, "grad_norm": 10.25, "learning_rate": 1.8797587978645743e-06, "loss": 1.488053798675537, "step": 11386 }, { "epoch": 2.0728133248384455, "grad_norm": 2.75, "learning_rate": 1.879124194024261e-06, "loss": 0.8447047472000122, "step": 11388 }, { "epoch": 2.0731773914626377, "grad_norm": 28.875, "learning_rate": 1.8784897546679314e-06, "loss": 1.9565701484680176, "step": 11390 }, { "epoch": 2.07354145808683, "grad_norm": 14.375, "learning_rate": 1.8778554798886837e-06, "loss": 1.5755465030670166, "step": 11392 }, { "epoch": 2.073905524711022, "grad_norm": 6.75, "learning_rate": 1.8772213697795972e-06, "loss": 1.4141664505004883, "step": 11394 }, { "epoch": 2.0742695913352143, "grad_norm": 6.96875, "learning_rate": 1.8765874244337254e-06, "loss": 1.6290302276611328, "step": 11396 }, { "epoch": 2.0746336579594065, "grad_norm": 10.625, "learning_rate": 1.8759536439440944e-06, "loss": 1.4229499101638794, "step": 11398 }, { "epoch": 2.0749977245835987, "grad_norm": 9.375, "learning_rate": 1.875320028403713e-06, "loss": 1.6379119157791138, "step": 11400 }, { "epoch": 2.075361791207791, "grad_norm": 4.25, "learning_rate": 1.8746865779055573e-06, "loss": 1.1094028949737549, "step": 11402 }, { "epoch": 2.075725857831983, "grad_norm": 44.75, "learning_rate": 1.874053292542587e-06, "loss": 1.3150646686553955, "step": 11404 }, { "epoch": 2.0760899244561757, "grad_norm": 21.125, "learning_rate": 1.8734201724077333e-06, "loss": 2.1382763385772705, "step": 11406 }, { "epoch": 2.076453991080368, "grad_norm": 11.375, "learning_rate": 1.8727872175939024e-06, "loss": 1.3936569690704346, "step": 11408 }, { "epoch": 2.07681805770456, "grad_norm": 12.6875, "learning_rate": 1.8721544281939808e-06, "loss": 1.3287220001220703, "step": 11410 }, { "epoch": 2.0771821243287523, "grad_norm": 10.3125, "learning_rate": 1.8715218043008243e-06, "loss": 1.4587661027908325, "step": 11412 }, { "epoch": 2.0775461909529445, "grad_norm": 11.0625, "learning_rate": 1.8708893460072708e-06, "loss": 1.1178665161132812, "step": 11414 }, { "epoch": 2.0779102575771367, "grad_norm": 9.5, "learning_rate": 1.8702570534061304e-06, "loss": 1.4152162075042725, "step": 11416 }, { "epoch": 2.078274324201329, "grad_norm": 23.875, "learning_rate": 1.8696249265901872e-06, "loss": 1.875107765197754, "step": 11418 }, { "epoch": 2.078638390825521, "grad_norm": 6.96875, "learning_rate": 1.868992965652207e-06, "loss": 1.3942734003067017, "step": 11420 }, { "epoch": 2.0790024574497132, "grad_norm": 8.9375, "learning_rate": 1.8683611706849237e-06, "loss": 1.4798282384872437, "step": 11422 }, { "epoch": 2.0793665240739054, "grad_norm": 29.625, "learning_rate": 1.8677295417810534e-06, "loss": 1.5472404956817627, "step": 11424 }, { "epoch": 2.0797305906980976, "grad_norm": 15.875, "learning_rate": 1.8670980790332848e-06, "loss": 1.4509446620941162, "step": 11426 }, { "epoch": 2.08009465732229, "grad_norm": 11.3125, "learning_rate": 1.8664667825342805e-06, "loss": 1.6181693077087402, "step": 11428 }, { "epoch": 2.080458723946482, "grad_norm": 2.640625, "learning_rate": 1.8658356523766833e-06, "loss": 1.261971116065979, "step": 11430 }, { "epoch": 2.080822790570674, "grad_norm": 18.25, "learning_rate": 1.8652046886531065e-06, "loss": 2.1572773456573486, "step": 11432 }, { "epoch": 2.081186857194867, "grad_norm": 8.9375, "learning_rate": 1.8645738914561435e-06, "loss": 1.513465404510498, "step": 11434 }, { "epoch": 2.081550923819059, "grad_norm": 21.25, "learning_rate": 1.863943260878361e-06, "loss": 0.9612942934036255, "step": 11436 }, { "epoch": 2.0819149904432512, "grad_norm": 27.125, "learning_rate": 1.8633127970122993e-06, "loss": 1.0310657024383545, "step": 11438 }, { "epoch": 2.0822790570674434, "grad_norm": 8.5, "learning_rate": 1.86268249995048e-06, "loss": 1.3813170194625854, "step": 11440 }, { "epoch": 2.0826431236916356, "grad_norm": 27.625, "learning_rate": 1.862052369785393e-06, "loss": 1.8864072561264038, "step": 11442 }, { "epoch": 2.083007190315828, "grad_norm": 9.75, "learning_rate": 1.8614224066095093e-06, "loss": 1.608127474784851, "step": 11444 }, { "epoch": 2.08337125694002, "grad_norm": 20.75, "learning_rate": 1.8607926105152744e-06, "loss": 0.6679896712303162, "step": 11446 }, { "epoch": 2.083735323564212, "grad_norm": 14.0, "learning_rate": 1.8601629815951055e-06, "loss": 1.3346567153930664, "step": 11448 }, { "epoch": 2.0840993901884044, "grad_norm": 45.75, "learning_rate": 1.8595335199414014e-06, "loss": 2.1126351356506348, "step": 11450 }, { "epoch": 2.0844634568125966, "grad_norm": 12.9375, "learning_rate": 1.8589042256465295e-06, "loss": 1.4173626899719238, "step": 11452 }, { "epoch": 2.084827523436789, "grad_norm": 136.0, "learning_rate": 1.8582750988028392e-06, "loss": 1.9854846000671387, "step": 11454 }, { "epoch": 2.085191590060981, "grad_norm": 11.4375, "learning_rate": 1.8576461395026516e-06, "loss": 1.5123332738876343, "step": 11456 }, { "epoch": 2.085555656685173, "grad_norm": 11.875, "learning_rate": 1.857017347838262e-06, "loss": 1.3140833377838135, "step": 11458 }, { "epoch": 2.085919723309366, "grad_norm": 10.5625, "learning_rate": 1.8563887239019459e-06, "loss": 0.822762131690979, "step": 11460 }, { "epoch": 2.086283789933558, "grad_norm": 8.3125, "learning_rate": 1.8557602677859488e-06, "loss": 1.1884052753448486, "step": 11462 }, { "epoch": 2.08664785655775, "grad_norm": 48.5, "learning_rate": 1.8551319795824953e-06, "loss": 0.8358457684516907, "step": 11464 }, { "epoch": 2.0870119231819424, "grad_norm": 6.5625, "learning_rate": 1.8545038593837855e-06, "loss": 1.085221529006958, "step": 11466 }, { "epoch": 2.0873759898061346, "grad_norm": 7.78125, "learning_rate": 1.85387590728199e-06, "loss": 1.3838255405426025, "step": 11468 }, { "epoch": 2.087740056430327, "grad_norm": 10.3125, "learning_rate": 1.8532481233692624e-06, "loss": 1.449247121810913, "step": 11470 }, { "epoch": 2.088104123054519, "grad_norm": 10.1875, "learning_rate": 1.8526205077377231e-06, "loss": 1.3740648031234741, "step": 11472 }, { "epoch": 2.088468189678711, "grad_norm": 29.875, "learning_rate": 1.8519930604794755e-06, "loss": 1.2248938083648682, "step": 11474 }, { "epoch": 2.0888322563029034, "grad_norm": 16.0, "learning_rate": 1.8513657816865946e-06, "loss": 1.5027263164520264, "step": 11476 }, { "epoch": 2.0891963229270956, "grad_norm": 9.75, "learning_rate": 1.8507386714511288e-06, "loss": 1.4471714496612549, "step": 11478 }, { "epoch": 2.0895603895512878, "grad_norm": 18.5, "learning_rate": 1.8501117298651067e-06, "loss": 2.0544333457946777, "step": 11480 }, { "epoch": 2.08992445617548, "grad_norm": 16.625, "learning_rate": 1.8494849570205264e-06, "loss": 1.851595401763916, "step": 11482 }, { "epoch": 2.090288522799672, "grad_norm": 16.25, "learning_rate": 1.8488583530093673e-06, "loss": 1.9838097095489502, "step": 11484 }, { "epoch": 2.090652589423865, "grad_norm": 13.0625, "learning_rate": 1.8482319179235802e-06, "loss": 0.8050888180732727, "step": 11486 }, { "epoch": 2.091016656048057, "grad_norm": 23.75, "learning_rate": 1.84760565185509e-06, "loss": 1.089007019996643, "step": 11488 }, { "epoch": 2.091380722672249, "grad_norm": 20.875, "learning_rate": 1.8469795548958017e-06, "loss": 1.2898389101028442, "step": 11490 }, { "epoch": 2.0917447892964414, "grad_norm": 67.0, "learning_rate": 1.8463536271375893e-06, "loss": 0.9535794258117676, "step": 11492 }, { "epoch": 2.0921088559206336, "grad_norm": 9.875, "learning_rate": 1.8457278686723079e-06, "loss": 1.5751750469207764, "step": 11494 }, { "epoch": 2.0924729225448258, "grad_norm": 36.75, "learning_rate": 1.8451022795917843e-06, "loss": 0.7495129108428955, "step": 11496 }, { "epoch": 2.092836989169018, "grad_norm": 12.8125, "learning_rate": 1.8444768599878192e-06, "loss": 1.828484296798706, "step": 11498 }, { "epoch": 2.09320105579321, "grad_norm": 11.0, "learning_rate": 1.843851609952194e-06, "loss": 1.5117626190185547, "step": 11500 }, { "epoch": 2.0935651224174023, "grad_norm": 17.0, "learning_rate": 1.8432265295766575e-06, "loss": 1.4968690872192383, "step": 11502 }, { "epoch": 2.0939291890415945, "grad_norm": 15.25, "learning_rate": 1.8426016189529407e-06, "loss": 0.9843392968177795, "step": 11504 }, { "epoch": 2.0942932556657867, "grad_norm": 11.0, "learning_rate": 1.8419768781727465e-06, "loss": 1.5971155166625977, "step": 11506 }, { "epoch": 2.094657322289979, "grad_norm": 14.1875, "learning_rate": 1.841352307327751e-06, "loss": 1.524049997329712, "step": 11508 }, { "epoch": 2.095021388914171, "grad_norm": 19.875, "learning_rate": 1.8407279065096106e-06, "loss": 1.6609448194503784, "step": 11510 }, { "epoch": 2.0953854555383633, "grad_norm": 18.5, "learning_rate": 1.84010367580995e-06, "loss": 2.1521108150482178, "step": 11512 }, { "epoch": 2.095749522162556, "grad_norm": 28.375, "learning_rate": 1.839479615320375e-06, "loss": 1.0616257190704346, "step": 11514 }, { "epoch": 2.096113588786748, "grad_norm": 9.75, "learning_rate": 1.838855725132464e-06, "loss": 1.5993046760559082, "step": 11516 }, { "epoch": 2.0964776554109403, "grad_norm": 15.8125, "learning_rate": 1.8382320053377681e-06, "loss": 1.424747109413147, "step": 11518 }, { "epoch": 2.0968417220351325, "grad_norm": 4.71875, "learning_rate": 1.837608456027819e-06, "loss": 1.0702464580535889, "step": 11520 }, { "epoch": 2.0972057886593247, "grad_norm": 7.28125, "learning_rate": 1.8369850772941166e-06, "loss": 1.176002860069275, "step": 11522 }, { "epoch": 2.097569855283517, "grad_norm": 8.75, "learning_rate": 1.8363618692281415e-06, "loss": 1.282271385192871, "step": 11524 }, { "epoch": 2.097933921907709, "grad_norm": 13.25, "learning_rate": 1.8357388319213467e-06, "loss": 1.453086256980896, "step": 11526 }, { "epoch": 2.0982979885319013, "grad_norm": 8.0625, "learning_rate": 1.835115965465159e-06, "loss": 1.1277140378952026, "step": 11528 }, { "epoch": 2.0986620551560935, "grad_norm": 6.125, "learning_rate": 1.8344932699509838e-06, "loss": 1.291964054107666, "step": 11530 }, { "epoch": 2.0990261217802857, "grad_norm": 6.03125, "learning_rate": 1.8338707454701965e-06, "loss": 1.5287799835205078, "step": 11532 }, { "epoch": 2.099390188404478, "grad_norm": 7.90625, "learning_rate": 1.833248392114152e-06, "loss": 1.4355192184448242, "step": 11534 }, { "epoch": 2.09975425502867, "grad_norm": 3.59375, "learning_rate": 1.8326262099741782e-06, "loss": 1.170735239982605, "step": 11536 }, { "epoch": 2.1001183216528623, "grad_norm": 15.3125, "learning_rate": 1.8320041991415757e-06, "loss": 1.3118293285369873, "step": 11538 }, { "epoch": 2.100482388277055, "grad_norm": 22.25, "learning_rate": 1.8313823597076249e-06, "loss": 0.5604899525642395, "step": 11540 }, { "epoch": 2.100846454901247, "grad_norm": 10.5, "learning_rate": 1.8307606917635756e-06, "loss": 1.3302524089813232, "step": 11542 }, { "epoch": 2.1012105215254393, "grad_norm": 15.5625, "learning_rate": 1.8301391954006568e-06, "loss": 1.8190664052963257, "step": 11544 }, { "epoch": 2.1015745881496315, "grad_norm": 14.125, "learning_rate": 1.8295178707100707e-06, "loss": 1.8432998657226562, "step": 11546 }, { "epoch": 2.1019386547738237, "grad_norm": 19.875, "learning_rate": 1.8288967177829922e-06, "loss": 1.3491830825805664, "step": 11548 }, { "epoch": 2.102302721398016, "grad_norm": 37.75, "learning_rate": 1.8282757367105757e-06, "loss": 1.7569167613983154, "step": 11550 }, { "epoch": 2.102666788022208, "grad_norm": 17.75, "learning_rate": 1.8276549275839451e-06, "loss": 1.0134644508361816, "step": 11552 }, { "epoch": 2.1030308546464003, "grad_norm": 31.375, "learning_rate": 1.827034290494203e-06, "loss": 1.3021458387374878, "step": 11554 }, { "epoch": 2.1033949212705925, "grad_norm": 7.25, "learning_rate": 1.8264138255324263e-06, "loss": 1.326372504234314, "step": 11556 }, { "epoch": 2.1037589878947847, "grad_norm": 5.34375, "learning_rate": 1.8257935327896628e-06, "loss": 0.8276577591896057, "step": 11558 }, { "epoch": 2.104123054518977, "grad_norm": 11.6875, "learning_rate": 1.8251734123569414e-06, "loss": 1.4027292728424072, "step": 11560 }, { "epoch": 2.104487121143169, "grad_norm": 10.0, "learning_rate": 1.824553464325259e-06, "loss": 1.4700348377227783, "step": 11562 }, { "epoch": 2.1048511877673612, "grad_norm": 46.5, "learning_rate": 1.823933688785593e-06, "loss": 1.4165862798690796, "step": 11564 }, { "epoch": 2.1052152543915534, "grad_norm": 9.3125, "learning_rate": 1.8233140858288922e-06, "loss": 1.682431697845459, "step": 11566 }, { "epoch": 2.105579321015746, "grad_norm": 19.75, "learning_rate": 1.8226946555460797e-06, "loss": 2.1505484580993652, "step": 11568 }, { "epoch": 2.1059433876399383, "grad_norm": 9.875, "learning_rate": 1.8220753980280567e-06, "loss": 1.4033162593841553, "step": 11570 }, { "epoch": 2.1063074542641305, "grad_norm": 8.9375, "learning_rate": 1.8214563133656936e-06, "loss": 1.615618109703064, "step": 11572 }, { "epoch": 2.1066715208883227, "grad_norm": 14.75, "learning_rate": 1.8208374016498412e-06, "loss": 1.4349865913391113, "step": 11574 }, { "epoch": 2.107035587512515, "grad_norm": 26.25, "learning_rate": 1.820218662971322e-06, "loss": 1.258475661277771, "step": 11576 }, { "epoch": 2.107399654136707, "grad_norm": 9.125, "learning_rate": 1.8196000974209315e-06, "loss": 1.0866085290908813, "step": 11578 }, { "epoch": 2.1077637207608992, "grad_norm": 8.4375, "learning_rate": 1.8189817050894442e-06, "loss": 1.5266196727752686, "step": 11580 }, { "epoch": 2.1081277873850914, "grad_norm": 2.390625, "learning_rate": 1.8183634860676042e-06, "loss": 1.2994287014007568, "step": 11582 }, { "epoch": 2.1084918540092836, "grad_norm": 15.4375, "learning_rate": 1.8177454404461344e-06, "loss": 1.6951885223388672, "step": 11584 }, { "epoch": 2.108855920633476, "grad_norm": 31.625, "learning_rate": 1.8171275683157309e-06, "loss": 1.796293020248413, "step": 11586 }, { "epoch": 2.109219987257668, "grad_norm": 5.28125, "learning_rate": 1.8165098697670614e-06, "loss": 1.2194533348083496, "step": 11588 }, { "epoch": 2.10958405388186, "grad_norm": 87.0, "learning_rate": 1.8158923448907733e-06, "loss": 1.293585181236267, "step": 11590 }, { "epoch": 2.1099481205060524, "grad_norm": 7.28125, "learning_rate": 1.8152749937774837e-06, "loss": 1.380223035812378, "step": 11592 }, { "epoch": 2.110312187130245, "grad_norm": 9.1875, "learning_rate": 1.8146578165177885e-06, "loss": 1.4090272188186646, "step": 11594 }, { "epoch": 2.1106762537544372, "grad_norm": 43.75, "learning_rate": 1.8140408132022554e-06, "loss": 1.5302612781524658, "step": 11596 }, { "epoch": 2.1110403203786294, "grad_norm": 13.5, "learning_rate": 1.8134239839214252e-06, "loss": 1.4695546627044678, "step": 11598 }, { "epoch": 2.1114043870028216, "grad_norm": 10.375, "learning_rate": 1.8128073287658183e-06, "loss": 1.1802515983581543, "step": 11600 }, { "epoch": 2.111768453627014, "grad_norm": 6.25, "learning_rate": 1.812190847825923e-06, "loss": 1.2398107051849365, "step": 11602 }, { "epoch": 2.112132520251206, "grad_norm": 10.375, "learning_rate": 1.8115745411922075e-06, "loss": 1.4469610452651978, "step": 11604 }, { "epoch": 2.112496586875398, "grad_norm": 6.65625, "learning_rate": 1.8109584089551127e-06, "loss": 1.2013463973999023, "step": 11606 }, { "epoch": 2.1128606534995904, "grad_norm": 8.3125, "learning_rate": 1.8103424512050516e-06, "loss": 1.3670557737350464, "step": 11608 }, { "epoch": 2.1132247201237826, "grad_norm": 12.375, "learning_rate": 1.8097266680324155e-06, "loss": 1.559103012084961, "step": 11610 }, { "epoch": 2.113588786747975, "grad_norm": 11.25, "learning_rate": 1.8091110595275657e-06, "loss": 1.3754302263259888, "step": 11612 }, { "epoch": 2.113952853372167, "grad_norm": 17.625, "learning_rate": 1.8084956257808424e-06, "loss": 1.7541909217834473, "step": 11614 }, { "epoch": 2.114316919996359, "grad_norm": 11.4375, "learning_rate": 1.8078803668825582e-06, "loss": 1.5109436511993408, "step": 11616 }, { "epoch": 2.1146809866205514, "grad_norm": 7.90625, "learning_rate": 1.8072652829229973e-06, "loss": 1.5829871892929077, "step": 11618 }, { "epoch": 2.115045053244744, "grad_norm": 20.0, "learning_rate": 1.8066503739924237e-06, "loss": 1.410038709640503, "step": 11620 }, { "epoch": 2.115409119868936, "grad_norm": 24.625, "learning_rate": 1.8060356401810705e-06, "loss": 1.8690016269683838, "step": 11622 }, { "epoch": 2.1157731864931284, "grad_norm": 7.71875, "learning_rate": 1.8054210815791486e-06, "loss": 1.4407477378845215, "step": 11624 }, { "epoch": 2.1161372531173206, "grad_norm": 41.0, "learning_rate": 1.804806698276843e-06, "loss": 1.4998970031738281, "step": 11626 }, { "epoch": 2.116501319741513, "grad_norm": 15.0, "learning_rate": 1.804192490364309e-06, "loss": 1.6051552295684814, "step": 11628 }, { "epoch": 2.116865386365705, "grad_norm": 10.1875, "learning_rate": 1.8035784579316823e-06, "loss": 1.5052504539489746, "step": 11630 }, { "epoch": 2.117229452989897, "grad_norm": 10.3125, "learning_rate": 1.8029646010690668e-06, "loss": 1.2871546745300293, "step": 11632 }, { "epoch": 2.1175935196140894, "grad_norm": 13.0625, "learning_rate": 1.8023509198665457e-06, "loss": 1.383264183998108, "step": 11634 }, { "epoch": 2.1179575862382816, "grad_norm": 9.375, "learning_rate": 1.8017374144141742e-06, "loss": 1.477445125579834, "step": 11636 }, { "epoch": 2.1183216528624738, "grad_norm": 5.03125, "learning_rate": 1.8011240848019796e-06, "loss": 0.9355295300483704, "step": 11638 }, { "epoch": 2.118685719486666, "grad_norm": 7.4375, "learning_rate": 1.8005109311199681e-06, "loss": 1.4660059213638306, "step": 11640 }, { "epoch": 2.119049786110858, "grad_norm": 15.0, "learning_rate": 1.7998979534581152e-06, "loss": 2.0586273670196533, "step": 11642 }, { "epoch": 2.1194138527350503, "grad_norm": 5.90625, "learning_rate": 1.7992851519063747e-06, "loss": 0.9837992191314697, "step": 11644 }, { "epoch": 2.1197779193592425, "grad_norm": 8.375, "learning_rate": 1.7986725265546726e-06, "loss": 0.9593755602836609, "step": 11646 }, { "epoch": 2.120141985983435, "grad_norm": 10.125, "learning_rate": 1.7980600774929074e-06, "loss": 0.8791125416755676, "step": 11648 }, { "epoch": 2.1205060526076274, "grad_norm": 3.234375, "learning_rate": 1.7974478048109562e-06, "loss": 0.9672107696533203, "step": 11650 }, { "epoch": 2.1208701192318196, "grad_norm": 14.1875, "learning_rate": 1.796835708598665e-06, "loss": 1.4134061336517334, "step": 11652 }, { "epoch": 2.1212341858560118, "grad_norm": 9.0625, "learning_rate": 1.7962237889458577e-06, "loss": 1.4089746475219727, "step": 11654 }, { "epoch": 2.121598252480204, "grad_norm": 16.875, "learning_rate": 1.7956120459423322e-06, "loss": 1.520780324935913, "step": 11656 }, { "epoch": 2.121962319104396, "grad_norm": 9.875, "learning_rate": 1.795000479677856e-06, "loss": 1.5086718797683716, "step": 11658 }, { "epoch": 2.1223263857285883, "grad_norm": 34.0, "learning_rate": 1.7943890902421779e-06, "loss": 2.127101421356201, "step": 11660 }, { "epoch": 2.1226904523527805, "grad_norm": 7.5, "learning_rate": 1.7937778777250132e-06, "loss": 1.1552338600158691, "step": 11662 }, { "epoch": 2.1230545189769727, "grad_norm": 8.1875, "learning_rate": 1.7931668422160572e-06, "loss": 1.1991102695465088, "step": 11664 }, { "epoch": 2.123418585601165, "grad_norm": 10.75, "learning_rate": 1.792555983804977e-06, "loss": 1.282840609550476, "step": 11666 }, { "epoch": 2.123782652225357, "grad_norm": 12.0625, "learning_rate": 1.7919453025814116e-06, "loss": 1.4351963996887207, "step": 11668 }, { "epoch": 2.1241467188495493, "grad_norm": 7.875, "learning_rate": 1.7913347986349784e-06, "loss": 1.3094508647918701, "step": 11670 }, { "epoch": 2.1245107854737415, "grad_norm": 2.15625, "learning_rate": 1.7907244720552641e-06, "loss": 0.9559741020202637, "step": 11672 }, { "epoch": 2.1248748520979337, "grad_norm": 12.25, "learning_rate": 1.7901143229318333e-06, "loss": 1.4657357931137085, "step": 11674 }, { "epoch": 2.1252389187221263, "grad_norm": 13.875, "learning_rate": 1.7895043513542228e-06, "loss": 1.4646859169006348, "step": 11676 }, { "epoch": 2.1256029853463185, "grad_norm": 10.0625, "learning_rate": 1.788894557411942e-06, "loss": 1.4770485162734985, "step": 11678 }, { "epoch": 2.1259670519705107, "grad_norm": 4.34375, "learning_rate": 1.7882849411944781e-06, "loss": 1.1873323917388916, "step": 11680 }, { "epoch": 2.126331118594703, "grad_norm": 3.765625, "learning_rate": 1.7876755027912869e-06, "loss": 1.1944563388824463, "step": 11682 }, { "epoch": 2.126695185218895, "grad_norm": 30.5, "learning_rate": 1.787066242291803e-06, "loss": 1.4382654428482056, "step": 11684 }, { "epoch": 2.1270592518430873, "grad_norm": 6.21875, "learning_rate": 1.7864571597854338e-06, "loss": 1.4378302097320557, "step": 11686 }, { "epoch": 2.1274233184672795, "grad_norm": 19.25, "learning_rate": 1.7858482553615564e-06, "loss": 2.002408266067505, "step": 11688 }, { "epoch": 2.1277873850914717, "grad_norm": 12.25, "learning_rate": 1.7852395291095288e-06, "loss": 1.3984942436218262, "step": 11690 }, { "epoch": 2.128151451715664, "grad_norm": 10.625, "learning_rate": 1.7846309811186757e-06, "loss": 1.1072252988815308, "step": 11692 }, { "epoch": 2.128515518339856, "grad_norm": 7.4375, "learning_rate": 1.784022611478301e-06, "loss": 1.2439546585083008, "step": 11694 }, { "epoch": 2.1288795849640483, "grad_norm": 21.5, "learning_rate": 1.7834144202776815e-06, "loss": 1.7651779651641846, "step": 11696 }, { "epoch": 2.1292436515882405, "grad_norm": 3.59375, "learning_rate": 1.7828064076060637e-06, "loss": 0.9770365953445435, "step": 11698 }, { "epoch": 2.1296077182124327, "grad_norm": 9.125, "learning_rate": 1.782198573552674e-06, "loss": 1.3989923000335693, "step": 11700 }, { "epoch": 2.1299717848366253, "grad_norm": 20.5, "learning_rate": 1.781590918206707e-06, "loss": 0.5772086381912231, "step": 11702 }, { "epoch": 2.1303358514608175, "grad_norm": 10.375, "learning_rate": 1.7809834416573356e-06, "loss": 1.3486251831054688, "step": 11704 }, { "epoch": 2.1306999180850097, "grad_norm": 25.0, "learning_rate": 1.780376143993705e-06, "loss": 0.9902925491333008, "step": 11706 }, { "epoch": 2.131063984709202, "grad_norm": 10.125, "learning_rate": 1.7797690253049307e-06, "loss": 1.8836326599121094, "step": 11708 }, { "epoch": 2.131428051333394, "grad_norm": 22.0, "learning_rate": 1.7791620856801084e-06, "loss": 1.520932912826538, "step": 11710 }, { "epoch": 2.1317921179575863, "grad_norm": 16.875, "learning_rate": 1.778555325208301e-06, "loss": 1.409934401512146, "step": 11712 }, { "epoch": 2.1321561845817785, "grad_norm": 9.875, "learning_rate": 1.7779487439785503e-06, "loss": 1.7099024057388306, "step": 11714 }, { "epoch": 2.1325202512059707, "grad_norm": 14.75, "learning_rate": 1.7773423420798697e-06, "loss": 1.797615647315979, "step": 11716 }, { "epoch": 2.132884317830163, "grad_norm": 6.59375, "learning_rate": 1.7767361196012434e-06, "loss": 0.8760505318641663, "step": 11718 }, { "epoch": 2.133248384454355, "grad_norm": 6.03125, "learning_rate": 1.776130076631636e-06, "loss": 1.057438611984253, "step": 11720 }, { "epoch": 2.1336124510785472, "grad_norm": 23.75, "learning_rate": 1.7755242132599784e-06, "loss": 1.3238999843597412, "step": 11722 }, { "epoch": 2.1339765177027394, "grad_norm": 10.5625, "learning_rate": 1.7749185295751808e-06, "loss": 1.4893951416015625, "step": 11724 }, { "epoch": 2.1343405843269316, "grad_norm": 8.4375, "learning_rate": 1.7743130256661252e-06, "loss": 1.3726822137832642, "step": 11726 }, { "epoch": 2.1347046509511243, "grad_norm": 15.125, "learning_rate": 1.773707701621664e-06, "loss": 1.9946835041046143, "step": 11728 }, { "epoch": 2.1350687175753165, "grad_norm": 10.125, "learning_rate": 1.7731025575306294e-06, "loss": 1.4197754859924316, "step": 11730 }, { "epoch": 2.1354327841995087, "grad_norm": 9.5, "learning_rate": 1.772497593481821e-06, "loss": 1.4092371463775635, "step": 11732 }, { "epoch": 2.135796850823701, "grad_norm": 17.375, "learning_rate": 1.7718928095640164e-06, "loss": 0.6433576345443726, "step": 11734 }, { "epoch": 2.136160917447893, "grad_norm": 14.4375, "learning_rate": 1.7712882058659664e-06, "loss": 1.433774471282959, "step": 11736 }, { "epoch": 2.1365249840720852, "grad_norm": 7.875, "learning_rate": 1.7706837824763907e-06, "loss": 1.1799637079238892, "step": 11738 }, { "epoch": 2.1368890506962774, "grad_norm": 61.25, "learning_rate": 1.7700795394839893e-06, "loss": 1.524416446685791, "step": 11740 }, { "epoch": 2.1372531173204696, "grad_norm": 11.1875, "learning_rate": 1.7694754769774298e-06, "loss": 1.4541678428649902, "step": 11742 }, { "epoch": 2.137617183944662, "grad_norm": 13.3125, "learning_rate": 1.7688715950453579e-06, "loss": 1.3971558809280396, "step": 11744 }, { "epoch": 2.137981250568854, "grad_norm": 26.375, "learning_rate": 1.7682678937763908e-06, "loss": 1.2316131591796875, "step": 11746 }, { "epoch": 2.138345317193046, "grad_norm": 7.09375, "learning_rate": 1.767664373259117e-06, "loss": 1.4580987691879272, "step": 11748 }, { "epoch": 2.1387093838172384, "grad_norm": 6.4375, "learning_rate": 1.7670610335821037e-06, "loss": 1.0925869941711426, "step": 11750 }, { "epoch": 2.1390734504414306, "grad_norm": 7.25, "learning_rate": 1.7664578748338857e-06, "loss": 1.4267220497131348, "step": 11752 }, { "epoch": 2.1394375170656232, "grad_norm": 15.6875, "learning_rate": 1.765854897102976e-06, "loss": 1.486244797706604, "step": 11754 }, { "epoch": 2.1398015836898154, "grad_norm": 10.5625, "learning_rate": 1.7652521004778595e-06, "loss": 1.3745989799499512, "step": 11756 }, { "epoch": 2.1401656503140076, "grad_norm": 14.0625, "learning_rate": 1.7646494850469917e-06, "loss": 1.432636022567749, "step": 11758 }, { "epoch": 2.1405297169382, "grad_norm": 9.6875, "learning_rate": 1.764047050898807e-06, "loss": 1.4629637002944946, "step": 11760 }, { "epoch": 2.140893783562392, "grad_norm": 9.8125, "learning_rate": 1.7634447981217074e-06, "loss": 1.3738616704940796, "step": 11762 }, { "epoch": 2.141257850186584, "grad_norm": 13.9375, "learning_rate": 1.7628427268040726e-06, "loss": 1.4615758657455444, "step": 11764 }, { "epoch": 2.1416219168107764, "grad_norm": 21.25, "learning_rate": 1.7622408370342551e-06, "loss": 1.390002727508545, "step": 11766 }, { "epoch": 2.1419859834349686, "grad_norm": 13.125, "learning_rate": 1.761639128900577e-06, "loss": 1.6146819591522217, "step": 11768 }, { "epoch": 2.142350050059161, "grad_norm": 12.5625, "learning_rate": 1.7610376024913394e-06, "loss": 1.960791826248169, "step": 11770 }, { "epoch": 2.142714116683353, "grad_norm": 16.625, "learning_rate": 1.7604362578948111e-06, "loss": 1.660319209098816, "step": 11772 }, { "epoch": 2.143078183307545, "grad_norm": 11.625, "learning_rate": 1.7598350951992393e-06, "loss": 1.188616156578064, "step": 11774 }, { "epoch": 2.1434422499317374, "grad_norm": 24.75, "learning_rate": 1.759234114492842e-06, "loss": 1.0207582712173462, "step": 11776 }, { "epoch": 2.1438063165559296, "grad_norm": 10.125, "learning_rate": 1.7586333158638089e-06, "loss": 1.4799736738204956, "step": 11778 }, { "epoch": 2.1441703831801218, "grad_norm": 16.25, "learning_rate": 1.758032699400307e-06, "loss": 1.4395723342895508, "step": 11780 }, { "epoch": 2.144534449804314, "grad_norm": 34.5, "learning_rate": 1.7574322651904718e-06, "loss": 1.3090319633483887, "step": 11782 }, { "epoch": 2.1448985164285066, "grad_norm": 11.875, "learning_rate": 1.7568320133224168e-06, "loss": 1.1724014282226562, "step": 11784 }, { "epoch": 2.145262583052699, "grad_norm": 33.25, "learning_rate": 1.7562319438842263e-06, "loss": 2.2555654048919678, "step": 11786 }, { "epoch": 2.145626649676891, "grad_norm": 6.28125, "learning_rate": 1.7556320569639563e-06, "loss": 1.5441243648529053, "step": 11788 }, { "epoch": 2.145990716301083, "grad_norm": 9.25, "learning_rate": 1.75503235264964e-06, "loss": 1.1939061880111694, "step": 11790 }, { "epoch": 2.1463547829252754, "grad_norm": 2.859375, "learning_rate": 1.754432831029279e-06, "loss": 1.3727716207504272, "step": 11792 }, { "epoch": 2.1467188495494676, "grad_norm": 18.125, "learning_rate": 1.7538334921908535e-06, "loss": 1.002747654914856, "step": 11794 }, { "epoch": 2.1470829161736598, "grad_norm": 24.75, "learning_rate": 1.7532343362223132e-06, "loss": 1.9813264608383179, "step": 11796 }, { "epoch": 2.147446982797852, "grad_norm": 22.75, "learning_rate": 1.75263536321158e-06, "loss": 2.0209238529205322, "step": 11798 }, { "epoch": 2.147811049422044, "grad_norm": 14.0625, "learning_rate": 1.7520365732465532e-06, "loss": 0.8212375640869141, "step": 11800 }, { "epoch": 2.1481751160462363, "grad_norm": 14.5, "learning_rate": 1.7514379664151005e-06, "loss": 1.586103081703186, "step": 11802 }, { "epoch": 2.1485391826704285, "grad_norm": 8.3125, "learning_rate": 1.7508395428050672e-06, "loss": 1.3431743383407593, "step": 11804 }, { "epoch": 2.1489032492946207, "grad_norm": 15.0, "learning_rate": 1.750241302504269e-06, "loss": 1.408813714981079, "step": 11806 }, { "epoch": 2.149267315918813, "grad_norm": 9.3125, "learning_rate": 1.7496432456004936e-06, "loss": 1.3541207313537598, "step": 11808 }, { "epoch": 2.1496313825430056, "grad_norm": 10.4375, "learning_rate": 1.749045372181506e-06, "loss": 1.5129245519638062, "step": 11810 }, { "epoch": 2.1499954491671978, "grad_norm": 14.5, "learning_rate": 1.7484476823350388e-06, "loss": 1.571367859840393, "step": 11812 }, { "epoch": 2.15035951579139, "grad_norm": 7.40625, "learning_rate": 1.747850176148803e-06, "loss": 1.1557633876800537, "step": 11814 }, { "epoch": 2.150723582415582, "grad_norm": 19.25, "learning_rate": 1.74725285371048e-06, "loss": 0.8231582045555115, "step": 11816 }, { "epoch": 2.1510876490397743, "grad_norm": 24.0, "learning_rate": 1.7466557151077224e-06, "loss": 1.566454291343689, "step": 11818 }, { "epoch": 2.1514517156639665, "grad_norm": 4.75, "learning_rate": 1.746058760428161e-06, "loss": 1.1101126670837402, "step": 11820 }, { "epoch": 2.1518157822881587, "grad_norm": 109.0, "learning_rate": 1.7454619897593927e-06, "loss": 1.3713303804397583, "step": 11822 }, { "epoch": 2.152179848912351, "grad_norm": 8.375, "learning_rate": 1.744865403188994e-06, "loss": 0.9665773510932922, "step": 11824 }, { "epoch": 2.152543915536543, "grad_norm": 21.875, "learning_rate": 1.7442690008045119e-06, "loss": 1.9788179397583008, "step": 11826 }, { "epoch": 2.1529079821607353, "grad_norm": 6.03125, "learning_rate": 1.743672782693463e-06, "loss": 1.493281364440918, "step": 11828 }, { "epoch": 2.1532720487849275, "grad_norm": 8.125, "learning_rate": 1.7430767489433436e-06, "loss": 1.3502285480499268, "step": 11830 }, { "epoch": 2.1536361154091197, "grad_norm": 9.375, "learning_rate": 1.742480899641616e-06, "loss": 1.347835659980774, "step": 11832 }, { "epoch": 2.154000182033312, "grad_norm": 9.75, "learning_rate": 1.7418852348757203e-06, "loss": 1.5300534963607788, "step": 11834 }, { "epoch": 2.1543642486575045, "grad_norm": 13.4375, "learning_rate": 1.7412897547330687e-06, "loss": 1.653989553451538, "step": 11836 }, { "epoch": 2.1547283152816967, "grad_norm": 21.75, "learning_rate": 1.7406944593010434e-06, "loss": 2.0581629276275635, "step": 11838 }, { "epoch": 2.155092381905889, "grad_norm": 12.5625, "learning_rate": 1.7400993486670038e-06, "loss": 1.4431942701339722, "step": 11840 }, { "epoch": 2.155456448530081, "grad_norm": 7.90625, "learning_rate": 1.7395044229182773e-06, "loss": 1.3695716857910156, "step": 11842 }, { "epoch": 2.1558205151542733, "grad_norm": 21.0, "learning_rate": 1.7389096821421691e-06, "loss": 1.3919975757598877, "step": 11844 }, { "epoch": 2.1561845817784655, "grad_norm": 12.25, "learning_rate": 1.738315126425955e-06, "loss": 1.3687949180603027, "step": 11846 }, { "epoch": 2.1565486484026577, "grad_norm": 3.953125, "learning_rate": 1.7377207558568822e-06, "loss": 1.2273038625717163, "step": 11848 }, { "epoch": 2.15691271502685, "grad_norm": 83.0, "learning_rate": 1.7371265705221735e-06, "loss": 1.2729640007019043, "step": 11850 }, { "epoch": 2.157276781651042, "grad_norm": 18.875, "learning_rate": 1.7365325705090213e-06, "loss": 0.47645875811576843, "step": 11852 }, { "epoch": 2.1576408482752343, "grad_norm": 15.0625, "learning_rate": 1.735938755904595e-06, "loss": 1.4482628107070923, "step": 11854 }, { "epoch": 2.1580049148994265, "grad_norm": 5.59375, "learning_rate": 1.735345126796034e-06, "loss": 1.3984953165054321, "step": 11856 }, { "epoch": 2.1583689815236187, "grad_norm": 5.15625, "learning_rate": 1.7347516832704492e-06, "loss": 1.4285850524902344, "step": 11858 }, { "epoch": 2.158733048147811, "grad_norm": 5.90625, "learning_rate": 1.7341584254149285e-06, "loss": 1.210035800933838, "step": 11860 }, { "epoch": 2.1590971147720035, "grad_norm": 8.1875, "learning_rate": 1.7335653533165275e-06, "loss": 1.5598353147506714, "step": 11862 }, { "epoch": 2.1594611813961957, "grad_norm": 18.125, "learning_rate": 1.7329724670622793e-06, "loss": 1.3323031663894653, "step": 11864 }, { "epoch": 2.159825248020388, "grad_norm": 8.0625, "learning_rate": 1.7323797667391877e-06, "loss": 1.4360469579696655, "step": 11866 }, { "epoch": 2.16018931464458, "grad_norm": 31.0, "learning_rate": 1.7317872524342262e-06, "loss": 1.5811915397644043, "step": 11868 }, { "epoch": 2.1605533812687723, "grad_norm": 18.875, "learning_rate": 1.7311949242343474e-06, "loss": 1.7797983884811401, "step": 11870 }, { "epoch": 2.1609174478929645, "grad_norm": 24.5, "learning_rate": 1.7306027822264699e-06, "loss": 1.5695608854293823, "step": 11872 }, { "epoch": 2.1612815145171567, "grad_norm": 12.875, "learning_rate": 1.7300108264974907e-06, "loss": 2.051684856414795, "step": 11874 }, { "epoch": 2.161645581141349, "grad_norm": 11.125, "learning_rate": 1.7294190571342762e-06, "loss": 1.3239431381225586, "step": 11876 }, { "epoch": 2.162009647765541, "grad_norm": 4.625, "learning_rate": 1.728827474223665e-06, "loss": 1.0789450407028198, "step": 11878 }, { "epoch": 2.1623737143897332, "grad_norm": 4.40625, "learning_rate": 1.7282360778524712e-06, "loss": 1.1580252647399902, "step": 11880 }, { "epoch": 2.1627377810139254, "grad_norm": 5.125, "learning_rate": 1.7276448681074778e-06, "loss": 0.9379229545593262, "step": 11882 }, { "epoch": 2.1631018476381176, "grad_norm": 6.40625, "learning_rate": 1.7270538450754443e-06, "loss": 1.0298399925231934, "step": 11884 }, { "epoch": 2.16346591426231, "grad_norm": 11.875, "learning_rate": 1.7264630088431006e-06, "loss": 1.3942416906356812, "step": 11886 }, { "epoch": 2.163829980886502, "grad_norm": 24.125, "learning_rate": 1.7258723594971483e-06, "loss": 1.5015668869018555, "step": 11888 }, { "epoch": 2.1641940475106947, "grad_norm": 21.375, "learning_rate": 1.725281897124265e-06, "loss": 1.5184601545333862, "step": 11890 }, { "epoch": 2.164558114134887, "grad_norm": 16.25, "learning_rate": 1.7246916218110956e-06, "loss": 1.4565767049789429, "step": 11892 }, { "epoch": 2.164922180759079, "grad_norm": 20.0, "learning_rate": 1.7241015336442629e-06, "loss": 1.44452702999115, "step": 11894 }, { "epoch": 2.1652862473832712, "grad_norm": 4.0625, "learning_rate": 1.7235116327103607e-06, "loss": 1.1152594089508057, "step": 11896 }, { "epoch": 2.1656503140074634, "grad_norm": 8.75, "learning_rate": 1.7229219190959515e-06, "loss": 1.0509376525878906, "step": 11898 }, { "epoch": 2.1660143806316556, "grad_norm": 11.125, "learning_rate": 1.7223323928875762e-06, "loss": 0.9874260425567627, "step": 11900 }, { "epoch": 2.166378447255848, "grad_norm": 7.84375, "learning_rate": 1.7217430541717434e-06, "loss": 1.4655461311340332, "step": 11902 }, { "epoch": 2.16674251388004, "grad_norm": 19.375, "learning_rate": 1.7211539030349379e-06, "loss": 1.5268408060073853, "step": 11904 }, { "epoch": 2.167106580504232, "grad_norm": 18.375, "learning_rate": 1.7205649395636147e-06, "loss": 1.2957861423492432, "step": 11906 }, { "epoch": 2.1674706471284244, "grad_norm": 8.5625, "learning_rate": 1.7199761638442003e-06, "loss": 1.5992405414581299, "step": 11908 }, { "epoch": 2.1678347137526166, "grad_norm": 14.3125, "learning_rate": 1.7193875759630976e-06, "loss": 1.5022704601287842, "step": 11910 }, { "epoch": 2.168198780376809, "grad_norm": 10.1875, "learning_rate": 1.7187991760066769e-06, "loss": 1.1741129159927368, "step": 11912 }, { "epoch": 2.168562847001001, "grad_norm": 11.9375, "learning_rate": 1.7182109640612857e-06, "loss": 1.4434632062911987, "step": 11914 }, { "epoch": 2.168926913625193, "grad_norm": 10.9375, "learning_rate": 1.7176229402132417e-06, "loss": 1.5435893535614014, "step": 11916 }, { "epoch": 2.169290980249386, "grad_norm": 17.25, "learning_rate": 1.7170351045488326e-06, "loss": 1.6838984489440918, "step": 11918 }, { "epoch": 2.169655046873578, "grad_norm": 20.375, "learning_rate": 1.7164474571543238e-06, "loss": 1.8975512981414795, "step": 11920 }, { "epoch": 2.17001911349777, "grad_norm": 12.0, "learning_rate": 1.7158599981159477e-06, "loss": 1.467161774635315, "step": 11922 }, { "epoch": 2.1703831801219624, "grad_norm": 5.125, "learning_rate": 1.7152727275199132e-06, "loss": 1.2082945108413696, "step": 11924 }, { "epoch": 2.1707472467461546, "grad_norm": 13.5, "learning_rate": 1.7146856454524003e-06, "loss": 1.388268232345581, "step": 11926 }, { "epoch": 2.171111313370347, "grad_norm": 9.75, "learning_rate": 1.7140987519995584e-06, "loss": 1.3517110347747803, "step": 11928 }, { "epoch": 2.171475379994539, "grad_norm": 29.25, "learning_rate": 1.7135120472475148e-06, "loss": 1.5776770114898682, "step": 11930 }, { "epoch": 2.171839446618731, "grad_norm": 28.25, "learning_rate": 1.7129255312823634e-06, "loss": 1.5649540424346924, "step": 11932 }, { "epoch": 2.1722035132429234, "grad_norm": 11.875, "learning_rate": 1.7123392041901748e-06, "loss": 1.4681792259216309, "step": 11934 }, { "epoch": 2.1725675798671156, "grad_norm": 10.6875, "learning_rate": 1.7117530660569904e-06, "loss": 1.0693531036376953, "step": 11936 }, { "epoch": 2.1729316464913078, "grad_norm": 8.8125, "learning_rate": 1.711167116968821e-06, "loss": 1.4105781316757202, "step": 11938 }, { "epoch": 2.1732957131155, "grad_norm": 14.3125, "learning_rate": 1.7105813570116558e-06, "loss": 1.5430963039398193, "step": 11940 }, { "epoch": 2.173659779739692, "grad_norm": 21.125, "learning_rate": 1.7099957862714492e-06, "loss": 1.362628698348999, "step": 11942 }, { "epoch": 2.174023846363885, "grad_norm": 9.25, "learning_rate": 1.7094104048341336e-06, "loss": 1.5510234832763672, "step": 11944 }, { "epoch": 2.174387912988077, "grad_norm": 26.375, "learning_rate": 1.708825212785612e-06, "loss": 1.6540273427963257, "step": 11946 }, { "epoch": 2.174751979612269, "grad_norm": 21.375, "learning_rate": 1.7082402102117559e-06, "loss": 1.9103442430496216, "step": 11948 }, { "epoch": 2.1751160462364614, "grad_norm": 12.625, "learning_rate": 1.7076553971984156e-06, "loss": 1.4040323495864868, "step": 11950 }, { "epoch": 2.1754801128606536, "grad_norm": 12.3125, "learning_rate": 1.7070707738314068e-06, "loss": 1.4501792192459106, "step": 11952 }, { "epoch": 2.1758441794848458, "grad_norm": 35.75, "learning_rate": 1.706486340196523e-06, "loss": 1.5558557510375977, "step": 11954 }, { "epoch": 2.176208246109038, "grad_norm": 25.0, "learning_rate": 1.705902096379527e-06, "loss": 2.25014591217041, "step": 11956 }, { "epoch": 2.17657231273323, "grad_norm": 7.4375, "learning_rate": 1.7053180424661525e-06, "loss": 1.3083001375198364, "step": 11958 }, { "epoch": 2.1769363793574223, "grad_norm": 5.65625, "learning_rate": 1.70473417854211e-06, "loss": 1.1901150941848755, "step": 11960 }, { "epoch": 2.1773004459816145, "grad_norm": 9.1875, "learning_rate": 1.7041505046930762e-06, "loss": 1.3908497095108032, "step": 11962 }, { "epoch": 2.1776645126058067, "grad_norm": 12.0, "learning_rate": 1.7035670210047044e-06, "loss": 1.6196521520614624, "step": 11964 }, { "epoch": 2.178028579229999, "grad_norm": 25.0, "learning_rate": 1.7029837275626198e-06, "loss": 1.610152244567871, "step": 11966 }, { "epoch": 2.178392645854191, "grad_norm": 15.8125, "learning_rate": 1.7024006244524148e-06, "loss": 1.960754156112671, "step": 11968 }, { "epoch": 2.1787567124783838, "grad_norm": 7.5, "learning_rate": 1.7018177117596612e-06, "loss": 1.3356890678405762, "step": 11970 }, { "epoch": 2.179120779102576, "grad_norm": 7.65625, "learning_rate": 1.7012349895698957e-06, "loss": 1.3218178749084473, "step": 11972 }, { "epoch": 2.179484845726768, "grad_norm": 12.3125, "learning_rate": 1.7006524579686329e-06, "loss": 1.16715407371521, "step": 11974 }, { "epoch": 2.1798489123509603, "grad_norm": 30.375, "learning_rate": 1.700070117041357e-06, "loss": 1.160258412361145, "step": 11976 }, { "epoch": 2.1802129789751525, "grad_norm": 8.0625, "learning_rate": 1.6994879668735211e-06, "loss": 1.2591595649719238, "step": 11978 }, { "epoch": 2.1805770455993447, "grad_norm": 9.4375, "learning_rate": 1.6989060075505575e-06, "loss": 1.5157290697097778, "step": 11980 }, { "epoch": 2.180941112223537, "grad_norm": 13.0625, "learning_rate": 1.698324239157863e-06, "loss": 1.4201197624206543, "step": 11982 }, { "epoch": 2.181305178847729, "grad_norm": 25.625, "learning_rate": 1.6977426617808118e-06, "loss": 1.615785837173462, "step": 11984 }, { "epoch": 2.1816692454719213, "grad_norm": 28.5, "learning_rate": 1.6971612755047485e-06, "loss": 2.0562760829925537, "step": 11986 }, { "epoch": 2.1820333120961135, "grad_norm": 8.75, "learning_rate": 1.696580080414986e-06, "loss": 1.227978229522705, "step": 11988 }, { "epoch": 2.1823973787203057, "grad_norm": 12.9375, "learning_rate": 1.6959990765968162e-06, "loss": 1.1672794818878174, "step": 11990 }, { "epoch": 2.182761445344498, "grad_norm": 11.5625, "learning_rate": 1.6954182641354957e-06, "loss": 0.8809859156608582, "step": 11992 }, { "epoch": 2.18312551196869, "grad_norm": 7.3125, "learning_rate": 1.6948376431162588e-06, "loss": 1.0424938201904297, "step": 11994 }, { "epoch": 2.1834895785928827, "grad_norm": 11.625, "learning_rate": 1.6942572136243087e-06, "loss": 0.13582243025302887, "step": 11996 }, { "epoch": 2.183853645217075, "grad_norm": 5.0, "learning_rate": 1.6936769757448202e-06, "loss": 0.4949483871459961, "step": 11998 }, { "epoch": 2.184217711841267, "grad_norm": 32.75, "learning_rate": 1.693096929562942e-06, "loss": 1.5824527740478516, "step": 12000 }, { "epoch": 2.1845817784654593, "grad_norm": 20.0, "learning_rate": 1.6925170751637921e-06, "loss": 1.7683151960372925, "step": 12002 }, { "epoch": 2.1849458450896515, "grad_norm": 9.625, "learning_rate": 1.691937412632463e-06, "loss": 1.6637481451034546, "step": 12004 }, { "epoch": 2.1853099117138437, "grad_norm": 35.0, "learning_rate": 1.6913579420540182e-06, "loss": 2.1081438064575195, "step": 12006 }, { "epoch": 2.185673978338036, "grad_norm": 10.3125, "learning_rate": 1.690778663513491e-06, "loss": 1.7586209774017334, "step": 12008 }, { "epoch": 2.186038044962228, "grad_norm": 14.0, "learning_rate": 1.69019957709589e-06, "loss": 1.1529549360275269, "step": 12010 }, { "epoch": 2.1864021115864203, "grad_norm": 10.75, "learning_rate": 1.6896206828861916e-06, "loss": 1.8441519737243652, "step": 12012 }, { "epoch": 2.1867661782106125, "grad_norm": 7.3125, "learning_rate": 1.6890419809693484e-06, "loss": 1.4527759552001953, "step": 12014 }, { "epoch": 2.1871302448348047, "grad_norm": 12.8125, "learning_rate": 1.6884634714302823e-06, "loss": 1.189206600189209, "step": 12016 }, { "epoch": 2.187494311458997, "grad_norm": 9.0625, "learning_rate": 1.687885154353885e-06, "loss": 1.6650750637054443, "step": 12018 }, { "epoch": 2.187858378083189, "grad_norm": 18.25, "learning_rate": 1.6873070298250255e-06, "loss": 1.5120407342910767, "step": 12020 }, { "epoch": 2.1882224447073813, "grad_norm": 8.0625, "learning_rate": 1.6867290979285377e-06, "loss": 1.513980746269226, "step": 12022 }, { "epoch": 2.1885865113315734, "grad_norm": 13.0625, "learning_rate": 1.6861513587492335e-06, "loss": 1.393868327140808, "step": 12024 }, { "epoch": 2.188950577955766, "grad_norm": 11.9375, "learning_rate": 1.6855738123718935e-06, "loss": 1.5611568689346313, "step": 12026 }, { "epoch": 2.1893146445799583, "grad_norm": 13.375, "learning_rate": 1.684996458881268e-06, "loss": 1.4893863201141357, "step": 12028 }, { "epoch": 2.1896787112041505, "grad_norm": 10.6875, "learning_rate": 1.6844192983620846e-06, "loss": 1.3925065994262695, "step": 12030 }, { "epoch": 2.1900427778283427, "grad_norm": 48.75, "learning_rate": 1.6838423308990362e-06, "loss": 1.2226086854934692, "step": 12032 }, { "epoch": 2.190406844452535, "grad_norm": 30.875, "learning_rate": 1.6832655565767924e-06, "loss": 1.2104977369308472, "step": 12034 }, { "epoch": 2.190770911076727, "grad_norm": 22.5, "learning_rate": 1.6826889754799925e-06, "loss": 1.4582310914993286, "step": 12036 }, { "epoch": 2.1911349777009193, "grad_norm": 10.8125, "learning_rate": 1.6821125876932456e-06, "loss": 1.4852790832519531, "step": 12038 }, { "epoch": 2.1914990443251114, "grad_norm": 10.25, "learning_rate": 1.6815363933011368e-06, "loss": 1.387447476387024, "step": 12040 }, { "epoch": 2.1918631109493036, "grad_norm": 14.75, "learning_rate": 1.6809603923882178e-06, "loss": 1.4417144060134888, "step": 12042 }, { "epoch": 2.192227177573496, "grad_norm": 13.0, "learning_rate": 1.6803845850390166e-06, "loss": 1.4992517232894897, "step": 12044 }, { "epoch": 2.192591244197688, "grad_norm": 15.25, "learning_rate": 1.6798089713380297e-06, "loss": 1.6413629055023193, "step": 12046 }, { "epoch": 2.19295531082188, "grad_norm": 13.6875, "learning_rate": 1.6792335513697248e-06, "loss": 1.8382233381271362, "step": 12048 }, { "epoch": 2.1933193774460724, "grad_norm": 9.4375, "learning_rate": 1.6786583252185451e-06, "loss": 1.2685561180114746, "step": 12050 }, { "epoch": 2.193683444070265, "grad_norm": 3.046875, "learning_rate": 1.6780832929688998e-06, "loss": 0.8487936854362488, "step": 12052 }, { "epoch": 2.1940475106944572, "grad_norm": 11.1875, "learning_rate": 1.6775084547051748e-06, "loss": 1.2040059566497803, "step": 12054 }, { "epoch": 2.1944115773186494, "grad_norm": 13.75, "learning_rate": 1.676933810511725e-06, "loss": 1.4764724969863892, "step": 12056 }, { "epoch": 2.1947756439428416, "grad_norm": 7.9375, "learning_rate": 1.6763593604728755e-06, "loss": 1.4257529973983765, "step": 12058 }, { "epoch": 2.195139710567034, "grad_norm": 5.59375, "learning_rate": 1.6757851046729267e-06, "loss": 1.0820488929748535, "step": 12060 }, { "epoch": 2.195503777191226, "grad_norm": 8.125, "learning_rate": 1.675211043196146e-06, "loss": 1.195681095123291, "step": 12062 }, { "epoch": 2.195867843815418, "grad_norm": 7.0, "learning_rate": 1.6746371761267765e-06, "loss": 1.1074368953704834, "step": 12064 }, { "epoch": 2.1962319104396104, "grad_norm": 8.5, "learning_rate": 1.6740635035490305e-06, "loss": 1.6307562589645386, "step": 12066 }, { "epoch": 2.1965959770638026, "grad_norm": 34.5, "learning_rate": 1.673490025547091e-06, "loss": 1.4520540237426758, "step": 12068 }, { "epoch": 2.196960043687995, "grad_norm": 26.75, "learning_rate": 1.672916742205115e-06, "loss": 1.390350580215454, "step": 12070 }, { "epoch": 2.197324110312187, "grad_norm": 7.375, "learning_rate": 1.6723436536072283e-06, "loss": 1.3479061126708984, "step": 12072 }, { "epoch": 2.197688176936379, "grad_norm": 7.90625, "learning_rate": 1.6717707598375302e-06, "loss": 1.0836222171783447, "step": 12074 }, { "epoch": 2.1980522435605714, "grad_norm": 12.875, "learning_rate": 1.671198060980091e-06, "loss": 1.4379175901412964, "step": 12076 }, { "epoch": 2.198416310184764, "grad_norm": 3.046875, "learning_rate": 1.6706255571189501e-06, "loss": 0.8740071058273315, "step": 12078 }, { "epoch": 2.198780376808956, "grad_norm": 23.625, "learning_rate": 1.6700532483381221e-06, "loss": 1.2940067052841187, "step": 12080 }, { "epoch": 2.1991444434331484, "grad_norm": 10.3125, "learning_rate": 1.6694811347215889e-06, "loss": 1.57370924949646, "step": 12082 }, { "epoch": 2.1995085100573406, "grad_norm": 34.5, "learning_rate": 1.6689092163533078e-06, "loss": 1.6122078895568848, "step": 12084 }, { "epoch": 2.199872576681533, "grad_norm": 42.0, "learning_rate": 1.6683374933172053e-06, "loss": 1.0024185180664062, "step": 12086 }, { "epoch": 2.200236643305725, "grad_norm": 29.375, "learning_rate": 1.6677659656971778e-06, "loss": 1.3415708541870117, "step": 12088 }, { "epoch": 2.200600709929917, "grad_norm": 13.1875, "learning_rate": 1.6671946335770971e-06, "loss": 1.4179677963256836, "step": 12090 }, { "epoch": 2.2009647765541094, "grad_norm": 9.9375, "learning_rate": 1.6666234970408012e-06, "loss": 1.82107675075531, "step": 12092 }, { "epoch": 2.2013288431783016, "grad_norm": 16.25, "learning_rate": 1.6660525561721036e-06, "loss": 1.6467400789260864, "step": 12094 }, { "epoch": 2.2016929098024938, "grad_norm": 11.4375, "learning_rate": 1.6654818110547888e-06, "loss": 1.57603120803833, "step": 12096 }, { "epoch": 2.202056976426686, "grad_norm": 8.4375, "learning_rate": 1.6649112617726082e-06, "loss": 1.3872840404510498, "step": 12098 }, { "epoch": 2.202421043050878, "grad_norm": 24.0, "learning_rate": 1.6643409084092904e-06, "loss": 1.3849416971206665, "step": 12100 }, { "epoch": 2.2027851096750704, "grad_norm": 5.40625, "learning_rate": 1.66377075104853e-06, "loss": 1.2443528175354004, "step": 12102 }, { "epoch": 2.203149176299263, "grad_norm": 12.5, "learning_rate": 1.6632007897739978e-06, "loss": 1.2464863061904907, "step": 12104 }, { "epoch": 2.203513242923455, "grad_norm": 15.375, "learning_rate": 1.6626310246693323e-06, "loss": 1.6990505456924438, "step": 12106 }, { "epoch": 2.2038773095476474, "grad_norm": 22.5, "learning_rate": 1.6620614558181427e-06, "loss": 1.447169303894043, "step": 12108 }, { "epoch": 2.2042413761718396, "grad_norm": 16.75, "learning_rate": 1.6614920833040138e-06, "loss": 1.8406569957733154, "step": 12110 }, { "epoch": 2.2046054427960318, "grad_norm": 11.375, "learning_rate": 1.660922907210496e-06, "loss": 1.4172639846801758, "step": 12112 }, { "epoch": 2.204969509420224, "grad_norm": 10.625, "learning_rate": 1.660353927621115e-06, "loss": 1.3564472198486328, "step": 12114 }, { "epoch": 2.205333576044416, "grad_norm": 14.4375, "learning_rate": 1.659785144619367e-06, "loss": 0.9050820469856262, "step": 12116 }, { "epoch": 2.2056976426686083, "grad_norm": 5.03125, "learning_rate": 1.6592165582887165e-06, "loss": 1.1530940532684326, "step": 12118 }, { "epoch": 2.2060617092928005, "grad_norm": 4.90625, "learning_rate": 1.6586481687126032e-06, "loss": 0.9225291609764099, "step": 12120 }, { "epoch": 2.2064257759169927, "grad_norm": 13.0625, "learning_rate": 1.658079975974434e-06, "loss": 1.1620161533355713, "step": 12122 }, { "epoch": 2.206789842541185, "grad_norm": 10.8125, "learning_rate": 1.6575119801575905e-06, "loss": 1.4045647382736206, "step": 12124 }, { "epoch": 2.207153909165377, "grad_norm": 9.9375, "learning_rate": 1.656944181345424e-06, "loss": 1.5692120790481567, "step": 12126 }, { "epoch": 2.2075179757895693, "grad_norm": 9.4375, "learning_rate": 1.656376579621255e-06, "loss": 1.4317455291748047, "step": 12128 }, { "epoch": 2.2078820424137615, "grad_norm": 11.4375, "learning_rate": 1.6558091750683787e-06, "loss": 1.3973381519317627, "step": 12130 }, { "epoch": 2.208246109037954, "grad_norm": 12.6875, "learning_rate": 1.655241967770057e-06, "loss": 1.379529595375061, "step": 12132 }, { "epoch": 2.2086101756621463, "grad_norm": 9.4375, "learning_rate": 1.6546749578095277e-06, "loss": 1.3875292539596558, "step": 12134 }, { "epoch": 2.2089742422863385, "grad_norm": 7.15625, "learning_rate": 1.6541081452699964e-06, "loss": 1.2197068929672241, "step": 12136 }, { "epoch": 2.2093383089105307, "grad_norm": 5.0625, "learning_rate": 1.6535415302346398e-06, "loss": 0.8297624588012695, "step": 12138 }, { "epoch": 2.209702375534723, "grad_norm": 25.0, "learning_rate": 1.6529751127866078e-06, "loss": 0.9787057042121887, "step": 12140 }, { "epoch": 2.210066442158915, "grad_norm": 7.75, "learning_rate": 1.6524088930090175e-06, "loss": 0.4380683898925781, "step": 12142 }, { "epoch": 2.2104305087831073, "grad_norm": 7.3125, "learning_rate": 1.6518428709849616e-06, "loss": 1.4496345520019531, "step": 12144 }, { "epoch": 2.2107945754072995, "grad_norm": 6.5625, "learning_rate": 1.6512770467975014e-06, "loss": 0.975763738155365, "step": 12146 }, { "epoch": 2.2111586420314917, "grad_norm": 7.125, "learning_rate": 1.6507114205296675e-06, "loss": 1.3833062648773193, "step": 12148 }, { "epoch": 2.211522708655684, "grad_norm": 17.375, "learning_rate": 1.6501459922644658e-06, "loss": 1.385900855064392, "step": 12150 }, { "epoch": 2.211886775279876, "grad_norm": 22.5, "learning_rate": 1.649580762084868e-06, "loss": 1.554508924484253, "step": 12152 }, { "epoch": 2.2122508419040683, "grad_norm": 186.0, "learning_rate": 1.6490157300738211e-06, "loss": 0.5537604689598083, "step": 12154 }, { "epoch": 2.2126149085282605, "grad_norm": 8.0, "learning_rate": 1.6484508963142411e-06, "loss": 1.0816677808761597, "step": 12156 }, { "epoch": 2.2129789751524527, "grad_norm": 12.875, "learning_rate": 1.6478862608890139e-06, "loss": 1.6150009632110596, "step": 12158 }, { "epoch": 2.2133430417766453, "grad_norm": 12.6875, "learning_rate": 1.6473218238809996e-06, "loss": 1.496718406677246, "step": 12160 }, { "epoch": 2.2137071084008375, "grad_norm": 9.8125, "learning_rate": 1.6467575853730238e-06, "loss": 1.4483039379119873, "step": 12162 }, { "epoch": 2.2140711750250297, "grad_norm": 6.625, "learning_rate": 1.6461935454478894e-06, "loss": 1.399533987045288, "step": 12164 }, { "epoch": 2.214435241649222, "grad_norm": 8.1875, "learning_rate": 1.6456297041883663e-06, "loss": 1.025894045829773, "step": 12166 }, { "epoch": 2.214799308273414, "grad_norm": 6.625, "learning_rate": 1.6450660616771941e-06, "loss": 1.3313698768615723, "step": 12168 }, { "epoch": 2.2151633748976063, "grad_norm": 24.125, "learning_rate": 1.6445026179970871e-06, "loss": 1.5325853824615479, "step": 12170 }, { "epoch": 2.2155274415217985, "grad_norm": 133.0, "learning_rate": 1.6439393732307265e-06, "loss": 1.9374743700027466, "step": 12172 }, { "epoch": 2.2158915081459907, "grad_norm": 11.625, "learning_rate": 1.6433763274607677e-06, "loss": 1.4798552989959717, "step": 12174 }, { "epoch": 2.216255574770183, "grad_norm": 8.4375, "learning_rate": 1.642813480769836e-06, "loss": 1.4079176187515259, "step": 12176 }, { "epoch": 2.216619641394375, "grad_norm": 15.875, "learning_rate": 1.6422508332405243e-06, "loss": 1.4054397344589233, "step": 12178 }, { "epoch": 2.2169837080185673, "grad_norm": 15.4375, "learning_rate": 1.6416883849554016e-06, "loss": 1.566229224205017, "step": 12180 }, { "epoch": 2.2173477746427595, "grad_norm": 96.5, "learning_rate": 1.6411261359970026e-06, "loss": 1.3326313495635986, "step": 12182 }, { "epoch": 2.2177118412669516, "grad_norm": 7.25, "learning_rate": 1.6405640864478367e-06, "loss": 1.3738365173339844, "step": 12184 }, { "epoch": 2.2180759078911443, "grad_norm": 8.9375, "learning_rate": 1.6400022363903823e-06, "loss": 1.4668095111846924, "step": 12186 }, { "epoch": 2.2184399745153365, "grad_norm": 9.8125, "learning_rate": 1.6394405859070866e-06, "loss": 1.1771247386932373, "step": 12188 }, { "epoch": 2.2188040411395287, "grad_norm": 15.8125, "learning_rate": 1.6388791350803725e-06, "loss": 0.7339882850646973, "step": 12190 }, { "epoch": 2.219168107763721, "grad_norm": 12.5, "learning_rate": 1.6383178839926284e-06, "loss": 0.5722537636756897, "step": 12192 }, { "epoch": 2.219532174387913, "grad_norm": 7.34375, "learning_rate": 1.6377568327262163e-06, "loss": 1.1518901586532593, "step": 12194 }, { "epoch": 2.2198962410121053, "grad_norm": 12.0625, "learning_rate": 1.6371959813634698e-06, "loss": 1.0115777254104614, "step": 12196 }, { "epoch": 2.2202603076362974, "grad_norm": 27.625, "learning_rate": 1.636635329986688e-06, "loss": 1.5801260471343994, "step": 12198 }, { "epoch": 2.2206243742604896, "grad_norm": 15.4375, "learning_rate": 1.6360748786781477e-06, "loss": 1.4054018259048462, "step": 12200 }, { "epoch": 2.220988440884682, "grad_norm": 8.625, "learning_rate": 1.6355146275200906e-06, "loss": 1.3353304862976074, "step": 12202 }, { "epoch": 2.221352507508874, "grad_norm": 8.625, "learning_rate": 1.6349545765947323e-06, "loss": 1.370603084564209, "step": 12204 }, { "epoch": 2.2217165741330662, "grad_norm": 2.96875, "learning_rate": 1.6343947259842584e-06, "loss": 1.177539587020874, "step": 12206 }, { "epoch": 2.2220806407572584, "grad_norm": 7.09375, "learning_rate": 1.6338350757708235e-06, "loss": 1.1190351247787476, "step": 12208 }, { "epoch": 2.2224447073814506, "grad_norm": 10.0625, "learning_rate": 1.6332756260365556e-06, "loss": 1.124096155166626, "step": 12210 }, { "epoch": 2.2228087740056433, "grad_norm": 101.0, "learning_rate": 1.6327163768635492e-06, "loss": 0.8702162504196167, "step": 12212 }, { "epoch": 2.2231728406298354, "grad_norm": 39.25, "learning_rate": 1.6321573283338744e-06, "loss": 1.3182525634765625, "step": 12214 }, { "epoch": 2.2235369072540276, "grad_norm": 6.15625, "learning_rate": 1.6315984805295688e-06, "loss": 1.3298591375350952, "step": 12216 }, { "epoch": 2.22390097387822, "grad_norm": 12.625, "learning_rate": 1.6310398335326394e-06, "loss": 1.2881121635437012, "step": 12218 }, { "epoch": 2.224265040502412, "grad_norm": 17.875, "learning_rate": 1.6304813874250674e-06, "loss": 1.5499582290649414, "step": 12220 }, { "epoch": 2.224629107126604, "grad_norm": 12.3125, "learning_rate": 1.6299231422888007e-06, "loss": 1.2902812957763672, "step": 12222 }, { "epoch": 2.2249931737507964, "grad_norm": 18.875, "learning_rate": 1.6293650982057607e-06, "loss": 1.243004560470581, "step": 12224 }, { "epoch": 2.2253572403749886, "grad_norm": 14.8125, "learning_rate": 1.6288072552578389e-06, "loss": 0.9448727369308472, "step": 12226 }, { "epoch": 2.225721306999181, "grad_norm": 17.75, "learning_rate": 1.6282496135268939e-06, "loss": 0.9821646213531494, "step": 12228 }, { "epoch": 2.226085373623373, "grad_norm": 17.875, "learning_rate": 1.6276921730947603e-06, "loss": 1.6493090391159058, "step": 12230 }, { "epoch": 2.226449440247565, "grad_norm": 38.25, "learning_rate": 1.6271349340432374e-06, "loss": 1.192422866821289, "step": 12232 }, { "epoch": 2.2268135068717574, "grad_norm": 7.6875, "learning_rate": 1.6265778964541002e-06, "loss": 1.085685133934021, "step": 12234 }, { "epoch": 2.2271775734959496, "grad_norm": 18.375, "learning_rate": 1.626021060409091e-06, "loss": 1.3583873510360718, "step": 12236 }, { "epoch": 2.227541640120142, "grad_norm": 6.03125, "learning_rate": 1.6254644259899216e-06, "loss": 1.170522928237915, "step": 12238 }, { "epoch": 2.2279057067443344, "grad_norm": 23.125, "learning_rate": 1.6249079932782785e-06, "loss": 0.8008841276168823, "step": 12240 }, { "epoch": 2.2282697733685266, "grad_norm": 9.75, "learning_rate": 1.6243517623558135e-06, "loss": 0.9121081829071045, "step": 12242 }, { "epoch": 2.228633839992719, "grad_norm": 11.875, "learning_rate": 1.623795733304153e-06, "loss": 1.4922815561294556, "step": 12244 }, { "epoch": 2.228997906616911, "grad_norm": 14.375, "learning_rate": 1.623239906204892e-06, "loss": 1.7578998804092407, "step": 12246 }, { "epoch": 2.229361973241103, "grad_norm": 8.6875, "learning_rate": 1.6226842811395938e-06, "loss": 1.3746850490570068, "step": 12248 }, { "epoch": 2.2297260398652954, "grad_norm": 8.6875, "learning_rate": 1.6221288581897968e-06, "loss": 1.275140404701233, "step": 12250 }, { "epoch": 2.2300901064894876, "grad_norm": 10.5, "learning_rate": 1.621573637437005e-06, "loss": 1.4401323795318604, "step": 12252 }, { "epoch": 2.2304541731136798, "grad_norm": 19.125, "learning_rate": 1.621018618962696e-06, "loss": 1.552638053894043, "step": 12254 }, { "epoch": 2.230818239737872, "grad_norm": 12.0625, "learning_rate": 1.6204638028483166e-06, "loss": 1.2832891941070557, "step": 12256 }, { "epoch": 2.231182306362064, "grad_norm": 10.25, "learning_rate": 1.6199091891752822e-06, "loss": 1.117211937904358, "step": 12258 }, { "epoch": 2.2315463729862564, "grad_norm": 58.0, "learning_rate": 1.6193547780249828e-06, "loss": 1.444494366645813, "step": 12260 }, { "epoch": 2.2319104396104485, "grad_norm": 7.90625, "learning_rate": 1.6188005694787728e-06, "loss": 0.7699679136276245, "step": 12262 }, { "epoch": 2.2322745062346407, "grad_norm": 19.625, "learning_rate": 1.6182465636179826e-06, "loss": 1.3388326168060303, "step": 12264 }, { "epoch": 2.232638572858833, "grad_norm": 11.125, "learning_rate": 1.6176927605239102e-06, "loss": 0.9454472661018372, "step": 12266 }, { "epoch": 2.2330026394830256, "grad_norm": 17.25, "learning_rate": 1.6171391602778214e-06, "loss": 1.6248373985290527, "step": 12268 }, { "epoch": 2.2333667061072178, "grad_norm": 10.9375, "learning_rate": 1.6165857629609582e-06, "loss": 1.4173344373703003, "step": 12270 }, { "epoch": 2.23373077273141, "grad_norm": 6.625, "learning_rate": 1.6160325686545263e-06, "loss": 1.4110231399536133, "step": 12272 }, { "epoch": 2.234094839355602, "grad_norm": 11.625, "learning_rate": 1.6154795774397073e-06, "loss": 1.4788658618927002, "step": 12274 }, { "epoch": 2.2344589059797944, "grad_norm": 106.0, "learning_rate": 1.6149267893976496e-06, "loss": 0.9729205965995789, "step": 12276 }, { "epoch": 2.2348229726039865, "grad_norm": 6.25, "learning_rate": 1.6143742046094713e-06, "loss": 0.5006702542304993, "step": 12278 }, { "epoch": 2.2351870392281787, "grad_norm": 23.5, "learning_rate": 1.6138218231562642e-06, "loss": 1.3185569047927856, "step": 12280 }, { "epoch": 2.235551105852371, "grad_norm": 12.1875, "learning_rate": 1.6132696451190854e-06, "loss": 1.0156015157699585, "step": 12282 }, { "epoch": 2.235915172476563, "grad_norm": 56.0, "learning_rate": 1.6127176705789673e-06, "loss": 1.0495911836624146, "step": 12284 }, { "epoch": 2.2362792391007553, "grad_norm": 54.25, "learning_rate": 1.6121658996169092e-06, "loss": 0.6994911432266235, "step": 12286 }, { "epoch": 2.2366433057249475, "grad_norm": 11.375, "learning_rate": 1.61161433231388e-06, "loss": 1.4782464504241943, "step": 12288 }, { "epoch": 2.2370073723491397, "grad_norm": 11.625, "learning_rate": 1.6110629687508217e-06, "loss": 1.3820947408676147, "step": 12290 }, { "epoch": 2.237371438973332, "grad_norm": 14.375, "learning_rate": 1.610511809008643e-06, "loss": 1.4437205791473389, "step": 12292 }, { "epoch": 2.2377355055975245, "grad_norm": 29.25, "learning_rate": 1.6099608531682256e-06, "loss": 1.6471656560897827, "step": 12294 }, { "epoch": 2.2380995722217167, "grad_norm": 8.1875, "learning_rate": 1.60941010131042e-06, "loss": 1.7767752408981323, "step": 12296 }, { "epoch": 2.238463638845909, "grad_norm": 2.21875, "learning_rate": 1.6088595535160458e-06, "loss": 0.8517869710922241, "step": 12298 }, { "epoch": 2.238827705470101, "grad_norm": 8.4375, "learning_rate": 1.6083092098658957e-06, "loss": 1.2293821573257446, "step": 12300 }, { "epoch": 2.2391917720942933, "grad_norm": 11.25, "learning_rate": 1.6077590704407272e-06, "loss": 1.3938345909118652, "step": 12302 }, { "epoch": 2.2395558387184855, "grad_norm": 8.9375, "learning_rate": 1.6072091353212737e-06, "loss": 1.5042243003845215, "step": 12304 }, { "epoch": 2.2399199053426777, "grad_norm": 4.25, "learning_rate": 1.6066594045882353e-06, "loss": 1.3572784662246704, "step": 12306 }, { "epoch": 2.24028397196687, "grad_norm": 8.0, "learning_rate": 1.6061098783222823e-06, "loss": 0.9582677483558655, "step": 12308 }, { "epoch": 2.240648038591062, "grad_norm": 11.1875, "learning_rate": 1.6055605566040565e-06, "loss": 1.9111570119857788, "step": 12310 }, { "epoch": 2.2410121052152543, "grad_norm": 22.5, "learning_rate": 1.6050114395141663e-06, "loss": 1.7372725009918213, "step": 12312 }, { "epoch": 2.2413761718394465, "grad_norm": 12.25, "learning_rate": 1.6044625271331948e-06, "loss": 1.4749634265899658, "step": 12314 }, { "epoch": 2.2417402384636387, "grad_norm": 13.125, "learning_rate": 1.603913819541692e-06, "loss": 1.3942590951919556, "step": 12316 }, { "epoch": 2.242104305087831, "grad_norm": 8.75, "learning_rate": 1.603365316820178e-06, "loss": 1.6264064311981201, "step": 12318 }, { "epoch": 2.2424683717120235, "grad_norm": 8.9375, "learning_rate": 1.6028170190491446e-06, "loss": 1.685206651687622, "step": 12320 }, { "epoch": 2.2428324383362157, "grad_norm": 11.0, "learning_rate": 1.60226892630905e-06, "loss": 1.4618518352508545, "step": 12322 }, { "epoch": 2.243196504960408, "grad_norm": 8.75, "learning_rate": 1.6017210386803264e-06, "loss": 1.468754768371582, "step": 12324 }, { "epoch": 2.2435605715846, "grad_norm": 8.875, "learning_rate": 1.601173356243374e-06, "loss": 1.5624163150787354, "step": 12326 }, { "epoch": 2.2439246382087923, "grad_norm": 27.5, "learning_rate": 1.6006258790785622e-06, "loss": 1.4366270303726196, "step": 12328 }, { "epoch": 2.2442887048329845, "grad_norm": 17.5, "learning_rate": 1.6000786072662326e-06, "loss": 1.4687341451644897, "step": 12330 }, { "epoch": 2.2446527714571767, "grad_norm": 4.40625, "learning_rate": 1.5995315408866925e-06, "loss": 1.3800256252288818, "step": 12332 }, { "epoch": 2.245016838081369, "grad_norm": 22.75, "learning_rate": 1.5989846800202235e-06, "loss": 1.4765992164611816, "step": 12334 }, { "epoch": 2.245380904705561, "grad_norm": 35.25, "learning_rate": 1.5984380247470755e-06, "loss": 1.745290994644165, "step": 12336 }, { "epoch": 2.2457449713297533, "grad_norm": 11.5625, "learning_rate": 1.597891575147467e-06, "loss": 1.479117751121521, "step": 12338 }, { "epoch": 2.2461090379539455, "grad_norm": 25.625, "learning_rate": 1.5973453313015884e-06, "loss": 1.6162757873535156, "step": 12340 }, { "epoch": 2.2464731045781376, "grad_norm": 9.8125, "learning_rate": 1.5967992932895963e-06, "loss": 1.4312381744384766, "step": 12342 }, { "epoch": 2.24683717120233, "grad_norm": 9.625, "learning_rate": 1.5962534611916223e-06, "loss": 1.2580519914627075, "step": 12344 }, { "epoch": 2.2472012378265225, "grad_norm": 20.5, "learning_rate": 1.5957078350877636e-06, "loss": 1.4227744340896606, "step": 12346 }, { "epoch": 2.2475653044507147, "grad_norm": 9.0, "learning_rate": 1.595162415058089e-06, "loss": 1.4911073446273804, "step": 12348 }, { "epoch": 2.247929371074907, "grad_norm": 3.96875, "learning_rate": 1.5946172011826376e-06, "loss": 0.872816264629364, "step": 12350 }, { "epoch": 2.248293437699099, "grad_norm": 10.6875, "learning_rate": 1.594072193541415e-06, "loss": 1.1604639291763306, "step": 12352 }, { "epoch": 2.2486575043232913, "grad_norm": 34.75, "learning_rate": 1.5935273922144013e-06, "loss": 1.5771641731262207, "step": 12354 }, { "epoch": 2.2490215709474835, "grad_norm": 7.78125, "learning_rate": 1.5929827972815423e-06, "loss": 1.4863736629486084, "step": 12356 }, { "epoch": 2.2493856375716756, "grad_norm": 21.25, "learning_rate": 1.592438408822756e-06, "loss": 1.3807090520858765, "step": 12358 }, { "epoch": 2.249749704195868, "grad_norm": 9.75, "learning_rate": 1.5918942269179294e-06, "loss": 1.4658913612365723, "step": 12360 }, { "epoch": 2.25011377082006, "grad_norm": 8.125, "learning_rate": 1.591350251646917e-06, "loss": 1.2913917303085327, "step": 12362 }, { "epoch": 2.2504778374442522, "grad_norm": 8.3125, "learning_rate": 1.5908064830895473e-06, "loss": 1.4570693969726562, "step": 12364 }, { "epoch": 2.2508419040684444, "grad_norm": 13.875, "learning_rate": 1.5902629213256148e-06, "loss": 1.5609384775161743, "step": 12366 }, { "epoch": 2.2512059706926366, "grad_norm": 11.6875, "learning_rate": 1.589719566434886e-06, "loss": 1.3474090099334717, "step": 12368 }, { "epoch": 2.251570037316829, "grad_norm": 12.3125, "learning_rate": 1.5891764184970959e-06, "loss": 1.6190539598464966, "step": 12370 }, { "epoch": 2.2519341039410214, "grad_norm": 17.625, "learning_rate": 1.5886334775919476e-06, "loss": 1.40696382522583, "step": 12372 }, { "epoch": 2.252298170565213, "grad_norm": 15.6875, "learning_rate": 1.5880907437991172e-06, "loss": 1.6238670349121094, "step": 12374 }, { "epoch": 2.252662237189406, "grad_norm": 9.5625, "learning_rate": 1.5875482171982482e-06, "loss": 1.4120055437088013, "step": 12376 }, { "epoch": 2.253026303813598, "grad_norm": 11.125, "learning_rate": 1.587005897868954e-06, "loss": 1.5295876264572144, "step": 12378 }, { "epoch": 2.2533903704377902, "grad_norm": 9.9375, "learning_rate": 1.5864637858908188e-06, "loss": 1.5951356887817383, "step": 12380 }, { "epoch": 2.2537544370619824, "grad_norm": 11.5625, "learning_rate": 1.585921881343393e-06, "loss": 1.8884718418121338, "step": 12382 }, { "epoch": 2.2541185036861746, "grad_norm": 13.3125, "learning_rate": 1.5853801843062011e-06, "loss": 1.7638633251190186, "step": 12384 }, { "epoch": 2.254482570310367, "grad_norm": 17.0, "learning_rate": 1.5848386948587343e-06, "loss": 1.535719871520996, "step": 12386 }, { "epoch": 2.254846636934559, "grad_norm": 13.4375, "learning_rate": 1.584297413080454e-06, "loss": 1.3890198469161987, "step": 12388 }, { "epoch": 2.255210703558751, "grad_norm": 11.4375, "learning_rate": 1.5837563390507907e-06, "loss": 1.531652569770813, "step": 12390 }, { "epoch": 2.2555747701829434, "grad_norm": 21.375, "learning_rate": 1.5832154728491452e-06, "loss": 1.6434849500656128, "step": 12392 }, { "epoch": 2.2559388368071356, "grad_norm": 11.375, "learning_rate": 1.5826748145548873e-06, "loss": 1.2316720485687256, "step": 12394 }, { "epoch": 2.2563029034313278, "grad_norm": 8.5625, "learning_rate": 1.5821343642473563e-06, "loss": 1.4487545490264893, "step": 12396 }, { "epoch": 2.25666697005552, "grad_norm": 6.9375, "learning_rate": 1.5815941220058618e-06, "loss": 1.2573072910308838, "step": 12398 }, { "epoch": 2.257031036679712, "grad_norm": 12.5625, "learning_rate": 1.5810540879096812e-06, "loss": 1.4514893293380737, "step": 12400 }, { "epoch": 2.257395103303905, "grad_norm": 11.9375, "learning_rate": 1.5805142620380625e-06, "loss": 1.320814609527588, "step": 12402 }, { "epoch": 2.257759169928097, "grad_norm": 10.1875, "learning_rate": 1.5799746444702236e-06, "loss": 1.299497127532959, "step": 12404 }, { "epoch": 2.258123236552289, "grad_norm": 19.625, "learning_rate": 1.5794352352853505e-06, "loss": 1.1353826522827148, "step": 12406 }, { "epoch": 2.2584873031764814, "grad_norm": 10.9375, "learning_rate": 1.5788960345625995e-06, "loss": 1.3488101959228516, "step": 12408 }, { "epoch": 2.2588513698006736, "grad_norm": 15.5625, "learning_rate": 1.5783570423810965e-06, "loss": 1.6760532855987549, "step": 12410 }, { "epoch": 2.2592154364248658, "grad_norm": 11.6875, "learning_rate": 1.5778182588199358e-06, "loss": 1.9968559741973877, "step": 12412 }, { "epoch": 2.259579503049058, "grad_norm": 11.5625, "learning_rate": 1.5772796839581821e-06, "loss": 1.4470630884170532, "step": 12414 }, { "epoch": 2.25994356967325, "grad_norm": 10.875, "learning_rate": 1.5767413178748691e-06, "loss": 1.539105772972107, "step": 12416 }, { "epoch": 2.2603076362974424, "grad_norm": 11.25, "learning_rate": 1.5762031606489999e-06, "loss": 1.4229347705841064, "step": 12418 }, { "epoch": 2.2606717029216346, "grad_norm": 8.0, "learning_rate": 1.5756652123595465e-06, "loss": 0.885093629360199, "step": 12420 }, { "epoch": 2.2610357695458267, "grad_norm": 14.125, "learning_rate": 1.575127473085451e-06, "loss": 0.6655827760696411, "step": 12422 }, { "epoch": 2.261399836170019, "grad_norm": 10.8125, "learning_rate": 1.5745899429056242e-06, "loss": 1.6897716522216797, "step": 12424 }, { "epoch": 2.261763902794211, "grad_norm": 6.03125, "learning_rate": 1.5740526218989466e-06, "loss": 1.2048144340515137, "step": 12426 }, { "epoch": 2.2621279694184038, "grad_norm": 10.125, "learning_rate": 1.573515510144268e-06, "loss": 1.3830907344818115, "step": 12428 }, { "epoch": 2.262492036042596, "grad_norm": 4.65625, "learning_rate": 1.572978607720407e-06, "loss": 1.5929877758026123, "step": 12430 }, { "epoch": 2.262856102666788, "grad_norm": 10.3125, "learning_rate": 1.5724419147061523e-06, "loss": 1.107445478439331, "step": 12432 }, { "epoch": 2.2632201692909804, "grad_norm": 7.46875, "learning_rate": 1.5719054311802612e-06, "loss": 1.279052495956421, "step": 12434 }, { "epoch": 2.2635842359151725, "grad_norm": 8.375, "learning_rate": 1.5713691572214607e-06, "loss": 1.4521898031234741, "step": 12436 }, { "epoch": 2.2639483025393647, "grad_norm": 12.9375, "learning_rate": 1.5708330929084463e-06, "loss": 1.4216667413711548, "step": 12438 }, { "epoch": 2.264312369163557, "grad_norm": 17.625, "learning_rate": 1.5702972383198836e-06, "loss": 1.4985642433166504, "step": 12440 }, { "epoch": 2.264676435787749, "grad_norm": 5.84375, "learning_rate": 1.5697615935344074e-06, "loss": 1.2892903089523315, "step": 12442 }, { "epoch": 2.2650405024119413, "grad_norm": 8.0, "learning_rate": 1.5692261586306209e-06, "loss": 1.0783270597457886, "step": 12444 }, { "epoch": 2.2654045690361335, "grad_norm": 12.25, "learning_rate": 1.5686909336870974e-06, "loss": 1.8017053604125977, "step": 12446 }, { "epoch": 2.2657686356603257, "grad_norm": 27.125, "learning_rate": 1.5681559187823785e-06, "loss": 1.4044363498687744, "step": 12448 }, { "epoch": 2.266132702284518, "grad_norm": 15.0, "learning_rate": 1.567621113994976e-06, "loss": 1.505138635635376, "step": 12450 }, { "epoch": 2.26649676890871, "grad_norm": 7.90625, "learning_rate": 1.56708651940337e-06, "loss": 1.4537688493728638, "step": 12452 }, { "epoch": 2.2668608355329027, "grad_norm": 11.75, "learning_rate": 1.5665521350860101e-06, "loss": 1.393488883972168, "step": 12454 }, { "epoch": 2.267224902157095, "grad_norm": 43.5, "learning_rate": 1.5660179611213152e-06, "loss": 1.892049789428711, "step": 12456 }, { "epoch": 2.267588968781287, "grad_norm": 15.875, "learning_rate": 1.5654839975876731e-06, "loss": 1.1731960773468018, "step": 12458 }, { "epoch": 2.2679530354054793, "grad_norm": 11.5, "learning_rate": 1.564950244563441e-06, "loss": 1.429764747619629, "step": 12460 }, { "epoch": 2.2683171020296715, "grad_norm": 19.75, "learning_rate": 1.5644167021269444e-06, "loss": 1.4573893547058105, "step": 12462 }, { "epoch": 2.2686811686538637, "grad_norm": 16.0, "learning_rate": 1.563883370356479e-06, "loss": 1.514305591583252, "step": 12464 }, { "epoch": 2.269045235278056, "grad_norm": 24.875, "learning_rate": 1.5633502493303087e-06, "loss": 1.2604882717132568, "step": 12466 }, { "epoch": 2.269409301902248, "grad_norm": 11.75, "learning_rate": 1.5628173391266674e-06, "loss": 1.7389116287231445, "step": 12468 }, { "epoch": 2.2697733685264403, "grad_norm": 31.375, "learning_rate": 1.562284639823757e-06, "loss": 1.2206616401672363, "step": 12470 }, { "epoch": 2.2701374351506325, "grad_norm": 11.75, "learning_rate": 1.5617521514997494e-06, "loss": 1.4983587265014648, "step": 12472 }, { "epoch": 2.2705015017748247, "grad_norm": 13.1875, "learning_rate": 1.5612198742327846e-06, "loss": 1.357822060585022, "step": 12474 }, { "epoch": 2.270865568399017, "grad_norm": 24.5, "learning_rate": 1.5606878081009724e-06, "loss": 1.7816073894500732, "step": 12476 }, { "epoch": 2.271229635023209, "grad_norm": 14.375, "learning_rate": 1.5601559531823917e-06, "loss": 1.7828789949417114, "step": 12478 }, { "epoch": 2.2715937016474017, "grad_norm": 13.4375, "learning_rate": 1.5596243095550891e-06, "loss": 1.7889257669448853, "step": 12480 }, { "epoch": 2.2719577682715935, "grad_norm": 8.75, "learning_rate": 1.5590928772970823e-06, "loss": 1.6126959323883057, "step": 12482 }, { "epoch": 2.272321834895786, "grad_norm": 11.0, "learning_rate": 1.558561656486356e-06, "loss": 1.0573344230651855, "step": 12484 }, { "epoch": 2.2726859015199783, "grad_norm": 46.0, "learning_rate": 1.558030647200865e-06, "loss": 1.019694209098816, "step": 12486 }, { "epoch": 2.2730499681441705, "grad_norm": 10.375, "learning_rate": 1.5574998495185325e-06, "loss": 1.4526140689849854, "step": 12488 }, { "epoch": 2.2734140347683627, "grad_norm": 7.84375, "learning_rate": 1.5569692635172518e-06, "loss": 0.9422599077224731, "step": 12490 }, { "epoch": 2.273778101392555, "grad_norm": 11.0, "learning_rate": 1.5564388892748827e-06, "loss": 1.4089980125427246, "step": 12492 }, { "epoch": 2.274142168016747, "grad_norm": 9.6875, "learning_rate": 1.555908726869257e-06, "loss": 1.8546347618103027, "step": 12494 }, { "epoch": 2.2745062346409393, "grad_norm": 102.0, "learning_rate": 1.555378776378173e-06, "loss": 1.5285102128982544, "step": 12496 }, { "epoch": 2.2748703012651315, "grad_norm": 12.1875, "learning_rate": 1.554849037879399e-06, "loss": 1.5162465572357178, "step": 12498 }, { "epoch": 2.2752343678893237, "grad_norm": 12.0625, "learning_rate": 1.5543195114506724e-06, "loss": 1.5895440578460693, "step": 12500 }, { "epoch": 2.275598434513516, "grad_norm": 31.625, "learning_rate": 1.5537901971696984e-06, "loss": 1.978537678718567, "step": 12502 }, { "epoch": 2.275962501137708, "grad_norm": 6.84375, "learning_rate": 1.553261095114152e-06, "loss": 1.6220439672470093, "step": 12504 }, { "epoch": 2.2763265677619007, "grad_norm": 8.25, "learning_rate": 1.5527322053616767e-06, "loss": 1.3209445476531982, "step": 12506 }, { "epoch": 2.2766906343860924, "grad_norm": 8.0625, "learning_rate": 1.552203527989885e-06, "loss": 1.5111479759216309, "step": 12508 }, { "epoch": 2.277054701010285, "grad_norm": 5.125, "learning_rate": 1.551675063076358e-06, "loss": 1.090872883796692, "step": 12510 }, { "epoch": 2.2774187676344773, "grad_norm": 15.9375, "learning_rate": 1.551146810698646e-06, "loss": 1.2567442655563354, "step": 12512 }, { "epoch": 2.2777828342586695, "grad_norm": 9.4375, "learning_rate": 1.550618770934268e-06, "loss": 1.4358798265457153, "step": 12514 }, { "epoch": 2.2781469008828616, "grad_norm": 15.0625, "learning_rate": 1.5500909438607115e-06, "loss": 1.5757757425308228, "step": 12516 }, { "epoch": 2.278510967507054, "grad_norm": 15.75, "learning_rate": 1.5495633295554332e-06, "loss": 1.732880711555481, "step": 12518 }, { "epoch": 2.278875034131246, "grad_norm": 24.625, "learning_rate": 1.5490359280958579e-06, "loss": 1.745815396308899, "step": 12520 }, { "epoch": 2.2792391007554382, "grad_norm": 13.5625, "learning_rate": 1.54850873955938e-06, "loss": 1.1320409774780273, "step": 12522 }, { "epoch": 2.2796031673796304, "grad_norm": 9.8125, "learning_rate": 1.5479817640233624e-06, "loss": 1.3969218730926514, "step": 12524 }, { "epoch": 2.2799672340038226, "grad_norm": 17.125, "learning_rate": 1.547455001565136e-06, "loss": 1.3373697996139526, "step": 12526 }, { "epoch": 2.280331300628015, "grad_norm": 9.6875, "learning_rate": 1.5469284522620022e-06, "loss": 0.9119505882263184, "step": 12528 }, { "epoch": 2.280695367252207, "grad_norm": 6.90625, "learning_rate": 1.5464021161912285e-06, "loss": 1.4338898658752441, "step": 12530 }, { "epoch": 2.281059433876399, "grad_norm": 13.375, "learning_rate": 1.5458759934300536e-06, "loss": 1.3429218530654907, "step": 12532 }, { "epoch": 2.2814235005005914, "grad_norm": 16.75, "learning_rate": 1.5453500840556834e-06, "loss": 0.916692852973938, "step": 12534 }, { "epoch": 2.281787567124784, "grad_norm": 11.0, "learning_rate": 1.5448243881452934e-06, "loss": 1.6242117881774902, "step": 12536 }, { "epoch": 2.2821516337489762, "grad_norm": 17.625, "learning_rate": 1.5442989057760272e-06, "loss": 1.32588529586792, "step": 12538 }, { "epoch": 2.2825157003731684, "grad_norm": 11.5, "learning_rate": 1.543773637024997e-06, "loss": 1.3498769998550415, "step": 12540 }, { "epoch": 2.2828797669973606, "grad_norm": 11.75, "learning_rate": 1.5432485819692842e-06, "loss": 1.480819582939148, "step": 12542 }, { "epoch": 2.283243833621553, "grad_norm": 9.75, "learning_rate": 1.542723740685938e-06, "loss": 1.4214507341384888, "step": 12544 }, { "epoch": 2.283607900245745, "grad_norm": 8.9375, "learning_rate": 1.542199113251977e-06, "loss": 1.4767481088638306, "step": 12546 }, { "epoch": 2.283971966869937, "grad_norm": 8.0625, "learning_rate": 1.5416746997443884e-06, "loss": 1.2315850257873535, "step": 12548 }, { "epoch": 2.2843360334941294, "grad_norm": 11.4375, "learning_rate": 1.5411505002401275e-06, "loss": 1.0710111856460571, "step": 12550 }, { "epoch": 2.2847001001183216, "grad_norm": 4.5625, "learning_rate": 1.5406265148161183e-06, "loss": 1.2519993782043457, "step": 12552 }, { "epoch": 2.285064166742514, "grad_norm": 10.4375, "learning_rate": 1.540102743549254e-06, "loss": 1.3542646169662476, "step": 12554 }, { "epoch": 2.285428233366706, "grad_norm": 6.96875, "learning_rate": 1.5395791865163957e-06, "loss": 1.4727967977523804, "step": 12556 }, { "epoch": 2.285792299990898, "grad_norm": 6.84375, "learning_rate": 1.539055843794373e-06, "loss": 1.5567904710769653, "step": 12558 }, { "epoch": 2.2861563666150904, "grad_norm": 12.25, "learning_rate": 1.5385327154599846e-06, "loss": 1.1173269748687744, "step": 12560 }, { "epoch": 2.286520433239283, "grad_norm": 10.9375, "learning_rate": 1.5380098015899972e-06, "loss": 1.3361327648162842, "step": 12562 }, { "epoch": 2.286884499863475, "grad_norm": 8.8125, "learning_rate": 1.5374871022611467e-06, "loss": 1.285651445388794, "step": 12564 }, { "epoch": 2.2872485664876674, "grad_norm": 9.75, "learning_rate": 1.536964617550137e-06, "loss": 1.582836627960205, "step": 12566 }, { "epoch": 2.2876126331118596, "grad_norm": 22.875, "learning_rate": 1.5364423475336405e-06, "loss": 1.2831053733825684, "step": 12568 }, { "epoch": 2.2879766997360518, "grad_norm": 33.5, "learning_rate": 1.535920292288298e-06, "loss": 1.605603814125061, "step": 12570 }, { "epoch": 2.288340766360244, "grad_norm": 8.875, "learning_rate": 1.5353984518907195e-06, "loss": 1.3263009786605835, "step": 12572 }, { "epoch": 2.288704832984436, "grad_norm": 6.8125, "learning_rate": 1.5348768264174821e-06, "loss": 1.0126454830169678, "step": 12574 }, { "epoch": 2.2890688996086284, "grad_norm": 4.03125, "learning_rate": 1.5343554159451336e-06, "loss": 1.14657461643219, "step": 12576 }, { "epoch": 2.2894329662328206, "grad_norm": 4.21875, "learning_rate": 1.5338342205501874e-06, "loss": 1.3293346166610718, "step": 12578 }, { "epoch": 2.2897970328570127, "grad_norm": 12.25, "learning_rate": 1.5333132403091278e-06, "loss": 1.091294765472412, "step": 12580 }, { "epoch": 2.290161099481205, "grad_norm": 7.71875, "learning_rate": 1.532792475298406e-06, "loss": 1.4288572072982788, "step": 12582 }, { "epoch": 2.290525166105397, "grad_norm": 9.1875, "learning_rate": 1.5322719255944427e-06, "loss": 1.4103387594223022, "step": 12584 }, { "epoch": 2.2908892327295893, "grad_norm": 10.25, "learning_rate": 1.5317515912736259e-06, "loss": 1.4835950136184692, "step": 12586 }, { "epoch": 2.291253299353782, "grad_norm": 13.5625, "learning_rate": 1.5312314724123128e-06, "loss": 1.3269966840744019, "step": 12588 }, { "epoch": 2.291617365977974, "grad_norm": 13.5625, "learning_rate": 1.5307115690868289e-06, "loss": 1.1279107332229614, "step": 12590 }, { "epoch": 2.2919814326021664, "grad_norm": 7.9375, "learning_rate": 1.5301918813734673e-06, "loss": 0.5846623182296753, "step": 12592 }, { "epoch": 2.2923454992263586, "grad_norm": 5.53125, "learning_rate": 1.529672409348491e-06, "loss": 1.2363202571868896, "step": 12594 }, { "epoch": 2.2927095658505507, "grad_norm": 11.5, "learning_rate": 1.5291531530881299e-06, "loss": 1.5694537162780762, "step": 12596 }, { "epoch": 2.293073632474743, "grad_norm": 12.8125, "learning_rate": 1.5286341126685825e-06, "loss": 1.369071364402771, "step": 12598 }, { "epoch": 2.293437699098935, "grad_norm": 12.0625, "learning_rate": 1.5281152881660163e-06, "loss": 1.3592339754104614, "step": 12600 }, { "epoch": 2.2938017657231273, "grad_norm": 10.125, "learning_rate": 1.5275966796565665e-06, "loss": 1.4833550453186035, "step": 12602 }, { "epoch": 2.2941658323473195, "grad_norm": 16.125, "learning_rate": 1.5270782872163367e-06, "loss": 1.3515369892120361, "step": 12604 }, { "epoch": 2.2945298989715117, "grad_norm": 4.59375, "learning_rate": 1.526560110921399e-06, "loss": 1.2067272663116455, "step": 12606 }, { "epoch": 2.294893965595704, "grad_norm": 6.78125, "learning_rate": 1.526042150847794e-06, "loss": 1.2377903461456299, "step": 12608 }, { "epoch": 2.295258032219896, "grad_norm": 7.0, "learning_rate": 1.5255244070715298e-06, "loss": 1.2793469429016113, "step": 12610 }, { "epoch": 2.2956220988440883, "grad_norm": 8.5625, "learning_rate": 1.5250068796685833e-06, "loss": 1.3406494855880737, "step": 12612 }, { "epoch": 2.295986165468281, "grad_norm": 5.9375, "learning_rate": 1.5244895687148994e-06, "loss": 1.2264078855514526, "step": 12614 }, { "epoch": 2.2963502320924727, "grad_norm": 63.0, "learning_rate": 1.5239724742863914e-06, "loss": 1.5421836376190186, "step": 12616 }, { "epoch": 2.2967142987166653, "grad_norm": 14.875, "learning_rate": 1.5234555964589415e-06, "loss": 1.8008121252059937, "step": 12618 }, { "epoch": 2.2970783653408575, "grad_norm": 8.75, "learning_rate": 1.522938935308399e-06, "loss": 1.0468926429748535, "step": 12620 }, { "epoch": 2.2974424319650497, "grad_norm": 31.375, "learning_rate": 1.522422490910581e-06, "loss": 1.2860817909240723, "step": 12622 }, { "epoch": 2.297806498589242, "grad_norm": 5.59375, "learning_rate": 1.521906263341275e-06, "loss": 0.12845967710018158, "step": 12624 }, { "epoch": 2.298170565213434, "grad_norm": 198.0, "learning_rate": 1.5213902526762348e-06, "loss": 0.43172675371170044, "step": 12626 }, { "epoch": 2.2985346318376263, "grad_norm": 21.75, "learning_rate": 1.5208744589911823e-06, "loss": 1.3381685018539429, "step": 12628 }, { "epoch": 2.2988986984618185, "grad_norm": 184.0, "learning_rate": 1.5203588823618087e-06, "loss": 0.9716833829879761, "step": 12630 }, { "epoch": 2.2992627650860107, "grad_norm": 5.71875, "learning_rate": 1.5198435228637726e-06, "loss": 1.551675796508789, "step": 12632 }, { "epoch": 2.299626831710203, "grad_norm": 5.4375, "learning_rate": 1.519328380572701e-06, "loss": 1.4420608282089233, "step": 12634 }, { "epoch": 2.299990898334395, "grad_norm": 6.34375, "learning_rate": 1.518813455564189e-06, "loss": 0.9131681323051453, "step": 12636 }, { "epoch": 2.3003549649585873, "grad_norm": 9.0, "learning_rate": 1.5182987479137994e-06, "loss": 1.5336073637008667, "step": 12638 }, { "epoch": 2.3007190315827795, "grad_norm": 17.25, "learning_rate": 1.5177842576970641e-06, "loss": 1.2791965007781982, "step": 12640 }, { "epoch": 2.3010830982069717, "grad_norm": 12.375, "learning_rate": 1.5172699849894821e-06, "loss": 1.0568195581436157, "step": 12642 }, { "epoch": 2.3014471648311643, "grad_norm": 16.125, "learning_rate": 1.5167559298665206e-06, "loss": 1.4356865882873535, "step": 12644 }, { "epoch": 2.3018112314553565, "grad_norm": 21.5, "learning_rate": 1.5162420924036152e-06, "loss": 1.3333125114440918, "step": 12646 }, { "epoch": 2.3021752980795487, "grad_norm": 59.25, "learning_rate": 1.51572847267617e-06, "loss": 0.6198864579200745, "step": 12648 }, { "epoch": 2.302539364703741, "grad_norm": 18.875, "learning_rate": 1.5152150707595558e-06, "loss": 1.824110507965088, "step": 12650 }, { "epoch": 2.302903431327933, "grad_norm": 30.625, "learning_rate": 1.514701886729113e-06, "loss": 1.3971707820892334, "step": 12652 }, { "epoch": 2.3032674979521253, "grad_norm": 9.875, "learning_rate": 1.5141889206601488e-06, "loss": 1.489194393157959, "step": 12654 }, { "epoch": 2.3036315645763175, "grad_norm": 11.9375, "learning_rate": 1.513676172627939e-06, "loss": 1.4018131494522095, "step": 12656 }, { "epoch": 2.3039956312005097, "grad_norm": 25.125, "learning_rate": 1.5131636427077274e-06, "loss": 1.383791208267212, "step": 12658 }, { "epoch": 2.304359697824702, "grad_norm": 7.21875, "learning_rate": 1.5126513309747255e-06, "loss": 0.8677065372467041, "step": 12660 }, { "epoch": 2.304723764448894, "grad_norm": 21.5, "learning_rate": 1.512139237504113e-06, "loss": 1.1065874099731445, "step": 12662 }, { "epoch": 2.3050878310730862, "grad_norm": 4.0, "learning_rate": 1.5116273623710375e-06, "loss": 0.5159171223640442, "step": 12664 }, { "epoch": 2.3054518976972784, "grad_norm": 10.5, "learning_rate": 1.5111157056506155e-06, "loss": 1.3095202445983887, "step": 12666 }, { "epoch": 2.3058159643214706, "grad_norm": 12.125, "learning_rate": 1.510604267417929e-06, "loss": 1.2780394554138184, "step": 12668 }, { "epoch": 2.3061800309456633, "grad_norm": 7.0625, "learning_rate": 1.5100930477480305e-06, "loss": 1.5795389413833618, "step": 12670 }, { "epoch": 2.3065440975698555, "grad_norm": 7.21875, "learning_rate": 1.5095820467159391e-06, "loss": 0.9675000309944153, "step": 12672 }, { "epoch": 2.3069081641940477, "grad_norm": 7.96875, "learning_rate": 1.5090712643966423e-06, "loss": 1.3001408576965332, "step": 12674 }, { "epoch": 2.30727223081824, "grad_norm": 10.9375, "learning_rate": 1.5085607008650955e-06, "loss": 1.3690695762634277, "step": 12676 }, { "epoch": 2.307636297442432, "grad_norm": 5.5625, "learning_rate": 1.5080503561962212e-06, "loss": 1.2771575450897217, "step": 12678 }, { "epoch": 2.3080003640666242, "grad_norm": 7.625, "learning_rate": 1.507540230464911e-06, "loss": 1.159590721130371, "step": 12680 }, { "epoch": 2.3083644306908164, "grad_norm": 13.875, "learning_rate": 1.5070303237460235e-06, "loss": 1.3532944917678833, "step": 12682 }, { "epoch": 2.3087284973150086, "grad_norm": 26.875, "learning_rate": 1.5065206361143852e-06, "loss": 1.2736201286315918, "step": 12684 }, { "epoch": 2.309092563939201, "grad_norm": 60.5, "learning_rate": 1.5060111676447914e-06, "loss": 0.7054938077926636, "step": 12686 }, { "epoch": 2.309456630563393, "grad_norm": 7.09375, "learning_rate": 1.505501918412004e-06, "loss": 1.06803297996521, "step": 12688 }, { "epoch": 2.309820697187585, "grad_norm": 9.6875, "learning_rate": 1.5049928884907536e-06, "loss": 1.3874268531799316, "step": 12690 }, { "epoch": 2.3101847638117774, "grad_norm": 10.875, "learning_rate": 1.5044840779557379e-06, "loss": 1.5525838136672974, "step": 12692 }, { "epoch": 2.3105488304359696, "grad_norm": 29.125, "learning_rate": 1.5039754868816227e-06, "loss": 1.3969459533691406, "step": 12694 }, { "epoch": 2.3109128970601622, "grad_norm": 13.875, "learning_rate": 1.5034671153430425e-06, "loss": 1.265852451324463, "step": 12696 }, { "epoch": 2.3112769636843544, "grad_norm": 27.875, "learning_rate": 1.502958963414598e-06, "loss": 1.1846003532409668, "step": 12698 }, { "epoch": 2.3116410303085466, "grad_norm": 17.5, "learning_rate": 1.5024510311708583e-06, "loss": 1.091532826423645, "step": 12700 }, { "epoch": 2.312005096932739, "grad_norm": 7.15625, "learning_rate": 1.5019433186863612e-06, "loss": 1.253321886062622, "step": 12702 }, { "epoch": 2.312369163556931, "grad_norm": 8.625, "learning_rate": 1.5014358260356106e-06, "loss": 1.3607995510101318, "step": 12704 }, { "epoch": 2.312733230181123, "grad_norm": 8.6875, "learning_rate": 1.5009285532930796e-06, "loss": 1.1928050518035889, "step": 12706 }, { "epoch": 2.3130972968053154, "grad_norm": 5.78125, "learning_rate": 1.5004215005332082e-06, "loss": 1.1566638946533203, "step": 12708 }, { "epoch": 2.3134613634295076, "grad_norm": 16.375, "learning_rate": 1.4999146678304044e-06, "loss": 1.54642653465271, "step": 12710 }, { "epoch": 2.3138254300537, "grad_norm": 12.75, "learning_rate": 1.4994080552590437e-06, "loss": 1.730119228363037, "step": 12712 }, { "epoch": 2.314189496677892, "grad_norm": 22.375, "learning_rate": 1.4989016628934695e-06, "loss": 1.4265369176864624, "step": 12714 }, { "epoch": 2.314553563302084, "grad_norm": 43.5, "learning_rate": 1.4983954908079929e-06, "loss": 1.4489706754684448, "step": 12716 }, { "epoch": 2.3149176299262764, "grad_norm": 5.9375, "learning_rate": 1.4978895390768925e-06, "loss": 1.1474939584732056, "step": 12718 }, { "epoch": 2.3152816965504686, "grad_norm": 12.8125, "learning_rate": 1.497383807774415e-06, "loss": 1.4372098445892334, "step": 12720 }, { "epoch": 2.315645763174661, "grad_norm": 9.375, "learning_rate": 1.4968782969747736e-06, "loss": 1.4117785692214966, "step": 12722 }, { "epoch": 2.316009829798853, "grad_norm": 19.5, "learning_rate": 1.496373006752151e-06, "loss": 1.0667873620986938, "step": 12724 }, { "epoch": 2.3163738964230456, "grad_norm": 7.9375, "learning_rate": 1.4958679371806956e-06, "loss": 1.2744520902633667, "step": 12726 }, { "epoch": 2.316737963047238, "grad_norm": 15.0, "learning_rate": 1.495363088334525e-06, "loss": 1.5304172039031982, "step": 12728 }, { "epoch": 2.31710202967143, "grad_norm": 14.5625, "learning_rate": 1.4948584602877233e-06, "loss": 1.7262049913406372, "step": 12730 }, { "epoch": 2.317466096295622, "grad_norm": 21.75, "learning_rate": 1.4943540531143428e-06, "loss": 1.7382382154464722, "step": 12732 }, { "epoch": 2.3178301629198144, "grad_norm": 11.1875, "learning_rate": 1.493849866888403e-06, "loss": 1.9376696348190308, "step": 12734 }, { "epoch": 2.3181942295440066, "grad_norm": 4.15625, "learning_rate": 1.4933459016838914e-06, "loss": 1.2478326559066772, "step": 12736 }, { "epoch": 2.3185582961681988, "grad_norm": 6.75, "learning_rate": 1.492842157574763e-06, "loss": 1.0598152875900269, "step": 12738 }, { "epoch": 2.318922362792391, "grad_norm": 27.0, "learning_rate": 1.4923386346349398e-06, "loss": 1.518973708152771, "step": 12740 }, { "epoch": 2.319286429416583, "grad_norm": 56.0, "learning_rate": 1.4918353329383117e-06, "loss": 1.2738940715789795, "step": 12742 }, { "epoch": 2.3196504960407753, "grad_norm": 12.125, "learning_rate": 1.491332252558737e-06, "loss": 1.4766879081726074, "step": 12744 }, { "epoch": 2.3200145626649675, "grad_norm": 14.625, "learning_rate": 1.4908293935700398e-06, "loss": 1.2525542974472046, "step": 12746 }, { "epoch": 2.3203786292891597, "grad_norm": 6.5625, "learning_rate": 1.4903267560460134e-06, "loss": 1.22401762008667, "step": 12748 }, { "epoch": 2.320742695913352, "grad_norm": 7.90625, "learning_rate": 1.4898243400604169e-06, "loss": 1.0784289836883545, "step": 12750 }, { "epoch": 2.3211067625375446, "grad_norm": 8.375, "learning_rate": 1.4893221456869783e-06, "loss": 1.6045362949371338, "step": 12752 }, { "epoch": 2.3214708291617367, "grad_norm": 6.25, "learning_rate": 1.4888201729993925e-06, "loss": 1.3778455257415771, "step": 12754 }, { "epoch": 2.321834895785929, "grad_norm": 11.9375, "learning_rate": 1.4883184220713224e-06, "loss": 1.2218191623687744, "step": 12756 }, { "epoch": 2.322198962410121, "grad_norm": 27.25, "learning_rate": 1.4878168929763972e-06, "loss": 1.3763898611068726, "step": 12758 }, { "epoch": 2.3225630290343133, "grad_norm": 10.5625, "learning_rate": 1.4873155857882148e-06, "loss": 1.164544701576233, "step": 12760 }, { "epoch": 2.3229270956585055, "grad_norm": 17.125, "learning_rate": 1.48681450058034e-06, "loss": 0.44405442476272583, "step": 12762 }, { "epoch": 2.3232911622826977, "grad_norm": 7.8125, "learning_rate": 1.4863136374263044e-06, "loss": 1.2949330806732178, "step": 12764 }, { "epoch": 2.32365522890689, "grad_norm": 6.5625, "learning_rate": 1.4858129963996083e-06, "loss": 1.571161150932312, "step": 12766 }, { "epoch": 2.324019295531082, "grad_norm": 33.5, "learning_rate": 1.4853125775737187e-06, "loss": 1.4868299961090088, "step": 12768 }, { "epoch": 2.3243833621552743, "grad_norm": 18.875, "learning_rate": 1.4848123810220693e-06, "loss": 1.7010656595230103, "step": 12770 }, { "epoch": 2.3247474287794665, "grad_norm": 9.6875, "learning_rate": 1.4843124068180632e-06, "loss": 1.5108423233032227, "step": 12772 }, { "epoch": 2.3251114954036587, "grad_norm": 10.0625, "learning_rate": 1.4838126550350684e-06, "loss": 1.2086381912231445, "step": 12774 }, { "epoch": 2.325475562027851, "grad_norm": 15.8125, "learning_rate": 1.483313125746422e-06, "loss": 1.6754951477050781, "step": 12776 }, { "epoch": 2.3258396286520435, "grad_norm": 14.0625, "learning_rate": 1.4828138190254276e-06, "loss": 1.8837347030639648, "step": 12778 }, { "epoch": 2.3262036952762357, "grad_norm": 13.5625, "learning_rate": 1.482314734945357e-06, "loss": 1.5949786901474, "step": 12780 }, { "epoch": 2.326567761900428, "grad_norm": 5.5, "learning_rate": 1.4818158735794483e-06, "loss": 1.2288943529129028, "step": 12782 }, { "epoch": 2.32693182852462, "grad_norm": 16.5, "learning_rate": 1.4813172350009074e-06, "loss": 0.8689623475074768, "step": 12784 }, { "epoch": 2.3272958951488123, "grad_norm": 18.125, "learning_rate": 1.4808188192829076e-06, "loss": 0.6625009775161743, "step": 12786 }, { "epoch": 2.3276599617730045, "grad_norm": 21.625, "learning_rate": 1.4803206264985891e-06, "loss": 1.7881476879119873, "step": 12788 }, { "epoch": 2.3280240283971967, "grad_norm": 9.1875, "learning_rate": 1.4798226567210605e-06, "loss": 1.4295833110809326, "step": 12790 }, { "epoch": 2.328388095021389, "grad_norm": 13.0625, "learning_rate": 1.4793249100233962e-06, "loss": 1.4551790952682495, "step": 12792 }, { "epoch": 2.328752161645581, "grad_norm": 6.6875, "learning_rate": 1.4788273864786382e-06, "loss": 1.314556360244751, "step": 12794 }, { "epoch": 2.3291162282697733, "grad_norm": 6.96875, "learning_rate": 1.4783300861597965e-06, "loss": 1.07609224319458, "step": 12796 }, { "epoch": 2.3294802948939655, "grad_norm": 7.9375, "learning_rate": 1.4778330091398482e-06, "loss": 0.8711978793144226, "step": 12798 }, { "epoch": 2.3298443615181577, "grad_norm": 30.875, "learning_rate": 1.4773361554917367e-06, "loss": 1.1399784088134766, "step": 12800 }, { "epoch": 2.33020842814235, "grad_norm": 17.5, "learning_rate": 1.4768395252883737e-06, "loss": 1.6996500492095947, "step": 12802 }, { "epoch": 2.3305724947665425, "grad_norm": 16.5, "learning_rate": 1.4763431186026378e-06, "loss": 1.6008769273757935, "step": 12804 }, { "epoch": 2.3309365613907347, "grad_norm": 12.0, "learning_rate": 1.475846935507374e-06, "loss": 1.4288322925567627, "step": 12806 }, { "epoch": 2.331300628014927, "grad_norm": 12.75, "learning_rate": 1.4753509760753956e-06, "loss": 1.8797374963760376, "step": 12808 }, { "epoch": 2.331664694639119, "grad_norm": 10.375, "learning_rate": 1.4748552403794827e-06, "loss": 1.3842031955718994, "step": 12810 }, { "epoch": 2.3320287612633113, "grad_norm": 23.875, "learning_rate": 1.4743597284923824e-06, "loss": 1.2714418172836304, "step": 12812 }, { "epoch": 2.3323928278875035, "grad_norm": 73.5, "learning_rate": 1.473864440486809e-06, "loss": 0.7885069847106934, "step": 12814 }, { "epoch": 2.3327568945116957, "grad_norm": 4.25, "learning_rate": 1.4733693764354442e-06, "loss": 0.9025139808654785, "step": 12816 }, { "epoch": 2.333120961135888, "grad_norm": 19.625, "learning_rate": 1.4728745364109364e-06, "loss": 1.0751862525939941, "step": 12818 }, { "epoch": 2.33348502776008, "grad_norm": 8.375, "learning_rate": 1.4723799204859016e-06, "loss": 1.555716633796692, "step": 12820 }, { "epoch": 2.3338490943842722, "grad_norm": 13.0, "learning_rate": 1.4718855287329226e-06, "loss": 1.2870285511016846, "step": 12822 }, { "epoch": 2.3342131610084644, "grad_norm": 15.0, "learning_rate": 1.4713913612245492e-06, "loss": 1.0989220142364502, "step": 12824 }, { "epoch": 2.3345772276326566, "grad_norm": 27.25, "learning_rate": 1.470897418033299e-06, "loss": 1.764319658279419, "step": 12826 }, { "epoch": 2.334941294256849, "grad_norm": 4.78125, "learning_rate": 1.470403699231655e-06, "loss": 0.9202785491943359, "step": 12828 }, { "epoch": 2.3353053608810415, "grad_norm": 19.0, "learning_rate": 1.46991020489207e-06, "loss": 1.3015995025634766, "step": 12830 }, { "epoch": 2.335669427505233, "grad_norm": 124.0, "learning_rate": 1.469416935086961e-06, "loss": 1.7507283687591553, "step": 12832 }, { "epoch": 2.336033494129426, "grad_norm": 6.3125, "learning_rate": 1.4689238898887144e-06, "loss": 1.1765103340148926, "step": 12834 }, { "epoch": 2.336397560753618, "grad_norm": 11.3125, "learning_rate": 1.4684310693696815e-06, "loss": 1.5808298587799072, "step": 12836 }, { "epoch": 2.3367616273778102, "grad_norm": 13.25, "learning_rate": 1.4679384736021827e-06, "loss": 1.6481205224990845, "step": 12838 }, { "epoch": 2.3371256940020024, "grad_norm": 13.1875, "learning_rate": 1.4674461026585038e-06, "loss": 1.370032548904419, "step": 12840 }, { "epoch": 2.3374897606261946, "grad_norm": 27.25, "learning_rate": 1.466953956610898e-06, "loss": 1.4947459697723389, "step": 12842 }, { "epoch": 2.337853827250387, "grad_norm": 11.375, "learning_rate": 1.466462035531587e-06, "loss": 1.8175482749938965, "step": 12844 }, { "epoch": 2.338217893874579, "grad_norm": 10.875, "learning_rate": 1.465970339492757e-06, "loss": 1.192372441291809, "step": 12846 }, { "epoch": 2.338581960498771, "grad_norm": 30.5, "learning_rate": 1.4654788685665627e-06, "loss": 0.791659951210022, "step": 12848 }, { "epoch": 2.3389460271229634, "grad_norm": 39.5, "learning_rate": 1.4649876228251259e-06, "loss": 1.5135085582733154, "step": 12850 }, { "epoch": 2.3393100937471556, "grad_norm": 22.75, "learning_rate": 1.464496602340534e-06, "loss": 1.6889326572418213, "step": 12852 }, { "epoch": 2.339674160371348, "grad_norm": 129.0, "learning_rate": 1.4640058071848434e-06, "loss": 1.2184324264526367, "step": 12854 }, { "epoch": 2.3400382269955404, "grad_norm": 32.0, "learning_rate": 1.4635152374300754e-06, "loss": 1.1586614847183228, "step": 12856 }, { "epoch": 2.340402293619732, "grad_norm": 15.3125, "learning_rate": 1.46302489314822e-06, "loss": 1.2337902784347534, "step": 12858 }, { "epoch": 2.340766360243925, "grad_norm": 9.0, "learning_rate": 1.4625347744112323e-06, "loss": 1.2980743646621704, "step": 12860 }, { "epoch": 2.341130426868117, "grad_norm": 19.375, "learning_rate": 1.4620448812910357e-06, "loss": 1.7018345594406128, "step": 12862 }, { "epoch": 2.341494493492309, "grad_norm": 29.625, "learning_rate": 1.46155521385952e-06, "loss": 2.0326168537139893, "step": 12864 }, { "epoch": 2.3418585601165014, "grad_norm": 25.5, "learning_rate": 1.461065772188542e-06, "loss": 1.3540562391281128, "step": 12866 }, { "epoch": 2.3422226267406936, "grad_norm": 26.75, "learning_rate": 1.460576556349925e-06, "loss": 1.3052375316619873, "step": 12868 }, { "epoch": 2.342586693364886, "grad_norm": 28.625, "learning_rate": 1.46008756641546e-06, "loss": 2.268367290496826, "step": 12870 }, { "epoch": 2.342950759989078, "grad_norm": 22.0, "learning_rate": 1.4595988024569032e-06, "loss": 1.1996707916259766, "step": 12872 }, { "epoch": 2.34331482661327, "grad_norm": 13.0625, "learning_rate": 1.4591102645459798e-06, "loss": 0.9140642285346985, "step": 12874 }, { "epoch": 2.3436788932374624, "grad_norm": 32.75, "learning_rate": 1.4586219527543808e-06, "loss": 1.5103952884674072, "step": 12876 }, { "epoch": 2.3440429598616546, "grad_norm": 18.375, "learning_rate": 1.458133867153763e-06, "loss": 1.570326566696167, "step": 12878 }, { "epoch": 2.3444070264858468, "grad_norm": 18.625, "learning_rate": 1.4576460078157518e-06, "loss": 1.4378412961959839, "step": 12880 }, { "epoch": 2.344771093110039, "grad_norm": 11.0, "learning_rate": 1.4571583748119382e-06, "loss": 1.5854227542877197, "step": 12882 }, { "epoch": 2.345135159734231, "grad_norm": 9.8125, "learning_rate": 1.4566709682138808e-06, "loss": 1.4638748168945312, "step": 12884 }, { "epoch": 2.345499226358424, "grad_norm": 5.9375, "learning_rate": 1.456183788093104e-06, "loss": 0.9610429406166077, "step": 12886 }, { "epoch": 2.345863292982616, "grad_norm": 37.0, "learning_rate": 1.4556968345210998e-06, "loss": 0.9435508251190186, "step": 12888 }, { "epoch": 2.346227359606808, "grad_norm": 13.125, "learning_rate": 1.4552101075693268e-06, "loss": 0.8131308555603027, "step": 12890 }, { "epoch": 2.3465914262310004, "grad_norm": 9.3125, "learning_rate": 1.4547236073092096e-06, "loss": 1.4127264022827148, "step": 12892 }, { "epoch": 2.3469554928551926, "grad_norm": 13.9375, "learning_rate": 1.454237333812141e-06, "loss": 1.47605562210083, "step": 12894 }, { "epoch": 2.3473195594793848, "grad_norm": 17.0, "learning_rate": 1.453751287149479e-06, "loss": 1.350528597831726, "step": 12896 }, { "epoch": 2.347683626103577, "grad_norm": 10.1875, "learning_rate": 1.4532654673925495e-06, "loss": 1.346376895904541, "step": 12898 }, { "epoch": 2.348047692727769, "grad_norm": 6.40625, "learning_rate": 1.4527798746126442e-06, "loss": 1.2344372272491455, "step": 12900 }, { "epoch": 2.3484117593519613, "grad_norm": 3.375, "learning_rate": 1.4522945088810217e-06, "loss": 0.715396523475647, "step": 12902 }, { "epoch": 2.3487758259761535, "grad_norm": 7.28125, "learning_rate": 1.4518093702689079e-06, "loss": 1.3802067041397095, "step": 12904 }, { "epoch": 2.3491398926003457, "grad_norm": 9.4375, "learning_rate": 1.4513244588474948e-06, "loss": 1.1022003889083862, "step": 12906 }, { "epoch": 2.349503959224538, "grad_norm": 13.25, "learning_rate": 1.4508397746879411e-06, "loss": 1.291582703590393, "step": 12908 }, { "epoch": 2.34986802584873, "grad_norm": 8.5625, "learning_rate": 1.450355317861372e-06, "loss": 1.2321122884750366, "step": 12910 }, { "epoch": 2.3502320924729228, "grad_norm": 10.25, "learning_rate": 1.44987108843888e-06, "loss": 0.9633653163909912, "step": 12912 }, { "epoch": 2.350596159097115, "grad_norm": 11.25, "learning_rate": 1.449387086491524e-06, "loss": 1.5249788761138916, "step": 12914 }, { "epoch": 2.350960225721307, "grad_norm": 9.375, "learning_rate": 1.4489033120903284e-06, "loss": 1.3060550689697266, "step": 12916 }, { "epoch": 2.3513242923454993, "grad_norm": 14.625, "learning_rate": 1.4484197653062863e-06, "loss": 1.2838326692581177, "step": 12918 }, { "epoch": 2.3516883589696915, "grad_norm": 6.09375, "learning_rate": 1.4479364462103551e-06, "loss": 1.2784196138381958, "step": 12920 }, { "epoch": 2.3520524255938837, "grad_norm": 21.875, "learning_rate": 1.4474533548734607e-06, "loss": 1.5399810075759888, "step": 12922 }, { "epoch": 2.352416492218076, "grad_norm": 16.125, "learning_rate": 1.4469704913664947e-06, "loss": 1.7008895874023438, "step": 12924 }, { "epoch": 2.352780558842268, "grad_norm": 26.125, "learning_rate": 1.446487855760315e-06, "loss": 1.6886892318725586, "step": 12926 }, { "epoch": 2.3531446254664603, "grad_norm": 19.5, "learning_rate": 1.4460054481257468e-06, "loss": 0.9687001705169678, "step": 12928 }, { "epoch": 2.3535086920906525, "grad_norm": 15.6875, "learning_rate": 1.445523268533581e-06, "loss": 1.6733012199401855, "step": 12930 }, { "epoch": 2.3538727587148447, "grad_norm": 20.75, "learning_rate": 1.445041317054576e-06, "loss": 2.175306797027588, "step": 12932 }, { "epoch": 2.354236825339037, "grad_norm": 18.25, "learning_rate": 1.4445595937594558e-06, "loss": 1.5332789421081543, "step": 12934 }, { "epoch": 2.354600891963229, "grad_norm": 16.125, "learning_rate": 1.4440780987189118e-06, "loss": 1.5125454664230347, "step": 12936 }, { "epoch": 2.3549649585874217, "grad_norm": 13.6875, "learning_rate": 1.4435968320036014e-06, "loss": 1.656247615814209, "step": 12938 }, { "epoch": 2.355329025211614, "grad_norm": 18.125, "learning_rate": 1.443115793684148e-06, "loss": 1.7316515445709229, "step": 12940 }, { "epoch": 2.355693091835806, "grad_norm": 11.0625, "learning_rate": 1.4426349838311427e-06, "loss": 1.5492905378341675, "step": 12942 }, { "epoch": 2.3560571584599983, "grad_norm": 11.125, "learning_rate": 1.4421544025151418e-06, "loss": 1.2327100038528442, "step": 12944 }, { "epoch": 2.3564212250841905, "grad_norm": 17.75, "learning_rate": 1.4416740498066692e-06, "loss": 1.5096063613891602, "step": 12946 }, { "epoch": 2.3567852917083827, "grad_norm": 13.5625, "learning_rate": 1.4411939257762142e-06, "loss": 1.390622854232788, "step": 12948 }, { "epoch": 2.357149358332575, "grad_norm": 14.3125, "learning_rate": 1.4407140304942332e-06, "loss": 1.573535442352295, "step": 12950 }, { "epoch": 2.357513424956767, "grad_norm": 16.25, "learning_rate": 1.4402343640311491e-06, "loss": 1.849221110343933, "step": 12952 }, { "epoch": 2.3578774915809593, "grad_norm": 9.875, "learning_rate": 1.439754926457351e-06, "loss": 1.3351898193359375, "step": 12954 }, { "epoch": 2.3582415582051515, "grad_norm": 14.4375, "learning_rate": 1.4392757178431947e-06, "loss": 1.5842418670654297, "step": 12956 }, { "epoch": 2.3586056248293437, "grad_norm": 7.8125, "learning_rate": 1.438796738259001e-06, "loss": 1.1496270895004272, "step": 12958 }, { "epoch": 2.358969691453536, "grad_norm": 40.0, "learning_rate": 1.4383179877750595e-06, "loss": 0.5406174659729004, "step": 12960 }, { "epoch": 2.359333758077728, "grad_norm": 9.875, "learning_rate": 1.437839466461624e-06, "loss": 1.2391849756240845, "step": 12962 }, { "epoch": 2.3596978247019207, "grad_norm": 16.625, "learning_rate": 1.437361174388916e-06, "loss": 1.6558117866516113, "step": 12964 }, { "epoch": 2.3600618913261124, "grad_norm": 10.875, "learning_rate": 1.436883111627123e-06, "loss": 1.1513111591339111, "step": 12966 }, { "epoch": 2.360425957950305, "grad_norm": 12.25, "learning_rate": 1.4364052782463985e-06, "loss": 1.433065414428711, "step": 12968 }, { "epoch": 2.3607900245744973, "grad_norm": 12.0, "learning_rate": 1.4359276743168626e-06, "loss": 1.4959712028503418, "step": 12970 }, { "epoch": 2.3611540911986895, "grad_norm": 14.6875, "learning_rate": 1.435450299908602e-06, "loss": 1.7500238418579102, "step": 12972 }, { "epoch": 2.3615181578228817, "grad_norm": 29.375, "learning_rate": 1.4349731550916692e-06, "loss": 1.6094770431518555, "step": 12974 }, { "epoch": 2.361882224447074, "grad_norm": 7.0625, "learning_rate": 1.4344962399360836e-06, "loss": 0.9987369775772095, "step": 12976 }, { "epoch": 2.362246291071266, "grad_norm": 22.25, "learning_rate": 1.4340195545118304e-06, "loss": 1.464593768119812, "step": 12978 }, { "epoch": 2.3626103576954582, "grad_norm": 12.125, "learning_rate": 1.433543098888861e-06, "loss": 0.6411446928977966, "step": 12980 }, { "epoch": 2.3629744243196504, "grad_norm": 11.625, "learning_rate": 1.4330668731370937e-06, "loss": 1.3931729793548584, "step": 12982 }, { "epoch": 2.3633384909438426, "grad_norm": 51.5, "learning_rate": 1.4325908773264125e-06, "loss": 1.8375945091247559, "step": 12984 }, { "epoch": 2.363702557568035, "grad_norm": 11.1875, "learning_rate": 1.4321151115266676e-06, "loss": 1.5373510122299194, "step": 12986 }, { "epoch": 2.364066624192227, "grad_norm": 11.1875, "learning_rate": 1.4316395758076765e-06, "loss": 1.1396845579147339, "step": 12988 }, { "epoch": 2.364430690816419, "grad_norm": 16.5, "learning_rate": 1.4311642702392215e-06, "loss": 0.9452848434448242, "step": 12990 }, { "epoch": 2.3647947574406114, "grad_norm": 7.46875, "learning_rate": 1.4306891948910517e-06, "loss": 1.2993203401565552, "step": 12992 }, { "epoch": 2.365158824064804, "grad_norm": 3.6875, "learning_rate": 1.4302143498328828e-06, "loss": 1.0312904119491577, "step": 12994 }, { "epoch": 2.3655228906889962, "grad_norm": 8.3125, "learning_rate": 1.4297397351343965e-06, "loss": 1.2052390575408936, "step": 12996 }, { "epoch": 2.3658869573131884, "grad_norm": 32.0, "learning_rate": 1.4292653508652398e-06, "loss": 1.3997597694396973, "step": 12998 }, { "epoch": 2.3662510239373806, "grad_norm": 9.4375, "learning_rate": 1.4287911970950275e-06, "loss": 1.3373398780822754, "step": 13000 }, { "epoch": 2.366615090561573, "grad_norm": 12.0, "learning_rate": 1.4283172738933396e-06, "loss": 1.4318392276763916, "step": 13002 }, { "epoch": 2.366979157185765, "grad_norm": 7.0, "learning_rate": 1.4278435813297223e-06, "loss": 1.2079908847808838, "step": 13004 }, { "epoch": 2.367343223809957, "grad_norm": 187.0, "learning_rate": 1.427370119473688e-06, "loss": 1.459062933921814, "step": 13006 }, { "epoch": 2.3677072904341494, "grad_norm": 9.125, "learning_rate": 1.4268968883947154e-06, "loss": 1.410860300064087, "step": 13008 }, { "epoch": 2.3680713570583416, "grad_norm": 44.0, "learning_rate": 1.4264238881622492e-06, "loss": 1.0161633491516113, "step": 13010 }, { "epoch": 2.368435423682534, "grad_norm": 6.96875, "learning_rate": 1.4259511188456998e-06, "loss": 1.3491284847259521, "step": 13012 }, { "epoch": 2.368799490306726, "grad_norm": 8.0625, "learning_rate": 1.4254785805144452e-06, "loss": 1.3301050662994385, "step": 13014 }, { "epoch": 2.369163556930918, "grad_norm": 15.0, "learning_rate": 1.425006273237828e-06, "loss": 1.3427374362945557, "step": 13016 }, { "epoch": 2.3695276235551104, "grad_norm": 16.125, "learning_rate": 1.4245341970851568e-06, "loss": 1.4216352701187134, "step": 13018 }, { "epoch": 2.369891690179303, "grad_norm": 37.0, "learning_rate": 1.424062352125708e-06, "loss": 1.4281244277954102, "step": 13020 }, { "epoch": 2.370255756803495, "grad_norm": 12.0, "learning_rate": 1.4235907384287218e-06, "loss": 1.3793773651123047, "step": 13022 }, { "epoch": 2.3706198234276874, "grad_norm": 11.0, "learning_rate": 1.423119356063406e-06, "loss": 1.1660332679748535, "step": 13024 }, { "epoch": 2.3709838900518796, "grad_norm": 6.0, "learning_rate": 1.4226482050989345e-06, "loss": 1.2736250162124634, "step": 13026 }, { "epoch": 2.371347956676072, "grad_norm": 9.1875, "learning_rate": 1.4221772856044467e-06, "loss": 1.399125337600708, "step": 13028 }, { "epoch": 2.371712023300264, "grad_norm": 16.875, "learning_rate": 1.4217065976490474e-06, "loss": 1.475401759147644, "step": 13030 }, { "epoch": 2.372076089924456, "grad_norm": 13.9375, "learning_rate": 1.4212361413018088e-06, "loss": 1.6186275482177734, "step": 13032 }, { "epoch": 2.3724401565486484, "grad_norm": 9.5625, "learning_rate": 1.4207659166317683e-06, "loss": 1.314518928527832, "step": 13034 }, { "epoch": 2.3728042231728406, "grad_norm": 16.375, "learning_rate": 1.4202959237079295e-06, "loss": 1.5972141027450562, "step": 13036 }, { "epoch": 2.3731682897970328, "grad_norm": 6.71875, "learning_rate": 1.4198261625992618e-06, "loss": 1.229283094406128, "step": 13038 }, { "epoch": 2.373532356421225, "grad_norm": 19.625, "learning_rate": 1.4193566333747012e-06, "loss": 1.0320490598678589, "step": 13040 }, { "epoch": 2.373896423045417, "grad_norm": 6.34375, "learning_rate": 1.4188873361031482e-06, "loss": 1.1893830299377441, "step": 13042 }, { "epoch": 2.3742604896696093, "grad_norm": 10.0, "learning_rate": 1.4184182708534713e-06, "loss": 1.5255910158157349, "step": 13044 }, { "epoch": 2.374624556293802, "grad_norm": 7.125, "learning_rate": 1.4179494376945036e-06, "loss": 1.460092306137085, "step": 13046 }, { "epoch": 2.374988622917994, "grad_norm": 24.125, "learning_rate": 1.4174808366950442e-06, "loss": 1.7898707389831543, "step": 13048 }, { "epoch": 2.3753526895421864, "grad_norm": 36.25, "learning_rate": 1.4170124679238592e-06, "loss": 1.7407951354980469, "step": 13050 }, { "epoch": 2.3757167561663786, "grad_norm": 11.125, "learning_rate": 1.4165443314496789e-06, "loss": 1.8305128812789917, "step": 13052 }, { "epoch": 2.3760808227905708, "grad_norm": 10.1875, "learning_rate": 1.4160764273412008e-06, "loss": 1.3565661907196045, "step": 13054 }, { "epoch": 2.376444889414763, "grad_norm": 26.0, "learning_rate": 1.4156087556670877e-06, "loss": 1.6626286506652832, "step": 13056 }, { "epoch": 2.376808956038955, "grad_norm": 18.625, "learning_rate": 1.415141316495969e-06, "loss": 1.8741836547851562, "step": 13058 }, { "epoch": 2.3771730226631473, "grad_norm": 13.0, "learning_rate": 1.4146741098964389e-06, "loss": 1.4989471435546875, "step": 13060 }, { "epoch": 2.3775370892873395, "grad_norm": 10.5, "learning_rate": 1.4142071359370587e-06, "loss": 1.6329776048660278, "step": 13062 }, { "epoch": 2.3779011559115317, "grad_norm": 17.625, "learning_rate": 1.4137403946863547e-06, "loss": 0.9275729060173035, "step": 13064 }, { "epoch": 2.378265222535724, "grad_norm": 692.0, "learning_rate": 1.4132738862128192e-06, "loss": 0.6249986886978149, "step": 13066 }, { "epoch": 2.378629289159916, "grad_norm": 4.0, "learning_rate": 1.4128076105849103e-06, "loss": 1.0405938625335693, "step": 13068 }, { "epoch": 2.3789933557841083, "grad_norm": 9.75, "learning_rate": 1.4123415678710522e-06, "loss": 1.0645229816436768, "step": 13070 }, { "epoch": 2.379357422408301, "grad_norm": 11.1875, "learning_rate": 1.411875758139635e-06, "loss": 1.3818163871765137, "step": 13072 }, { "epoch": 2.3797214890324927, "grad_norm": 11.25, "learning_rate": 1.4114101814590143e-06, "loss": 1.4613906145095825, "step": 13074 }, { "epoch": 2.3800855556566853, "grad_norm": 9.625, "learning_rate": 1.410944837897511e-06, "loss": 1.4235641956329346, "step": 13076 }, { "epoch": 2.3804496222808775, "grad_norm": 5.90625, "learning_rate": 1.4104797275234131e-06, "loss": 0.8895462155342102, "step": 13078 }, { "epoch": 2.3808136889050697, "grad_norm": 20.5, "learning_rate": 1.4100148504049736e-06, "loss": 1.2731987237930298, "step": 13080 }, { "epoch": 2.381177755529262, "grad_norm": 10.0, "learning_rate": 1.4095502066104107e-06, "loss": 1.770280361175537, "step": 13082 }, { "epoch": 2.381541822153454, "grad_norm": 9.125, "learning_rate": 1.4090857962079099e-06, "loss": 1.5567989349365234, "step": 13084 }, { "epoch": 2.3819058887776463, "grad_norm": 13.6875, "learning_rate": 1.408621619265621e-06, "loss": 1.588033676147461, "step": 13086 }, { "epoch": 2.3822699554018385, "grad_norm": 14.1875, "learning_rate": 1.40815767585166e-06, "loss": 1.3291270732879639, "step": 13088 }, { "epoch": 2.3826340220260307, "grad_norm": 14.5625, "learning_rate": 1.407693966034109e-06, "loss": 1.4294660091400146, "step": 13090 }, { "epoch": 2.382998088650223, "grad_norm": 9.8125, "learning_rate": 1.4072304898810155e-06, "loss": 1.2907848358154297, "step": 13092 }, { "epoch": 2.383362155274415, "grad_norm": 11.0, "learning_rate": 1.4067672474603928e-06, "loss": 0.9829186201095581, "step": 13094 }, { "epoch": 2.3837262218986073, "grad_norm": 10.0625, "learning_rate": 1.4063042388402193e-06, "loss": 1.4701520204544067, "step": 13096 }, { "epoch": 2.3840902885228, "grad_norm": 18.5, "learning_rate": 1.4058414640884404e-06, "loss": 1.5229731798171997, "step": 13098 }, { "epoch": 2.3844543551469917, "grad_norm": 9.375, "learning_rate": 1.4053789232729661e-06, "loss": 1.069821834564209, "step": 13100 }, { "epoch": 2.3848184217711843, "grad_norm": 15.5, "learning_rate": 1.4049166164616724e-06, "loss": 1.087325930595398, "step": 13102 }, { "epoch": 2.3851824883953765, "grad_norm": 23.625, "learning_rate": 1.4044545437224008e-06, "loss": 1.6770304441452026, "step": 13104 }, { "epoch": 2.3855465550195687, "grad_norm": 10.4375, "learning_rate": 1.4039927051229584e-06, "loss": 1.6059675216674805, "step": 13106 }, { "epoch": 2.385910621643761, "grad_norm": 18.25, "learning_rate": 1.4035311007311192e-06, "loss": 1.6826673746109009, "step": 13108 }, { "epoch": 2.386274688267953, "grad_norm": 22.625, "learning_rate": 1.4030697306146205e-06, "loss": 1.8292741775512695, "step": 13110 }, { "epoch": 2.3866387548921453, "grad_norm": 16.125, "learning_rate": 1.4026085948411672e-06, "loss": 1.5285316705703735, "step": 13112 }, { "epoch": 2.3870028215163375, "grad_norm": 8.375, "learning_rate": 1.402147693478429e-06, "loss": 1.4824053049087524, "step": 13114 }, { "epoch": 2.3873668881405297, "grad_norm": 9.1875, "learning_rate": 1.401687026594041e-06, "loss": 1.4457030296325684, "step": 13116 }, { "epoch": 2.387730954764722, "grad_norm": 4.90625, "learning_rate": 1.4012265942556046e-06, "loss": 1.1098920106887817, "step": 13118 }, { "epoch": 2.388095021388914, "grad_norm": 11.3125, "learning_rate": 1.4007663965306863e-06, "loss": 1.609438180923462, "step": 13120 }, { "epoch": 2.3884590880131062, "grad_norm": 7.75, "learning_rate": 1.4003064334868183e-06, "loss": 1.530211091041565, "step": 13122 }, { "epoch": 2.3888231546372984, "grad_norm": 14.375, "learning_rate": 1.3998467051914983e-06, "loss": 1.5139378309249878, "step": 13124 }, { "epoch": 2.3891872212614906, "grad_norm": 23.125, "learning_rate": 1.399387211712189e-06, "loss": 1.7802497148513794, "step": 13126 }, { "epoch": 2.3895512878856833, "grad_norm": 10.5625, "learning_rate": 1.39892795311632e-06, "loss": 1.1451196670532227, "step": 13128 }, { "epoch": 2.3899153545098755, "grad_norm": 12.8125, "learning_rate": 1.398468929471285e-06, "loss": 1.1933187246322632, "step": 13130 }, { "epoch": 2.3902794211340677, "grad_norm": 24.625, "learning_rate": 1.3980101408444446e-06, "loss": 1.3968465328216553, "step": 13132 }, { "epoch": 2.39064348775826, "grad_norm": 14.3125, "learning_rate": 1.397551587303123e-06, "loss": 1.8935859203338623, "step": 13134 }, { "epoch": 2.391007554382452, "grad_norm": 10.9375, "learning_rate": 1.3970932689146127e-06, "loss": 1.4720077514648438, "step": 13136 }, { "epoch": 2.3913716210066442, "grad_norm": 6.15625, "learning_rate": 1.3966351857461688e-06, "loss": 1.1271402835845947, "step": 13138 }, { "epoch": 2.3917356876308364, "grad_norm": 10.3125, "learning_rate": 1.3961773378650135e-06, "loss": 1.4992789030075073, "step": 13140 }, { "epoch": 2.3920997542550286, "grad_norm": 7.375, "learning_rate": 1.3957197253383339e-06, "loss": 1.4290014505386353, "step": 13142 }, { "epoch": 2.392463820879221, "grad_norm": 22.125, "learning_rate": 1.3952623482332833e-06, "loss": 1.307976245880127, "step": 13144 }, { "epoch": 2.392827887503413, "grad_norm": 9.625, "learning_rate": 1.3948052066169794e-06, "loss": 1.1577008962631226, "step": 13146 }, { "epoch": 2.393191954127605, "grad_norm": 16.5, "learning_rate": 1.3943483005565068e-06, "loss": 1.9710172414779663, "step": 13148 }, { "epoch": 2.3935560207517974, "grad_norm": 7.90625, "learning_rate": 1.393891630118913e-06, "loss": 1.7011219263076782, "step": 13150 }, { "epoch": 2.3939200873759896, "grad_norm": 77.0, "learning_rate": 1.3934351953712145e-06, "loss": 1.2155952453613281, "step": 13152 }, { "epoch": 2.3942841540001822, "grad_norm": 15.625, "learning_rate": 1.3929789963803897e-06, "loss": 1.8661668300628662, "step": 13154 }, { "epoch": 2.3946482206243744, "grad_norm": 15.0, "learning_rate": 1.3925230332133844e-06, "loss": 1.055245041847229, "step": 13156 }, { "epoch": 2.3950122872485666, "grad_norm": 9.125, "learning_rate": 1.3920673059371095e-06, "loss": 1.388941764831543, "step": 13158 }, { "epoch": 2.395376353872759, "grad_norm": 11.6875, "learning_rate": 1.3916118146184412e-06, "loss": 1.5239473581314087, "step": 13160 }, { "epoch": 2.395740420496951, "grad_norm": 22.375, "learning_rate": 1.391156559324221e-06, "loss": 0.7819581031799316, "step": 13162 }, { "epoch": 2.396104487121143, "grad_norm": 7.125, "learning_rate": 1.3907015401212553e-06, "loss": 0.7420260906219482, "step": 13164 }, { "epoch": 2.3964685537453354, "grad_norm": 17.125, "learning_rate": 1.390246757076317e-06, "loss": 1.4705394506454468, "step": 13166 }, { "epoch": 2.3968326203695276, "grad_norm": 18.625, "learning_rate": 1.3897922102561433e-06, "loss": 1.556272268295288, "step": 13168 }, { "epoch": 2.39719668699372, "grad_norm": 13.0, "learning_rate": 1.3893378997274371e-06, "loss": 1.8583375215530396, "step": 13170 }, { "epoch": 2.397560753617912, "grad_norm": 22.125, "learning_rate": 1.3888838255568666e-06, "loss": 1.7090548276901245, "step": 13172 }, { "epoch": 2.397924820242104, "grad_norm": 16.75, "learning_rate": 1.3884299878110651e-06, "loss": 0.9558186531066895, "step": 13174 }, { "epoch": 2.3982888868662964, "grad_norm": 12.125, "learning_rate": 1.3879763865566323e-06, "loss": 1.4936777353286743, "step": 13176 }, { "epoch": 2.3986529534904886, "grad_norm": 12.8125, "learning_rate": 1.3875230218601315e-06, "loss": 1.2443791627883911, "step": 13178 }, { "epoch": 2.399017020114681, "grad_norm": 3.15625, "learning_rate": 1.3870698937880928e-06, "loss": 1.115652084350586, "step": 13180 }, { "epoch": 2.3993810867388734, "grad_norm": 31.375, "learning_rate": 1.3866170024070102e-06, "loss": 1.2917252779006958, "step": 13182 }, { "epoch": 2.3997451533630656, "grad_norm": 24.25, "learning_rate": 1.3861643477833442e-06, "loss": 1.609562873840332, "step": 13184 }, { "epoch": 2.400109219987258, "grad_norm": 9.5, "learning_rate": 1.3857119299835197e-06, "loss": 0.6737778186798096, "step": 13186 }, { "epoch": 2.40047328661145, "grad_norm": 17.25, "learning_rate": 1.3852597490739272e-06, "loss": 1.5048739910125732, "step": 13188 }, { "epoch": 2.400837353235642, "grad_norm": 7.8125, "learning_rate": 1.384807805120923e-06, "loss": 1.414874792098999, "step": 13190 }, { "epoch": 2.4012014198598344, "grad_norm": 55.5, "learning_rate": 1.3843560981908274e-06, "loss": 1.2349931001663208, "step": 13192 }, { "epoch": 2.4015654864840266, "grad_norm": 3.875, "learning_rate": 1.383904628349927e-06, "loss": 1.2970219850540161, "step": 13194 }, { "epoch": 2.4019295531082188, "grad_norm": 10.25, "learning_rate": 1.3834533956644724e-06, "loss": 1.1217862367630005, "step": 13196 }, { "epoch": 2.402293619732411, "grad_norm": 35.5, "learning_rate": 1.383002400200681e-06, "loss": 1.3421778678894043, "step": 13198 }, { "epoch": 2.402657686356603, "grad_norm": 13.25, "learning_rate": 1.3825516420247342e-06, "loss": 1.5503783226013184, "step": 13200 }, { "epoch": 2.4030217529807953, "grad_norm": 18.375, "learning_rate": 1.382101121202779e-06, "loss": 1.7319862842559814, "step": 13202 }, { "epoch": 2.4033858196049875, "grad_norm": 18.875, "learning_rate": 1.3816508378009274e-06, "loss": 1.868557095527649, "step": 13204 }, { "epoch": 2.40374988622918, "grad_norm": 24.5, "learning_rate": 1.3812007918852568e-06, "loss": 1.9943368434906006, "step": 13206 }, { "epoch": 2.404113952853372, "grad_norm": 9.8125, "learning_rate": 1.3807509835218097e-06, "loss": 1.491589903831482, "step": 13208 }, { "epoch": 2.4044780194775646, "grad_norm": 8.75, "learning_rate": 1.3803014127765935e-06, "loss": 1.505659580230713, "step": 13210 }, { "epoch": 2.4048420861017568, "grad_norm": 39.0, "learning_rate": 1.3798520797155809e-06, "loss": 1.269268274307251, "step": 13212 }, { "epoch": 2.405206152725949, "grad_norm": 8.5, "learning_rate": 1.3794029844047097e-06, "loss": 1.273883581161499, "step": 13214 }, { "epoch": 2.405570219350141, "grad_norm": 8.9375, "learning_rate": 1.3789541269098827e-06, "loss": 1.296435832977295, "step": 13216 }, { "epoch": 2.4059342859743333, "grad_norm": 12.5625, "learning_rate": 1.3785055072969682e-06, "loss": 1.137270212173462, "step": 13218 }, { "epoch": 2.4062983525985255, "grad_norm": 20.375, "learning_rate": 1.378057125631799e-06, "loss": 1.4742759466171265, "step": 13220 }, { "epoch": 2.4066624192227177, "grad_norm": 38.5, "learning_rate": 1.3776089819801738e-06, "loss": 1.1375072002410889, "step": 13222 }, { "epoch": 2.40702648584691, "grad_norm": 33.75, "learning_rate": 1.3771610764078552e-06, "loss": 1.6485258340835571, "step": 13224 }, { "epoch": 2.407390552471102, "grad_norm": 9.25, "learning_rate": 1.376713408980572e-06, "loss": 1.494165062904358, "step": 13226 }, { "epoch": 2.4077546190952943, "grad_norm": 9.25, "learning_rate": 1.3762659797640174e-06, "loss": 1.4244030714035034, "step": 13228 }, { "epoch": 2.4081186857194865, "grad_norm": 21.25, "learning_rate": 1.3758187888238496e-06, "loss": 1.434171438217163, "step": 13230 }, { "epoch": 2.4084827523436787, "grad_norm": 14.8125, "learning_rate": 1.3753718362256927e-06, "loss": 1.387436032295227, "step": 13232 }, { "epoch": 2.408846818967871, "grad_norm": 60.0, "learning_rate": 1.3749251220351345e-06, "loss": 1.6377493143081665, "step": 13234 }, { "epoch": 2.4092108855920635, "grad_norm": 9.375, "learning_rate": 1.374478646317729e-06, "loss": 1.6165059804916382, "step": 13236 }, { "epoch": 2.4095749522162557, "grad_norm": 3.671875, "learning_rate": 1.3740324091389945e-06, "loss": 1.4072531461715698, "step": 13238 }, { "epoch": 2.409939018840448, "grad_norm": 7.03125, "learning_rate": 1.3735864105644142e-06, "loss": 1.107566475868225, "step": 13240 }, { "epoch": 2.41030308546464, "grad_norm": 13.8125, "learning_rate": 1.3731406506594373e-06, "loss": 1.654891848564148, "step": 13242 }, { "epoch": 2.4106671520888323, "grad_norm": 10.4375, "learning_rate": 1.3726951294894764e-06, "loss": 1.5468419790267944, "step": 13244 }, { "epoch": 2.4110312187130245, "grad_norm": 10.125, "learning_rate": 1.3722498471199105e-06, "loss": 1.3897886276245117, "step": 13246 }, { "epoch": 2.4113952853372167, "grad_norm": 14.3125, "learning_rate": 1.371804803616083e-06, "loss": 1.7811107635498047, "step": 13248 }, { "epoch": 2.411759351961409, "grad_norm": 9.625, "learning_rate": 1.3713599990433018e-06, "loss": 1.4439624547958374, "step": 13250 }, { "epoch": 2.412123418585601, "grad_norm": 21.875, "learning_rate": 1.3709154334668406e-06, "loss": 1.201629877090454, "step": 13252 }, { "epoch": 2.4124874852097933, "grad_norm": 12.875, "learning_rate": 1.3704711069519374e-06, "loss": 1.766385793685913, "step": 13254 }, { "epoch": 2.4128515518339855, "grad_norm": 8.6875, "learning_rate": 1.3700270195637954e-06, "loss": 1.387178897857666, "step": 13256 }, { "epoch": 2.4132156184581777, "grad_norm": 10.0625, "learning_rate": 1.3695831713675829e-06, "loss": 1.2538191080093384, "step": 13258 }, { "epoch": 2.41357968508237, "grad_norm": 29.875, "learning_rate": 1.3691395624284321e-06, "loss": 1.4911770820617676, "step": 13260 }, { "epoch": 2.4139437517065625, "grad_norm": 10.6875, "learning_rate": 1.3686961928114411e-06, "loss": 1.6768076419830322, "step": 13262 }, { "epoch": 2.4143078183307547, "grad_norm": 9.3125, "learning_rate": 1.3682530625816729e-06, "loss": 1.6474988460540771, "step": 13264 }, { "epoch": 2.414671884954947, "grad_norm": 22.625, "learning_rate": 1.3678101718041547e-06, "loss": 1.2055500745773315, "step": 13266 }, { "epoch": 2.415035951579139, "grad_norm": 17.25, "learning_rate": 1.3673675205438796e-06, "loss": 1.3327999114990234, "step": 13268 }, { "epoch": 2.4154000182033313, "grad_norm": 10.9375, "learning_rate": 1.3669251088658038e-06, "loss": 1.4991543292999268, "step": 13270 }, { "epoch": 2.4157640848275235, "grad_norm": 13.5, "learning_rate": 1.3664829368348504e-06, "loss": 1.6645715236663818, "step": 13272 }, { "epoch": 2.4161281514517157, "grad_norm": 5.5, "learning_rate": 1.366041004515906e-06, "loss": 1.266193151473999, "step": 13274 }, { "epoch": 2.416492218075908, "grad_norm": 15.9375, "learning_rate": 1.365599311973822e-06, "loss": 1.4692879915237427, "step": 13276 }, { "epoch": 2.4168562847001, "grad_norm": 6.84375, "learning_rate": 1.3651578592734155e-06, "loss": 1.4225361347198486, "step": 13278 }, { "epoch": 2.4172203513242922, "grad_norm": 9.6875, "learning_rate": 1.3647166464794675e-06, "loss": 1.0867118835449219, "step": 13280 }, { "epoch": 2.4175844179484844, "grad_norm": 11.8125, "learning_rate": 1.3642756736567247e-06, "loss": 1.4150571823120117, "step": 13282 }, { "epoch": 2.4179484845726766, "grad_norm": 14.125, "learning_rate": 1.3638349408698976e-06, "loss": 1.2924549579620361, "step": 13284 }, { "epoch": 2.418312551196869, "grad_norm": 11.5625, "learning_rate": 1.3633944481836623e-06, "loss": 1.2613023519515991, "step": 13286 }, { "epoch": 2.4186766178210615, "grad_norm": 11.8125, "learning_rate": 1.3629541956626592e-06, "loss": 1.3378260135650635, "step": 13288 }, { "epoch": 2.4190406844452537, "grad_norm": 18.5, "learning_rate": 1.362514183371493e-06, "loss": 1.5379137992858887, "step": 13290 }, { "epoch": 2.419404751069446, "grad_norm": 13.3125, "learning_rate": 1.3620744113747347e-06, "loss": 1.6859205961227417, "step": 13292 }, { "epoch": 2.419768817693638, "grad_norm": 15.25, "learning_rate": 1.3616348797369183e-06, "loss": 1.432577133178711, "step": 13294 }, { "epoch": 2.4201328843178302, "grad_norm": 15.3125, "learning_rate": 1.3611955885225438e-06, "loss": 1.45803701877594, "step": 13296 }, { "epoch": 2.4204969509420224, "grad_norm": 9.875, "learning_rate": 1.3607565377960752e-06, "loss": 1.2603408098220825, "step": 13298 }, { "epoch": 2.4208610175662146, "grad_norm": 6.1875, "learning_rate": 1.3603177276219415e-06, "loss": 0.9207422733306885, "step": 13300 }, { "epoch": 2.421225084190407, "grad_norm": 3.390625, "learning_rate": 1.359879158064536e-06, "loss": 1.081762433052063, "step": 13302 }, { "epoch": 2.421589150814599, "grad_norm": 49.5, "learning_rate": 1.3594408291882175e-06, "loss": 1.3305965662002563, "step": 13304 }, { "epoch": 2.421953217438791, "grad_norm": 38.25, "learning_rate": 1.3590027410573085e-06, "loss": 0.6426777839660645, "step": 13306 }, { "epoch": 2.4223172840629834, "grad_norm": 17.875, "learning_rate": 1.3585648937360969e-06, "loss": 1.423840880393982, "step": 13308 }, { "epoch": 2.4226813506871756, "grad_norm": 3.078125, "learning_rate": 1.3581272872888348e-06, "loss": 1.2661268711090088, "step": 13310 }, { "epoch": 2.423045417311368, "grad_norm": 6.90625, "learning_rate": 1.3576899217797395e-06, "loss": 0.9752209186553955, "step": 13312 }, { "epoch": 2.4234094839355604, "grad_norm": 54.0, "learning_rate": 1.3572527972729927e-06, "loss": 1.5633896589279175, "step": 13314 }, { "epoch": 2.423773550559752, "grad_norm": 12.5625, "learning_rate": 1.3568159138327402e-06, "loss": 1.8636209964752197, "step": 13316 }, { "epoch": 2.424137617183945, "grad_norm": 3.609375, "learning_rate": 1.3563792715230932e-06, "loss": 1.0624810457229614, "step": 13318 }, { "epoch": 2.424501683808137, "grad_norm": 19.25, "learning_rate": 1.355942870408127e-06, "loss": 1.0762466192245483, "step": 13320 }, { "epoch": 2.424865750432329, "grad_norm": 10.5625, "learning_rate": 1.3555067105518817e-06, "loss": 1.7579777240753174, "step": 13322 }, { "epoch": 2.4252298170565214, "grad_norm": 15.8125, "learning_rate": 1.3550707920183625e-06, "loss": 1.549422025680542, "step": 13324 }, { "epoch": 2.4255938836807136, "grad_norm": 9.0, "learning_rate": 1.3546351148715378e-06, "loss": 1.5649856328964233, "step": 13326 }, { "epoch": 2.425957950304906, "grad_norm": 17.375, "learning_rate": 1.354199679175342e-06, "loss": 0.6884002685546875, "step": 13328 }, { "epoch": 2.426322016929098, "grad_norm": 29.0, "learning_rate": 1.3537644849936738e-06, "loss": 0.46129125356674194, "step": 13330 }, { "epoch": 2.42668608355329, "grad_norm": 18.75, "learning_rate": 1.3533295323903954e-06, "loss": 0.8968048095703125, "step": 13332 }, { "epoch": 2.4270501501774824, "grad_norm": 8.125, "learning_rate": 1.3528948214293347e-06, "loss": 1.4076263904571533, "step": 13334 }, { "epoch": 2.4274142168016746, "grad_norm": 15.625, "learning_rate": 1.3524603521742842e-06, "loss": 1.6677645444869995, "step": 13336 }, { "epoch": 2.4277782834258668, "grad_norm": 17.625, "learning_rate": 1.352026124689e-06, "loss": 1.8949885368347168, "step": 13338 }, { "epoch": 2.4281423500500594, "grad_norm": 17.625, "learning_rate": 1.3515921390372032e-06, "loss": 1.4756065607070923, "step": 13340 }, { "epoch": 2.428506416674251, "grad_norm": 21.375, "learning_rate": 1.3511583952825795e-06, "loss": 1.4696450233459473, "step": 13342 }, { "epoch": 2.428870483298444, "grad_norm": 4.65625, "learning_rate": 1.3507248934887795e-06, "loss": 1.1072717905044556, "step": 13344 }, { "epoch": 2.429234549922636, "grad_norm": 7.40625, "learning_rate": 1.3502916337194171e-06, "loss": 0.9419214725494385, "step": 13346 }, { "epoch": 2.429598616546828, "grad_norm": 15.9375, "learning_rate": 1.3498586160380722e-06, "loss": 1.3977391719818115, "step": 13348 }, { "epoch": 2.4299626831710204, "grad_norm": 17.75, "learning_rate": 1.3494258405082874e-06, "loss": 1.435898780822754, "step": 13350 }, { "epoch": 2.4303267497952126, "grad_norm": 7.1875, "learning_rate": 1.3489933071935715e-06, "loss": 1.3148350715637207, "step": 13352 }, { "epoch": 2.4306908164194048, "grad_norm": 8.3125, "learning_rate": 1.348561016157397e-06, "loss": 1.386268973350525, "step": 13354 }, { "epoch": 2.431054883043597, "grad_norm": 8.75, "learning_rate": 1.3481289674632006e-06, "loss": 1.4000086784362793, "step": 13356 }, { "epoch": 2.431418949667789, "grad_norm": 16.875, "learning_rate": 1.347697161174384e-06, "loss": 1.3190433979034424, "step": 13358 }, { "epoch": 2.4317830162919813, "grad_norm": 9.4375, "learning_rate": 1.3472655973543124e-06, "loss": 1.4581642150878906, "step": 13360 }, { "epoch": 2.4321470829161735, "grad_norm": 8.8125, "learning_rate": 1.3468342760663167e-06, "loss": 1.1164554357528687, "step": 13362 }, { "epoch": 2.4325111495403657, "grad_norm": 17.875, "learning_rate": 1.3464031973736912e-06, "loss": 0.44168874621391296, "step": 13364 }, { "epoch": 2.432875216164558, "grad_norm": 44.75, "learning_rate": 1.3459723613396949e-06, "loss": 1.346256971359253, "step": 13366 }, { "epoch": 2.43323928278875, "grad_norm": 3.625, "learning_rate": 1.3455417680275518e-06, "loss": 1.3774802684783936, "step": 13368 }, { "epoch": 2.4336033494129428, "grad_norm": 14.125, "learning_rate": 1.3451114175004487e-06, "loss": 1.4471994638442993, "step": 13370 }, { "epoch": 2.433967416037135, "grad_norm": 10.5625, "learning_rate": 1.3446813098215388e-06, "loss": 1.5925532579421997, "step": 13372 }, { "epoch": 2.434331482661327, "grad_norm": 8.75, "learning_rate": 1.3442514450539381e-06, "loss": 1.4256774187088013, "step": 13374 }, { "epoch": 2.4346955492855193, "grad_norm": 17.375, "learning_rate": 1.343821823260728e-06, "loss": 1.3854000568389893, "step": 13376 }, { "epoch": 2.4350596159097115, "grad_norm": 16.0, "learning_rate": 1.3433924445049532e-06, "loss": 1.7081060409545898, "step": 13378 }, { "epoch": 2.4354236825339037, "grad_norm": 16.875, "learning_rate": 1.3429633088496236e-06, "loss": 2.032484531402588, "step": 13380 }, { "epoch": 2.435787749158096, "grad_norm": 9.75, "learning_rate": 1.3425344163577128e-06, "loss": 1.6272929906845093, "step": 13382 }, { "epoch": 2.436151815782288, "grad_norm": 7.25, "learning_rate": 1.3421057670921594e-06, "loss": 1.413218379020691, "step": 13384 }, { "epoch": 2.4365158824064803, "grad_norm": 10.3125, "learning_rate": 1.341677361115866e-06, "loss": 1.455143690109253, "step": 13386 }, { "epoch": 2.4368799490306725, "grad_norm": 74.0, "learning_rate": 1.3412491984916992e-06, "loss": 1.5953041315078735, "step": 13388 }, { "epoch": 2.4372440156548647, "grad_norm": 17.375, "learning_rate": 1.34082127928249e-06, "loss": 1.3930224180221558, "step": 13390 }, { "epoch": 2.437608082279057, "grad_norm": 14.25, "learning_rate": 1.3403936035510342e-06, "loss": 1.5179580450057983, "step": 13392 }, { "epoch": 2.437972148903249, "grad_norm": 51.25, "learning_rate": 1.3399661713600912e-06, "loss": 1.6847363710403442, "step": 13394 }, { "epoch": 2.4383362155274417, "grad_norm": 18.125, "learning_rate": 1.339538982772385e-06, "loss": 1.1017258167266846, "step": 13396 }, { "epoch": 2.438700282151634, "grad_norm": 11.5, "learning_rate": 1.339112037850604e-06, "loss": 1.2716079950332642, "step": 13398 }, { "epoch": 2.439064348775826, "grad_norm": 22.5, "learning_rate": 1.3386853366574004e-06, "loss": 1.6934351921081543, "step": 13400 }, { "epoch": 2.4394284154000183, "grad_norm": 53.0, "learning_rate": 1.3382588792553908e-06, "loss": 1.311166524887085, "step": 13402 }, { "epoch": 2.4397924820242105, "grad_norm": 11.3125, "learning_rate": 1.3378326657071562e-06, "loss": 1.944077730178833, "step": 13404 }, { "epoch": 2.4401565486484027, "grad_norm": 9.4375, "learning_rate": 1.337406696075242e-06, "loss": 1.2993947267532349, "step": 13406 }, { "epoch": 2.440520615272595, "grad_norm": 12.6875, "learning_rate": 1.336980970422157e-06, "loss": 0.9996879696846008, "step": 13408 }, { "epoch": 2.440884681896787, "grad_norm": 18.25, "learning_rate": 1.336555488810375e-06, "loss": 2.0626003742218018, "step": 13410 }, { "epoch": 2.4412487485209793, "grad_norm": 13.0, "learning_rate": 1.3361302513023335e-06, "loss": 1.2710504531860352, "step": 13412 }, { "epoch": 2.4416128151451715, "grad_norm": 8.0, "learning_rate": 1.3357052579604347e-06, "loss": 1.3482321500778198, "step": 13414 }, { "epoch": 2.4419768817693637, "grad_norm": 6.75, "learning_rate": 1.3352805088470443e-06, "loss": 1.0457558631896973, "step": 13416 }, { "epoch": 2.442340948393556, "grad_norm": 16.75, "learning_rate": 1.3348560040244932e-06, "loss": 1.7914769649505615, "step": 13418 }, { "epoch": 2.442705015017748, "grad_norm": 9.1875, "learning_rate": 1.334431743555075e-06, "loss": 1.1658673286437988, "step": 13420 }, { "epoch": 2.4430690816419407, "grad_norm": 9.9375, "learning_rate": 1.3340077275010486e-06, "loss": 1.1554853916168213, "step": 13422 }, { "epoch": 2.443433148266133, "grad_norm": 12.375, "learning_rate": 1.3335839559246364e-06, "loss": 1.5388017892837524, "step": 13424 }, { "epoch": 2.443797214890325, "grad_norm": 20.625, "learning_rate": 1.3331604288880251e-06, "loss": 1.7255887985229492, "step": 13426 }, { "epoch": 2.4441612815145173, "grad_norm": 26.25, "learning_rate": 1.332737146453366e-06, "loss": 1.684288501739502, "step": 13428 }, { "epoch": 2.4445253481387095, "grad_norm": 6.8125, "learning_rate": 1.3323141086827736e-06, "loss": 1.2902215719223022, "step": 13430 }, { "epoch": 2.4448894147629017, "grad_norm": 20.0, "learning_rate": 1.3318913156383273e-06, "loss": 1.3036847114562988, "step": 13432 }, { "epoch": 2.445253481387094, "grad_norm": 8.0625, "learning_rate": 1.3314687673820703e-06, "loss": 1.2368851900100708, "step": 13434 }, { "epoch": 2.445617548011286, "grad_norm": 6.125, "learning_rate": 1.33104646397601e-06, "loss": 1.3138872385025024, "step": 13436 }, { "epoch": 2.4459816146354783, "grad_norm": 12.6875, "learning_rate": 1.3306244054821169e-06, "loss": 1.2879775762557983, "step": 13438 }, { "epoch": 2.4463456812596704, "grad_norm": 14.1875, "learning_rate": 1.330202591962327e-06, "loss": 2.1288092136383057, "step": 13440 }, { "epoch": 2.4467097478838626, "grad_norm": 11.3125, "learning_rate": 1.32978102347854e-06, "loss": 1.3249249458312988, "step": 13442 }, { "epoch": 2.447073814508055, "grad_norm": 14.75, "learning_rate": 1.3293597000926185e-06, "loss": 1.4727728366851807, "step": 13444 }, { "epoch": 2.447437881132247, "grad_norm": 15.8125, "learning_rate": 1.3289386218663907e-06, "loss": 1.378298282623291, "step": 13446 }, { "epoch": 2.4478019477564397, "grad_norm": 18.25, "learning_rate": 1.3285177888616483e-06, "loss": 1.4411320686340332, "step": 13448 }, { "epoch": 2.4481660143806314, "grad_norm": 10.8125, "learning_rate": 1.328097201140146e-06, "loss": 1.1870819330215454, "step": 13450 }, { "epoch": 2.448530081004824, "grad_norm": 35.5, "learning_rate": 1.3276768587636037e-06, "loss": 1.219420313835144, "step": 13452 }, { "epoch": 2.4488941476290162, "grad_norm": 12.5625, "learning_rate": 1.3272567617937054e-06, "loss": 1.8146917819976807, "step": 13454 }, { "epoch": 2.4492582142532084, "grad_norm": 5.03125, "learning_rate": 1.326836910292098e-06, "loss": 1.1944459676742554, "step": 13456 }, { "epoch": 2.4496222808774006, "grad_norm": 10.625, "learning_rate": 1.3264173043203934e-06, "loss": 1.3364241123199463, "step": 13458 }, { "epoch": 2.449986347501593, "grad_norm": 6.9375, "learning_rate": 1.3259979439401671e-06, "loss": 1.3773140907287598, "step": 13460 }, { "epoch": 2.450350414125785, "grad_norm": 72.0, "learning_rate": 1.325578829212958e-06, "loss": 1.3781545162200928, "step": 13462 }, { "epoch": 2.450714480749977, "grad_norm": 23.875, "learning_rate": 1.3251599602002704e-06, "loss": 1.4792027473449707, "step": 13464 }, { "epoch": 2.4510785473741694, "grad_norm": 13.0, "learning_rate": 1.324741336963571e-06, "loss": 1.3898544311523438, "step": 13466 }, { "epoch": 2.4514426139983616, "grad_norm": 7.90625, "learning_rate": 1.3243229595642907e-06, "loss": 1.2882496118545532, "step": 13468 }, { "epoch": 2.451806680622554, "grad_norm": 14.5625, "learning_rate": 1.3239048280638255e-06, "loss": 1.4602361917495728, "step": 13470 }, { "epoch": 2.452170747246746, "grad_norm": 6.09375, "learning_rate": 1.323486942523534e-06, "loss": 1.4485259056091309, "step": 13472 }, { "epoch": 2.452534813870938, "grad_norm": 4.90625, "learning_rate": 1.3230693030047398e-06, "loss": 1.1378285884857178, "step": 13474 }, { "epoch": 2.4528988804951304, "grad_norm": 16.25, "learning_rate": 1.322651909568729e-06, "loss": 1.4630377292633057, "step": 13476 }, { "epoch": 2.453262947119323, "grad_norm": 4.15625, "learning_rate": 1.3222347622767529e-06, "loss": 1.0651907920837402, "step": 13478 }, { "epoch": 2.453627013743515, "grad_norm": 7.96875, "learning_rate": 1.321817861190026e-06, "loss": 1.4467761516571045, "step": 13480 }, { "epoch": 2.4539910803677074, "grad_norm": 9.4375, "learning_rate": 1.3214012063697268e-06, "loss": 1.0389844179153442, "step": 13482 }, { "epoch": 2.4543551469918996, "grad_norm": 50.25, "learning_rate": 1.320984797876998e-06, "loss": 0.5134612321853638, "step": 13484 }, { "epoch": 2.454719213616092, "grad_norm": 10.3125, "learning_rate": 1.3205686357729452e-06, "loss": 1.3701632022857666, "step": 13486 }, { "epoch": 2.455083280240284, "grad_norm": 15.8125, "learning_rate": 1.3201527201186396e-06, "loss": 1.4806716442108154, "step": 13488 }, { "epoch": 2.455447346864476, "grad_norm": 52.25, "learning_rate": 1.3197370509751143e-06, "loss": 1.6775561571121216, "step": 13490 }, { "epoch": 2.4558114134886684, "grad_norm": 17.375, "learning_rate": 1.3193216284033672e-06, "loss": 1.1514748334884644, "step": 13492 }, { "epoch": 2.4561754801128606, "grad_norm": 12.25, "learning_rate": 1.3189064524643597e-06, "loss": 1.6332601308822632, "step": 13494 }, { "epoch": 2.4565395467370528, "grad_norm": 10.875, "learning_rate": 1.3184915232190175e-06, "loss": 2.0473432540893555, "step": 13496 }, { "epoch": 2.456903613361245, "grad_norm": 105.5, "learning_rate": 1.31807684072823e-06, "loss": 1.3464536666870117, "step": 13498 }, { "epoch": 2.457267679985437, "grad_norm": 31.0, "learning_rate": 1.3176624050528498e-06, "loss": 1.6327688694000244, "step": 13500 }, { "epoch": 2.4576317466096294, "grad_norm": 23.625, "learning_rate": 1.3172482162536936e-06, "loss": 1.3721489906311035, "step": 13502 }, { "epoch": 2.457995813233822, "grad_norm": 7.0625, "learning_rate": 1.316834274391542e-06, "loss": 1.2554636001586914, "step": 13504 }, { "epoch": 2.458359879858014, "grad_norm": 7.46875, "learning_rate": 1.3164205795271397e-06, "loss": 1.1718668937683105, "step": 13506 }, { "epoch": 2.4587239464822064, "grad_norm": 4.375, "learning_rate": 1.3160071317211943e-06, "loss": 1.576371192932129, "step": 13508 }, { "epoch": 2.4590880131063986, "grad_norm": 12.875, "learning_rate": 1.3155939310343773e-06, "loss": 1.374147891998291, "step": 13510 }, { "epoch": 2.4594520797305908, "grad_norm": 6.5, "learning_rate": 1.315180977527325e-06, "loss": 1.2366358041763306, "step": 13512 }, { "epoch": 2.459816146354783, "grad_norm": 16.375, "learning_rate": 1.3147682712606364e-06, "loss": 1.3463104963302612, "step": 13514 }, { "epoch": 2.460180212978975, "grad_norm": 25.125, "learning_rate": 1.314355812294874e-06, "loss": 1.144848346710205, "step": 13516 }, { "epoch": 2.4605442796031674, "grad_norm": 15.25, "learning_rate": 1.3139436006905648e-06, "loss": 1.053707242012024, "step": 13518 }, { "epoch": 2.4609083462273595, "grad_norm": 17.25, "learning_rate": 1.3135316365081996e-06, "loss": 1.5874720811843872, "step": 13520 }, { "epoch": 2.4612724128515517, "grad_norm": 11.8125, "learning_rate": 1.3131199198082318e-06, "loss": 1.547202229499817, "step": 13522 }, { "epoch": 2.461636479475744, "grad_norm": 15.4375, "learning_rate": 1.3127084506510792e-06, "loss": 1.4658070802688599, "step": 13524 }, { "epoch": 2.462000546099936, "grad_norm": 8.4375, "learning_rate": 1.3122972290971239e-06, "loss": 1.4734885692596436, "step": 13526 }, { "epoch": 2.4623646127241283, "grad_norm": 8.25, "learning_rate": 1.3118862552067104e-06, "loss": 1.2806057929992676, "step": 13528 }, { "epoch": 2.462728679348321, "grad_norm": 8.75, "learning_rate": 1.311475529040148e-06, "loss": 1.4752672910690308, "step": 13530 }, { "epoch": 2.463092745972513, "grad_norm": 9.5, "learning_rate": 1.3110650506577083e-06, "loss": 1.4311320781707764, "step": 13532 }, { "epoch": 2.4634568125967053, "grad_norm": 26.125, "learning_rate": 1.310654820119628e-06, "loss": 1.4307315349578857, "step": 13534 }, { "epoch": 2.4638208792208975, "grad_norm": 8.5625, "learning_rate": 1.310244837486106e-06, "loss": 1.4179456233978271, "step": 13536 }, { "epoch": 2.4641849458450897, "grad_norm": 15.0625, "learning_rate": 1.3098351028173065e-06, "loss": 1.43107271194458, "step": 13538 }, { "epoch": 2.464549012469282, "grad_norm": 5.09375, "learning_rate": 1.309425616173356e-06, "loss": 1.3568236827850342, "step": 13540 }, { "epoch": 2.464913079093474, "grad_norm": 5.15625, "learning_rate": 1.309016377614345e-06, "loss": 1.3038747310638428, "step": 13542 }, { "epoch": 2.4652771457176663, "grad_norm": 12.5625, "learning_rate": 1.308607387200328e-06, "loss": 1.2644062042236328, "step": 13544 }, { "epoch": 2.4656412123418585, "grad_norm": 105.5, "learning_rate": 1.3081986449913218e-06, "loss": 2.071988582611084, "step": 13546 }, { "epoch": 2.4660052789660507, "grad_norm": 6.8125, "learning_rate": 1.3077901510473082e-06, "loss": 1.0824509859085083, "step": 13548 }, { "epoch": 2.466369345590243, "grad_norm": 11.9375, "learning_rate": 1.3073819054282322e-06, "loss": 1.4564602375030518, "step": 13550 }, { "epoch": 2.466733412214435, "grad_norm": 6.65625, "learning_rate": 1.306973908194002e-06, "loss": 1.32082998752594, "step": 13552 }, { "epoch": 2.4670974788386273, "grad_norm": 7.0625, "learning_rate": 1.3065661594044896e-06, "loss": 1.1079126596450806, "step": 13554 }, { "epoch": 2.46746154546282, "grad_norm": 81.0, "learning_rate": 1.3061586591195303e-06, "loss": 1.4088371992111206, "step": 13556 }, { "epoch": 2.4678256120870117, "grad_norm": 11.5, "learning_rate": 1.305751407398923e-06, "loss": 1.3359097242355347, "step": 13558 }, { "epoch": 2.4681896787112043, "grad_norm": 12.3125, "learning_rate": 1.305344404302431e-06, "loss": 1.022810697555542, "step": 13560 }, { "epoch": 2.4685537453353965, "grad_norm": 10.6875, "learning_rate": 1.3049376498897794e-06, "loss": 1.8778188228607178, "step": 13562 }, { "epoch": 2.4689178119595887, "grad_norm": 27.875, "learning_rate": 1.3045311442206585e-06, "loss": 1.6414706707000732, "step": 13564 }, { "epoch": 2.469281878583781, "grad_norm": 7.0, "learning_rate": 1.3041248873547208e-06, "loss": 1.1584283113479614, "step": 13566 }, { "epoch": 2.469645945207973, "grad_norm": 6.65625, "learning_rate": 1.3037188793515831e-06, "loss": 1.1896963119506836, "step": 13568 }, { "epoch": 2.4700100118321653, "grad_norm": 17.25, "learning_rate": 1.3033131202708257e-06, "loss": 1.256224274635315, "step": 13570 }, { "epoch": 2.4703740784563575, "grad_norm": 18.5, "learning_rate": 1.3029076101719917e-06, "loss": 1.3375985622406006, "step": 13572 }, { "epoch": 2.4707381450805497, "grad_norm": 19.625, "learning_rate": 1.3025023491145883e-06, "loss": 1.067662239074707, "step": 13574 }, { "epoch": 2.471102211704742, "grad_norm": 12.25, "learning_rate": 1.3020973371580855e-06, "loss": 1.5700691938400269, "step": 13576 }, { "epoch": 2.471466278328934, "grad_norm": 9.25, "learning_rate": 1.3016925743619176e-06, "loss": 1.3685240745544434, "step": 13578 }, { "epoch": 2.4718303449531263, "grad_norm": 8.1875, "learning_rate": 1.3012880607854816e-06, "loss": 1.2128101587295532, "step": 13580 }, { "epoch": 2.472194411577319, "grad_norm": 3.5625, "learning_rate": 1.3008837964881387e-06, "loss": 1.1041147708892822, "step": 13582 }, { "epoch": 2.4725584782015106, "grad_norm": 13.0625, "learning_rate": 1.3004797815292127e-06, "loss": 1.4154976606369019, "step": 13584 }, { "epoch": 2.4729225448257033, "grad_norm": 16.875, "learning_rate": 1.3000760159679911e-06, "loss": 1.392364501953125, "step": 13586 }, { "epoch": 2.4732866114498955, "grad_norm": 8.8125, "learning_rate": 1.2996724998637253e-06, "loss": 1.1981555223464966, "step": 13588 }, { "epoch": 2.4736506780740877, "grad_norm": 8.3125, "learning_rate": 1.299269233275629e-06, "loss": 1.2037687301635742, "step": 13590 }, { "epoch": 2.47401474469828, "grad_norm": 2.890625, "learning_rate": 1.2988662162628803e-06, "loss": 0.8786299228668213, "step": 13592 }, { "epoch": 2.474378811322472, "grad_norm": 10.6875, "learning_rate": 1.2984634488846204e-06, "loss": 0.37407782673835754, "step": 13594 }, { "epoch": 2.4747428779466643, "grad_norm": 12.4375, "learning_rate": 1.2980609311999535e-06, "loss": 0.9029257893562317, "step": 13596 }, { "epoch": 2.4751069445708564, "grad_norm": 7.0, "learning_rate": 1.2976586632679478e-06, "loss": 1.2732961177825928, "step": 13598 }, { "epoch": 2.4754710111950486, "grad_norm": 11.3125, "learning_rate": 1.297256645147634e-06, "loss": 1.5063642263412476, "step": 13600 }, { "epoch": 2.475835077819241, "grad_norm": 13.6875, "learning_rate": 1.2968548768980068e-06, "loss": 1.6139041185379028, "step": 13602 }, { "epoch": 2.476199144443433, "grad_norm": 9.125, "learning_rate": 1.2964533585780246e-06, "loss": 1.352710485458374, "step": 13604 }, { "epoch": 2.4765632110676252, "grad_norm": 3.234375, "learning_rate": 1.2960520902466077e-06, "loss": 0.8745146989822388, "step": 13606 }, { "epoch": 2.4769272776918174, "grad_norm": 8.3125, "learning_rate": 1.2956510719626413e-06, "loss": 1.3983957767486572, "step": 13608 }, { "epoch": 2.4772913443160096, "grad_norm": 13.5, "learning_rate": 1.2952503037849731e-06, "loss": 1.4222064018249512, "step": 13610 }, { "epoch": 2.4776554109402023, "grad_norm": 41.25, "learning_rate": 1.294849785772414e-06, "loss": 1.4276666641235352, "step": 13612 }, { "epoch": 2.4780194775643944, "grad_norm": 17.75, "learning_rate": 1.2944495179837383e-06, "loss": 1.8202000856399536, "step": 13614 }, { "epoch": 2.4783835441885866, "grad_norm": 11.6875, "learning_rate": 1.294049500477684e-06, "loss": 1.3318727016448975, "step": 13616 }, { "epoch": 2.478747610812779, "grad_norm": 7.0, "learning_rate": 1.2936497333129519e-06, "loss": 1.1300780773162842, "step": 13618 }, { "epoch": 2.479111677436971, "grad_norm": 26.875, "learning_rate": 1.2932502165482063e-06, "loss": 1.384740948677063, "step": 13620 }, { "epoch": 2.4794757440611632, "grad_norm": 10.9375, "learning_rate": 1.2928509502420745e-06, "loss": 1.481090784072876, "step": 13622 }, { "epoch": 2.4798398106853554, "grad_norm": 4.75, "learning_rate": 1.2924519344531472e-06, "loss": 1.0039970874786377, "step": 13624 }, { "epoch": 2.4802038773095476, "grad_norm": 4.03125, "learning_rate": 1.2920531692399781e-06, "loss": 0.9357774257659912, "step": 13626 }, { "epoch": 2.48056794393374, "grad_norm": 6.8125, "learning_rate": 1.2916546546610854e-06, "loss": 1.233557105064392, "step": 13628 }, { "epoch": 2.480932010557932, "grad_norm": 5.75, "learning_rate": 1.2912563907749483e-06, "loss": 1.4808787107467651, "step": 13630 }, { "epoch": 2.481296077182124, "grad_norm": 5.25, "learning_rate": 1.290858377640011e-06, "loss": 1.4696505069732666, "step": 13632 }, { "epoch": 2.4816601438063164, "grad_norm": 7.15625, "learning_rate": 1.2904606153146803e-06, "loss": 1.3675892353057861, "step": 13634 }, { "epoch": 2.4820242104305086, "grad_norm": 9.1875, "learning_rate": 1.2900631038573263e-06, "loss": 1.5780583620071411, "step": 13636 }, { "epoch": 2.482388277054701, "grad_norm": 10.75, "learning_rate": 1.2896658433262817e-06, "loss": 1.161513090133667, "step": 13638 }, { "epoch": 2.4827523436788934, "grad_norm": 8.8125, "learning_rate": 1.2892688337798438e-06, "loss": 1.2856149673461914, "step": 13640 }, { "epoch": 2.4831164103030856, "grad_norm": 14.1875, "learning_rate": 1.288872075276271e-06, "loss": 1.4421100616455078, "step": 13642 }, { "epoch": 2.483480476927278, "grad_norm": 15.5, "learning_rate": 1.2884755678737867e-06, "loss": 1.3901095390319824, "step": 13644 }, { "epoch": 2.48384454355147, "grad_norm": 30.0, "learning_rate": 1.2880793116305767e-06, "loss": 1.324196457862854, "step": 13646 }, { "epoch": 2.484208610175662, "grad_norm": 16.875, "learning_rate": 1.28768330660479e-06, "loss": 1.491485834121704, "step": 13648 }, { "epoch": 2.4845726767998544, "grad_norm": 17.25, "learning_rate": 1.2872875528545382e-06, "loss": 1.3070697784423828, "step": 13650 }, { "epoch": 2.4849367434240466, "grad_norm": 17.5, "learning_rate": 1.2868920504378973e-06, "loss": 1.5530606508255005, "step": 13652 }, { "epoch": 2.4853008100482388, "grad_norm": 19.625, "learning_rate": 1.2864967994129055e-06, "loss": 1.5536575317382812, "step": 13654 }, { "epoch": 2.485664876672431, "grad_norm": 17.625, "learning_rate": 1.286101799837564e-06, "loss": 1.6284408569335938, "step": 13656 }, { "epoch": 2.486028943296623, "grad_norm": 21.125, "learning_rate": 1.2857070517698378e-06, "loss": 1.4892117977142334, "step": 13658 }, { "epoch": 2.4863930099208154, "grad_norm": 14.75, "learning_rate": 1.285312555267654e-06, "loss": 1.4760229587554932, "step": 13660 }, { "epoch": 2.4867570765450075, "grad_norm": 18.5, "learning_rate": 1.2849183103889036e-06, "loss": 1.417324185371399, "step": 13662 }, { "epoch": 2.4871211431692, "grad_norm": 14.875, "learning_rate": 1.2845243171914408e-06, "loss": 1.4205467700958252, "step": 13664 }, { "epoch": 2.4874852097933924, "grad_norm": 22.0, "learning_rate": 1.2841305757330824e-06, "loss": 1.2719480991363525, "step": 13666 }, { "epoch": 2.4878492764175846, "grad_norm": 17.125, "learning_rate": 1.2837370860716081e-06, "loss": 1.2793290615081787, "step": 13668 }, { "epoch": 2.4882133430417768, "grad_norm": 13.9375, "learning_rate": 1.2833438482647608e-06, "loss": 0.9162566065788269, "step": 13670 }, { "epoch": 2.488577409665969, "grad_norm": 9.1875, "learning_rate": 1.2829508623702469e-06, "loss": 1.3726885318756104, "step": 13672 }, { "epoch": 2.488941476290161, "grad_norm": 31.875, "learning_rate": 1.2825581284457354e-06, "loss": 1.452609896659851, "step": 13674 }, { "epoch": 2.4893055429143534, "grad_norm": 27.0, "learning_rate": 1.2821656465488584e-06, "loss": 1.7444946765899658, "step": 13676 }, { "epoch": 2.4896696095385455, "grad_norm": 9.75, "learning_rate": 1.2817734167372105e-06, "loss": 1.543503761291504, "step": 13678 }, { "epoch": 2.4900336761627377, "grad_norm": 70.0, "learning_rate": 1.281381439068351e-06, "loss": 1.2844178676605225, "step": 13680 }, { "epoch": 2.49039774278693, "grad_norm": 17.5, "learning_rate": 1.2809897135998e-06, "loss": 0.8564822673797607, "step": 13682 }, { "epoch": 2.490761809411122, "grad_norm": 8.0625, "learning_rate": 1.280598240389042e-06, "loss": 1.3028268814086914, "step": 13684 }, { "epoch": 2.4911258760353143, "grad_norm": 19.625, "learning_rate": 1.2802070194935244e-06, "loss": 1.1162965297698975, "step": 13686 }, { "epoch": 2.4914899426595065, "grad_norm": 9.8125, "learning_rate": 1.2798160509706568e-06, "loss": 1.4655704498291016, "step": 13688 }, { "epoch": 2.491854009283699, "grad_norm": 38.0, "learning_rate": 1.2794253348778122e-06, "loss": 1.3618659973144531, "step": 13690 }, { "epoch": 2.492218075907891, "grad_norm": 8.25, "learning_rate": 1.279034871272327e-06, "loss": 1.208526372909546, "step": 13692 }, { "epoch": 2.4925821425320835, "grad_norm": 12.0625, "learning_rate": 1.2786446602114998e-06, "loss": 1.5387649536132812, "step": 13694 }, { "epoch": 2.4929462091562757, "grad_norm": 16.625, "learning_rate": 1.2782547017525928e-06, "loss": 1.4620836973190308, "step": 13696 }, { "epoch": 2.493310275780468, "grad_norm": 5.8125, "learning_rate": 1.277864995952831e-06, "loss": 1.17047917842865, "step": 13698 }, { "epoch": 2.49367434240466, "grad_norm": 12.9375, "learning_rate": 1.2774755428694017e-06, "loss": 1.5245435237884521, "step": 13700 }, { "epoch": 2.4940384090288523, "grad_norm": 16.75, "learning_rate": 1.2770863425594553e-06, "loss": 1.2447148561477661, "step": 13702 }, { "epoch": 2.4944024756530445, "grad_norm": 9.0625, "learning_rate": 1.2766973950801062e-06, "loss": 0.7250251770019531, "step": 13704 }, { "epoch": 2.4947665422772367, "grad_norm": 16.5, "learning_rate": 1.2763087004884303e-06, "loss": 1.4915473461151123, "step": 13706 }, { "epoch": 2.495130608901429, "grad_norm": 10.875, "learning_rate": 1.275920258841467e-06, "loss": 1.5500664710998535, "step": 13708 }, { "epoch": 2.495494675525621, "grad_norm": 6.84375, "learning_rate": 1.275532070196219e-06, "loss": 1.169064998626709, "step": 13710 }, { "epoch": 2.4958587421498133, "grad_norm": 16.625, "learning_rate": 1.2751441346096506e-06, "loss": 1.3000593185424805, "step": 13712 }, { "epoch": 2.4962228087740055, "grad_norm": 9.125, "learning_rate": 1.2747564521386905e-06, "loss": 1.241957664489746, "step": 13714 }, { "epoch": 2.4965868753981977, "grad_norm": 10.3125, "learning_rate": 1.2743690228402293e-06, "loss": 1.7536050081253052, "step": 13716 }, { "epoch": 2.49695094202239, "grad_norm": 30.25, "learning_rate": 1.2739818467711202e-06, "loss": 1.396217942237854, "step": 13718 }, { "epoch": 2.4973150086465825, "grad_norm": 9.5625, "learning_rate": 1.2735949239881807e-06, "loss": 1.2755478620529175, "step": 13720 }, { "epoch": 2.4976790752707747, "grad_norm": 3.609375, "learning_rate": 1.2732082545481892e-06, "loss": 1.0886292457580566, "step": 13722 }, { "epoch": 2.498043141894967, "grad_norm": 13.4375, "learning_rate": 1.2728218385078883e-06, "loss": 1.5048471689224243, "step": 13724 }, { "epoch": 2.498407208519159, "grad_norm": 17.0, "learning_rate": 1.2724356759239831e-06, "loss": 1.2710556983947754, "step": 13726 }, { "epoch": 2.4987712751433513, "grad_norm": 18.75, "learning_rate": 1.2720497668531409e-06, "loss": 1.5529038906097412, "step": 13728 }, { "epoch": 2.4991353417675435, "grad_norm": 8.0625, "learning_rate": 1.2716641113519932e-06, "loss": 1.694190263748169, "step": 13730 }, { "epoch": 2.4994994083917357, "grad_norm": 13.25, "learning_rate": 1.2712787094771326e-06, "loss": 1.212223768234253, "step": 13732 }, { "epoch": 2.499863475015928, "grad_norm": 29.75, "learning_rate": 1.2708935612851153e-06, "loss": 1.7869689464569092, "step": 13734 }, { "epoch": 2.50022754164012, "grad_norm": 17.0, "learning_rate": 1.2705086668324606e-06, "loss": 1.857490062713623, "step": 13736 }, { "epoch": 2.5005916082643123, "grad_norm": 11.875, "learning_rate": 1.2701240261756497e-06, "loss": 1.4428120851516724, "step": 13738 }, { "epoch": 2.5009556748885045, "grad_norm": 9.25, "learning_rate": 1.2697396393711281e-06, "loss": 1.1334147453308105, "step": 13740 }, { "epoch": 2.501319741512697, "grad_norm": 19.5, "learning_rate": 1.2693555064753016e-06, "loss": 1.4924687147140503, "step": 13742 }, { "epoch": 2.501683808136889, "grad_norm": 15.0, "learning_rate": 1.2689716275445413e-06, "loss": 1.585281491279602, "step": 13744 }, { "epoch": 2.5020478747610815, "grad_norm": 54.75, "learning_rate": 1.2685880026351793e-06, "loss": 1.6740095615386963, "step": 13746 }, { "epoch": 2.5024119413852732, "grad_norm": 7.1875, "learning_rate": 1.268204631803511e-06, "loss": 1.4591107368469238, "step": 13748 }, { "epoch": 2.502776008009466, "grad_norm": 5.90625, "learning_rate": 1.2678215151057946e-06, "loss": 1.0724551677703857, "step": 13750 }, { "epoch": 2.503140074633658, "grad_norm": 14.4375, "learning_rate": 1.267438652598251e-06, "loss": 1.232196569442749, "step": 13752 }, { "epoch": 2.5035041412578503, "grad_norm": 9.625, "learning_rate": 1.267056044337064e-06, "loss": 1.2873656749725342, "step": 13754 }, { "epoch": 2.5038682078820425, "grad_norm": 37.5, "learning_rate": 1.2666736903783789e-06, "loss": 1.29099440574646, "step": 13756 }, { "epoch": 2.5042322745062346, "grad_norm": 5.125, "learning_rate": 1.2662915907783056e-06, "loss": 1.1078013181686401, "step": 13758 }, { "epoch": 2.504596341130427, "grad_norm": 12.5, "learning_rate": 1.2659097455929147e-06, "loss": 1.460280179977417, "step": 13760 }, { "epoch": 2.504960407754619, "grad_norm": 13.25, "learning_rate": 1.2655281548782417e-06, "loss": 1.7749568223953247, "step": 13762 }, { "epoch": 2.5053244743788112, "grad_norm": 4.09375, "learning_rate": 1.2651468186902825e-06, "loss": 1.3660907745361328, "step": 13764 }, { "epoch": 2.5056885410030034, "grad_norm": 10.4375, "learning_rate": 1.2647657370849966e-06, "loss": 0.9901412129402161, "step": 13766 }, { "epoch": 2.5060526076271956, "grad_norm": 8.25, "learning_rate": 1.264384910118307e-06, "loss": 1.341814637184143, "step": 13768 }, { "epoch": 2.506416674251388, "grad_norm": 18.625, "learning_rate": 1.2640043378460975e-06, "loss": 1.4288243055343628, "step": 13770 }, { "epoch": 2.5067807408755804, "grad_norm": 9.8125, "learning_rate": 1.2636240203242164e-06, "loss": 1.400880217552185, "step": 13772 }, { "epoch": 2.507144807499772, "grad_norm": 9.5625, "learning_rate": 1.2632439576084735e-06, "loss": 1.1051479578018188, "step": 13774 }, { "epoch": 2.507508874123965, "grad_norm": 9.625, "learning_rate": 1.2628641497546412e-06, "loss": 1.2555122375488281, "step": 13776 }, { "epoch": 2.507872940748157, "grad_norm": 9.25, "learning_rate": 1.262484596818455e-06, "loss": 1.545544147491455, "step": 13778 }, { "epoch": 2.5082370073723492, "grad_norm": 9.5625, "learning_rate": 1.2621052988556127e-06, "loss": 1.566070795059204, "step": 13780 }, { "epoch": 2.5086010739965414, "grad_norm": 5.9375, "learning_rate": 1.2617262559217745e-06, "loss": 1.2859457731246948, "step": 13782 }, { "epoch": 2.5089651406207336, "grad_norm": 11.25, "learning_rate": 1.2613474680725635e-06, "loss": 1.496281623840332, "step": 13784 }, { "epoch": 2.509329207244926, "grad_norm": 24.0, "learning_rate": 1.2609689353635658e-06, "loss": 1.3814291954040527, "step": 13786 }, { "epoch": 2.509693273869118, "grad_norm": 8.375, "learning_rate": 1.2605906578503291e-06, "loss": 1.2263468503952026, "step": 13788 }, { "epoch": 2.51005734049331, "grad_norm": 15.3125, "learning_rate": 1.260212635588364e-06, "loss": 0.8674606084823608, "step": 13790 }, { "epoch": 2.5104214071175024, "grad_norm": 15.9375, "learning_rate": 1.259834868633144e-06, "loss": 1.3211150169372559, "step": 13792 }, { "epoch": 2.5107854737416946, "grad_norm": 15.3125, "learning_rate": 1.2594573570401047e-06, "loss": 1.525397539138794, "step": 13794 }, { "epoch": 2.511149540365887, "grad_norm": 28.5, "learning_rate": 1.2590801008646444e-06, "loss": 1.7968785762786865, "step": 13796 }, { "epoch": 2.5115136069900794, "grad_norm": 9.4375, "learning_rate": 1.2587031001621242e-06, "loss": 1.0394837856292725, "step": 13798 }, { "epoch": 2.511877673614271, "grad_norm": 7.1875, "learning_rate": 1.258326354987867e-06, "loss": 1.3745152950286865, "step": 13800 }, { "epoch": 2.512241740238464, "grad_norm": 11.0625, "learning_rate": 1.257949865397159e-06, "loss": 0.9093539714813232, "step": 13802 }, { "epoch": 2.512605806862656, "grad_norm": 14.625, "learning_rate": 1.257573631445248e-06, "loss": 0.9860128164291382, "step": 13804 }, { "epoch": 2.512969873486848, "grad_norm": 17.0, "learning_rate": 1.2571976531873453e-06, "loss": 1.7929331064224243, "step": 13806 }, { "epoch": 2.5133339401110404, "grad_norm": 27.75, "learning_rate": 1.2568219306786243e-06, "loss": 1.7403910160064697, "step": 13808 }, { "epoch": 2.5136980067352326, "grad_norm": 21.25, "learning_rate": 1.2564464639742203e-06, "loss": 2.1030211448669434, "step": 13810 }, { "epoch": 2.5140620733594248, "grad_norm": 12.5625, "learning_rate": 1.2560712531292315e-06, "loss": 2.1319241523742676, "step": 13812 }, { "epoch": 2.514426139983617, "grad_norm": 37.75, "learning_rate": 1.2556962981987188e-06, "loss": 1.795989751815796, "step": 13814 }, { "epoch": 2.514790206607809, "grad_norm": 20.375, "learning_rate": 1.2553215992377054e-06, "loss": 1.5725948810577393, "step": 13816 }, { "epoch": 2.5151542732320014, "grad_norm": 15.5, "learning_rate": 1.254947156301177e-06, "loss": 0.5411054491996765, "step": 13818 }, { "epoch": 2.5155183398561936, "grad_norm": 13.9375, "learning_rate": 1.254572969444081e-06, "loss": 1.4140441417694092, "step": 13820 }, { "epoch": 2.5158824064803857, "grad_norm": 12.5625, "learning_rate": 1.2541990387213285e-06, "loss": 1.4466546773910522, "step": 13822 }, { "epoch": 2.5162464731045784, "grad_norm": 9.3125, "learning_rate": 1.2538253641877915e-06, "loss": 1.5120737552642822, "step": 13824 }, { "epoch": 2.51661053972877, "grad_norm": 18.125, "learning_rate": 1.253451945898306e-06, "loss": 1.521331548690796, "step": 13826 }, { "epoch": 2.5169746063529628, "grad_norm": 25.125, "learning_rate": 1.2530787839076692e-06, "loss": 0.9347392320632935, "step": 13828 }, { "epoch": 2.517338672977155, "grad_norm": 13.125, "learning_rate": 1.252705878270641e-06, "loss": 1.5979176759719849, "step": 13830 }, { "epoch": 2.517702739601347, "grad_norm": 5.59375, "learning_rate": 1.2523332290419442e-06, "loss": 1.349301815032959, "step": 13832 }, { "epoch": 2.5180668062255394, "grad_norm": 31.875, "learning_rate": 1.2519608362762637e-06, "loss": 1.4526147842407227, "step": 13834 }, { "epoch": 2.5184308728497315, "grad_norm": 9.6875, "learning_rate": 1.2515887000282457e-06, "loss": 0.9883827567100525, "step": 13836 }, { "epoch": 2.5187949394739237, "grad_norm": 19.625, "learning_rate": 1.2512168203525008e-06, "loss": 1.437248706817627, "step": 13838 }, { "epoch": 2.519159006098116, "grad_norm": 9.5625, "learning_rate": 1.2508451973035998e-06, "loss": 1.451150894165039, "step": 13840 }, { "epoch": 2.519523072722308, "grad_norm": 5.34375, "learning_rate": 1.2504738309360776e-06, "loss": 1.0233227014541626, "step": 13842 }, { "epoch": 2.5198871393465003, "grad_norm": 20.375, "learning_rate": 1.2501027213044306e-06, "loss": 1.3571885824203491, "step": 13844 }, { "epoch": 2.5202512059706925, "grad_norm": 11.1875, "learning_rate": 1.2497318684631174e-06, "loss": 1.1320991516113281, "step": 13846 }, { "epoch": 2.5206152725948847, "grad_norm": 24.75, "learning_rate": 1.2493612724665593e-06, "loss": 1.600059986114502, "step": 13848 }, { "epoch": 2.5209793392190774, "grad_norm": 11.3125, "learning_rate": 1.24899093336914e-06, "loss": 1.5025813579559326, "step": 13850 }, { "epoch": 2.521343405843269, "grad_norm": 16.375, "learning_rate": 1.2486208512252048e-06, "loss": 1.5578429698944092, "step": 13852 }, { "epoch": 2.5217074724674617, "grad_norm": 13.6875, "learning_rate": 1.248251026089062e-06, "loss": 1.6880372762680054, "step": 13854 }, { "epoch": 2.522071539091654, "grad_norm": 13.25, "learning_rate": 1.247881458014982e-06, "loss": 1.8128540515899658, "step": 13856 }, { "epoch": 2.522435605715846, "grad_norm": 12.8125, "learning_rate": 1.2475121470571972e-06, "loss": 1.4829449653625488, "step": 13858 }, { "epoch": 2.5227996723400383, "grad_norm": 66.5, "learning_rate": 1.2471430932699024e-06, "loss": 1.5019872188568115, "step": 13860 }, { "epoch": 2.5231637389642305, "grad_norm": 9.6875, "learning_rate": 1.2467742967072556e-06, "loss": 1.5482354164123535, "step": 13862 }, { "epoch": 2.5235278055884227, "grad_norm": 68.0, "learning_rate": 1.2464057574233749e-06, "loss": 1.4510550498962402, "step": 13864 }, { "epoch": 2.523891872212615, "grad_norm": 19.375, "learning_rate": 1.2460374754723427e-06, "loss": 1.0535571575164795, "step": 13866 }, { "epoch": 2.524255938836807, "grad_norm": 23.375, "learning_rate": 1.245669450908203e-06, "loss": 1.9020047187805176, "step": 13868 }, { "epoch": 2.5246200054609993, "grad_norm": 16.625, "learning_rate": 1.2453016837849618e-06, "loss": 1.8082274198532104, "step": 13870 }, { "epoch": 2.5249840720851915, "grad_norm": 17.875, "learning_rate": 1.244934174156587e-06, "loss": 1.144162654876709, "step": 13872 }, { "epoch": 2.5253481387093837, "grad_norm": 11.4375, "learning_rate": 1.2445669220770097e-06, "loss": 1.4965258836746216, "step": 13874 }, { "epoch": 2.5257122053335763, "grad_norm": 35.0, "learning_rate": 1.2441999276001226e-06, "loss": 1.5621570348739624, "step": 13876 }, { "epoch": 2.526076271957768, "grad_norm": 5.90625, "learning_rate": 1.2438331907797802e-06, "loss": 1.2954308986663818, "step": 13878 }, { "epoch": 2.5264403385819607, "grad_norm": 17.75, "learning_rate": 1.2434667116697999e-06, "loss": 1.0212547779083252, "step": 13880 }, { "epoch": 2.5268044052061525, "grad_norm": 290.0, "learning_rate": 1.243100490323961e-06, "loss": 1.0245438814163208, "step": 13882 }, { "epoch": 2.527168471830345, "grad_norm": 6.1875, "learning_rate": 1.2427345267960054e-06, "loss": 1.3658027648925781, "step": 13884 }, { "epoch": 2.5275325384545373, "grad_norm": 9.8125, "learning_rate": 1.2423688211396362e-06, "loss": 1.2823487520217896, "step": 13886 }, { "epoch": 2.5278966050787295, "grad_norm": 15.3125, "learning_rate": 1.2420033734085193e-06, "loss": 1.4981389045715332, "step": 13888 }, { "epoch": 2.5282606717029217, "grad_norm": 21.625, "learning_rate": 1.241638183656283e-06, "loss": 1.4964405298233032, "step": 13890 }, { "epoch": 2.528624738327114, "grad_norm": 15.4375, "learning_rate": 1.2412732519365173e-06, "loss": 1.746260404586792, "step": 13892 }, { "epoch": 2.528988804951306, "grad_norm": 21.625, "learning_rate": 1.2409085783027743e-06, "loss": 1.6342716217041016, "step": 13894 }, { "epoch": 2.5293528715754983, "grad_norm": 23.75, "learning_rate": 1.2405441628085685e-06, "loss": 1.8806421756744385, "step": 13896 }, { "epoch": 2.5297169381996905, "grad_norm": 12.75, "learning_rate": 1.2401800055073763e-06, "loss": 1.5460951328277588, "step": 13898 }, { "epoch": 2.5300810048238827, "grad_norm": 9.25, "learning_rate": 1.2398161064526366e-06, "loss": 1.4257985353469849, "step": 13900 }, { "epoch": 2.530445071448075, "grad_norm": 10.625, "learning_rate": 1.2394524656977493e-06, "loss": 1.4062708616256714, "step": 13902 }, { "epoch": 2.530809138072267, "grad_norm": 14.375, "learning_rate": 1.2390890832960783e-06, "loss": 1.4012010097503662, "step": 13904 }, { "epoch": 2.5311732046964597, "grad_norm": 10.8125, "learning_rate": 1.2387259593009478e-06, "loss": 1.4376744031906128, "step": 13906 }, { "epoch": 2.5315372713206514, "grad_norm": 7.96875, "learning_rate": 1.2383630937656449e-06, "loss": 1.4925472736358643, "step": 13908 }, { "epoch": 2.531901337944844, "grad_norm": 13.8125, "learning_rate": 1.2380004867434187e-06, "loss": 1.521460771560669, "step": 13910 }, { "epoch": 2.5322654045690363, "grad_norm": 14.8125, "learning_rate": 1.2376381382874805e-06, "loss": 1.5296789407730103, "step": 13912 }, { "epoch": 2.5326294711932285, "grad_norm": 3.96875, "learning_rate": 1.2372760484510033e-06, "loss": 0.8247794508934021, "step": 13914 }, { "epoch": 2.5329935378174206, "grad_norm": 17.5, "learning_rate": 1.2369142172871221e-06, "loss": 0.3369797468185425, "step": 13916 }, { "epoch": 2.533357604441613, "grad_norm": 13.375, "learning_rate": 1.2365526448489342e-06, "loss": 0.5634779334068298, "step": 13918 }, { "epoch": 2.533721671065805, "grad_norm": 12.125, "learning_rate": 1.2361913311894994e-06, "loss": 0.6927357912063599, "step": 13920 }, { "epoch": 2.5340857376899972, "grad_norm": 6.8125, "learning_rate": 1.2358302763618385e-06, "loss": 1.1136428117752075, "step": 13922 }, { "epoch": 2.5344498043141894, "grad_norm": 10.5, "learning_rate": 1.235469480418935e-06, "loss": 1.3693249225616455, "step": 13924 }, { "epoch": 2.5348138709383816, "grad_norm": 20.875, "learning_rate": 1.2351089434137343e-06, "loss": 1.448167324066162, "step": 13926 }, { "epoch": 2.535177937562574, "grad_norm": 22.125, "learning_rate": 1.2347486653991436e-06, "loss": 1.4910672903060913, "step": 13928 }, { "epoch": 2.535542004186766, "grad_norm": 22.0, "learning_rate": 1.234388646428032e-06, "loss": 1.5934560298919678, "step": 13930 }, { "epoch": 2.5359060708109586, "grad_norm": 7.84375, "learning_rate": 1.2340288865532319e-06, "loss": 1.4249051809310913, "step": 13932 }, { "epoch": 2.5362701374351504, "grad_norm": 20.625, "learning_rate": 1.233669385827535e-06, "loss": 1.6374164819717407, "step": 13934 }, { "epoch": 2.536634204059343, "grad_norm": 9.0625, "learning_rate": 1.233310144303698e-06, "loss": 1.2517423629760742, "step": 13936 }, { "epoch": 2.5369982706835352, "grad_norm": 17.25, "learning_rate": 1.2329511620344374e-06, "loss": 1.587918758392334, "step": 13938 }, { "epoch": 2.5373623373077274, "grad_norm": 7.90625, "learning_rate": 1.2325924390724326e-06, "loss": 1.4089986085891724, "step": 13940 }, { "epoch": 2.5377264039319196, "grad_norm": 30.0, "learning_rate": 1.2322339754703245e-06, "loss": 1.6078845262527466, "step": 13942 }, { "epoch": 2.538090470556112, "grad_norm": 47.0, "learning_rate": 1.2318757712807164e-06, "loss": 1.6663182973861694, "step": 13944 }, { "epoch": 2.538454537180304, "grad_norm": 10.3125, "learning_rate": 1.2315178265561733e-06, "loss": 1.4863834381103516, "step": 13946 }, { "epoch": 2.538818603804496, "grad_norm": 7.03125, "learning_rate": 1.2311601413492223e-06, "loss": 1.3234306573867798, "step": 13948 }, { "epoch": 2.5391826704286884, "grad_norm": 13.6875, "learning_rate": 1.2308027157123519e-06, "loss": 1.421712040901184, "step": 13950 }, { "epoch": 2.5395467370528806, "grad_norm": 14.5, "learning_rate": 1.2304455496980132e-06, "loss": 1.4140093326568604, "step": 13952 }, { "epoch": 2.539910803677073, "grad_norm": 8.6875, "learning_rate": 1.2300886433586186e-06, "loss": 1.1182925701141357, "step": 13954 }, { "epoch": 2.540274870301265, "grad_norm": 256.0, "learning_rate": 1.2297319967465427e-06, "loss": 1.3109902143478394, "step": 13956 }, { "epoch": 2.5406389369254576, "grad_norm": 15.875, "learning_rate": 1.2293756099141222e-06, "loss": 1.7431678771972656, "step": 13958 }, { "epoch": 2.5410030035496494, "grad_norm": 12.3125, "learning_rate": 1.229019482913655e-06, "loss": 1.6153838634490967, "step": 13960 }, { "epoch": 2.541367070173842, "grad_norm": 21.625, "learning_rate": 1.2286636157974017e-06, "loss": 1.7951079607009888, "step": 13962 }, { "epoch": 2.541731136798034, "grad_norm": 6.0625, "learning_rate": 1.2283080086175842e-06, "loss": 1.4971402883529663, "step": 13964 }, { "epoch": 2.5420952034222264, "grad_norm": 9.5, "learning_rate": 1.2279526614263863e-06, "loss": 1.4799537658691406, "step": 13966 }, { "epoch": 2.5424592700464186, "grad_norm": 15.9375, "learning_rate": 1.2275975742759538e-06, "loss": 1.5392813682556152, "step": 13968 }, { "epoch": 2.542823336670611, "grad_norm": 20.125, "learning_rate": 1.2272427472183944e-06, "loss": 1.6199944019317627, "step": 13970 }, { "epoch": 2.543187403294803, "grad_norm": 39.0, "learning_rate": 1.2268881803057776e-06, "loss": 2.1201658248901367, "step": 13972 }, { "epoch": 2.543551469918995, "grad_norm": 24.875, "learning_rate": 1.2265338735901344e-06, "loss": 1.7551052570343018, "step": 13974 }, { "epoch": 2.5439155365431874, "grad_norm": 8.4375, "learning_rate": 1.2261798271234582e-06, "loss": 1.130300760269165, "step": 13976 }, { "epoch": 2.5442796031673796, "grad_norm": 11.625, "learning_rate": 1.2258260409577035e-06, "loss": 1.527069330215454, "step": 13978 }, { "epoch": 2.5446436697915717, "grad_norm": 9.4375, "learning_rate": 1.2254725151447873e-06, "loss": 1.410813331604004, "step": 13980 }, { "epoch": 2.545007736415764, "grad_norm": 11.0, "learning_rate": 1.2251192497365879e-06, "loss": 1.1892207860946655, "step": 13982 }, { "epoch": 2.5453718030399566, "grad_norm": 14.5, "learning_rate": 1.2247662447849457e-06, "loss": 0.37004998326301575, "step": 13984 }, { "epoch": 2.5457358696641483, "grad_norm": 8.5625, "learning_rate": 1.2244135003416627e-06, "loss": 1.1805295944213867, "step": 13986 }, { "epoch": 2.546099936288341, "grad_norm": 10.125, "learning_rate": 1.2240610164585028e-06, "loss": 1.364537239074707, "step": 13988 }, { "epoch": 2.5464640029125327, "grad_norm": 10.0625, "learning_rate": 1.2237087931871912e-06, "loss": 1.3939199447631836, "step": 13990 }, { "epoch": 2.5468280695367254, "grad_norm": 14.8125, "learning_rate": 1.2233568305794158e-06, "loss": 1.6082172393798828, "step": 13992 }, { "epoch": 2.5471921361609176, "grad_norm": 13.5, "learning_rate": 1.2230051286868253e-06, "loss": 1.3050611019134521, "step": 13994 }, { "epoch": 2.5475562027851097, "grad_norm": 10.3125, "learning_rate": 1.2226536875610306e-06, "loss": 0.8881387114524841, "step": 13996 }, { "epoch": 2.547920269409302, "grad_norm": 6.375, "learning_rate": 1.2223025072536042e-06, "loss": 1.3263578414916992, "step": 13998 }, { "epoch": 2.548284336033494, "grad_norm": 6.28125, "learning_rate": 1.2219515878160806e-06, "loss": 1.412998914718628, "step": 14000 }, { "epoch": 2.5486484026576863, "grad_norm": 5.46875, "learning_rate": 1.221600929299956e-06, "loss": 1.2710697650909424, "step": 14002 }, { "epoch": 2.5490124692818785, "grad_norm": 8.75, "learning_rate": 1.2212505317566872e-06, "loss": 1.3614611625671387, "step": 14004 }, { "epoch": 2.5493765359060707, "grad_norm": 9.6875, "learning_rate": 1.2209003952376947e-06, "loss": 1.9486970901489258, "step": 14006 }, { "epoch": 2.549740602530263, "grad_norm": 7.5, "learning_rate": 1.220550519794359e-06, "loss": 1.3704783916473389, "step": 14008 }, { "epoch": 2.550104669154455, "grad_norm": 13.3125, "learning_rate": 1.2202009054780228e-06, "loss": 1.4659056663513184, "step": 14010 }, { "epoch": 2.5504687357786473, "grad_norm": 5.40625, "learning_rate": 1.2198515523399912e-06, "loss": 1.341232180595398, "step": 14012 }, { "epoch": 2.55083280240284, "grad_norm": 15.5625, "learning_rate": 1.2195024604315298e-06, "loss": 1.6684938669204712, "step": 14014 }, { "epoch": 2.5511968690270317, "grad_norm": 6.5625, "learning_rate": 1.2191536298038664e-06, "loss": 1.3729336261749268, "step": 14016 }, { "epoch": 2.5515609356512243, "grad_norm": 11.375, "learning_rate": 1.218805060508191e-06, "loss": 1.0623828172683716, "step": 14018 }, { "epoch": 2.5519250022754165, "grad_norm": 7.8125, "learning_rate": 1.2184567525956544e-06, "loss": 1.0805500745773315, "step": 14020 }, { "epoch": 2.5522890688996087, "grad_norm": 14.25, "learning_rate": 1.2181087061173694e-06, "loss": 1.6687734127044678, "step": 14022 }, { "epoch": 2.552653135523801, "grad_norm": 20.75, "learning_rate": 1.2177609211244101e-06, "loss": 1.6037479639053345, "step": 14024 }, { "epoch": 2.553017202147993, "grad_norm": 8.25, "learning_rate": 1.2174133976678133e-06, "loss": 1.4225633144378662, "step": 14026 }, { "epoch": 2.5533812687721853, "grad_norm": 10.875, "learning_rate": 1.2170661357985757e-06, "loss": 1.4339572191238403, "step": 14028 }, { "epoch": 2.5537453353963775, "grad_norm": 53.5, "learning_rate": 1.2167191355676573e-06, "loss": 1.2567026615142822, "step": 14030 }, { "epoch": 2.5541094020205697, "grad_norm": 12.6875, "learning_rate": 1.216372397025979e-06, "loss": 0.9043101072311401, "step": 14032 }, { "epoch": 2.554473468644762, "grad_norm": 9.625, "learning_rate": 1.2160259202244227e-06, "loss": 1.0356941223144531, "step": 14034 }, { "epoch": 2.554837535268954, "grad_norm": 25.125, "learning_rate": 1.215679705213833e-06, "loss": 1.522454857826233, "step": 14036 }, { "epoch": 2.5552016018931463, "grad_norm": 12.4375, "learning_rate": 1.2153337520450154e-06, "loss": 1.6948034763336182, "step": 14038 }, { "epoch": 2.555565668517339, "grad_norm": 13.75, "learning_rate": 1.214988060768737e-06, "loss": 1.432172179222107, "step": 14040 }, { "epoch": 2.5559297351415307, "grad_norm": 10.625, "learning_rate": 1.2146426314357262e-06, "loss": 1.0863529443740845, "step": 14042 }, { "epoch": 2.5562938017657233, "grad_norm": 7.5625, "learning_rate": 1.2142974640966743e-06, "loss": 0.9806747436523438, "step": 14044 }, { "epoch": 2.5566578683899155, "grad_norm": 100.5, "learning_rate": 1.2139525588022325e-06, "loss": 0.7954238057136536, "step": 14046 }, { "epoch": 2.5570219350141077, "grad_norm": 8.5625, "learning_rate": 1.2136079156030148e-06, "loss": 0.9820431470870972, "step": 14048 }, { "epoch": 2.5573860016383, "grad_norm": 35.25, "learning_rate": 1.2132635345495956e-06, "loss": 1.075451374053955, "step": 14050 }, { "epoch": 2.557750068262492, "grad_norm": 17.375, "learning_rate": 1.2129194156925118e-06, "loss": 1.5588490962982178, "step": 14052 }, { "epoch": 2.5581141348866843, "grad_norm": 19.375, "learning_rate": 1.2125755590822613e-06, "loss": 1.5445644855499268, "step": 14054 }, { "epoch": 2.5584782015108765, "grad_norm": 27.375, "learning_rate": 1.2122319647693036e-06, "loss": 1.5661966800689697, "step": 14056 }, { "epoch": 2.5588422681350687, "grad_norm": 11.6875, "learning_rate": 1.21188863280406e-06, "loss": 1.3662261962890625, "step": 14058 }, { "epoch": 2.559206334759261, "grad_norm": 21.5, "learning_rate": 1.211545563236913e-06, "loss": 1.576395869255066, "step": 14060 }, { "epoch": 2.559570401383453, "grad_norm": 11.0, "learning_rate": 1.2112027561182066e-06, "loss": 1.4173095226287842, "step": 14062 }, { "epoch": 2.5599344680076452, "grad_norm": 15.8125, "learning_rate": 1.2108602114982463e-06, "loss": 2.094416379928589, "step": 14064 }, { "epoch": 2.560298534631838, "grad_norm": 9.4375, "learning_rate": 1.2105179294272993e-06, "loss": 1.495354413986206, "step": 14066 }, { "epoch": 2.5606626012560296, "grad_norm": 24.875, "learning_rate": 1.2101759099555938e-06, "loss": 1.4475536346435547, "step": 14068 }, { "epoch": 2.5610266678802223, "grad_norm": 16.875, "learning_rate": 1.2098341531333202e-06, "loss": 1.4381740093231201, "step": 14070 }, { "epoch": 2.5613907345044145, "grad_norm": 7.6875, "learning_rate": 1.2094926590106298e-06, "loss": 1.404022455215454, "step": 14072 }, { "epoch": 2.5617548011286067, "grad_norm": 12.5, "learning_rate": 1.2091514276376355e-06, "loss": 0.83309006690979, "step": 14074 }, { "epoch": 2.562118867752799, "grad_norm": 7.71875, "learning_rate": 1.208810459064411e-06, "loss": 0.46824920177459717, "step": 14076 }, { "epoch": 2.562482934376991, "grad_norm": 16.25, "learning_rate": 1.2084697533409928e-06, "loss": 1.586581826210022, "step": 14078 }, { "epoch": 2.5628470010011832, "grad_norm": 100.5, "learning_rate": 1.2081293105173783e-06, "loss": 1.2888014316558838, "step": 14080 }, { "epoch": 2.5632110676253754, "grad_norm": 4.53125, "learning_rate": 1.207789130643525e-06, "loss": 1.4294322729110718, "step": 14082 }, { "epoch": 2.5635751342495676, "grad_norm": 8.75, "learning_rate": 1.2074492137693539e-06, "loss": 1.0541068315505981, "step": 14084 }, { "epoch": 2.56393920087376, "grad_norm": 7.6875, "learning_rate": 1.2071095599447464e-06, "loss": 1.5774511098861694, "step": 14086 }, { "epoch": 2.564303267497952, "grad_norm": 4.90625, "learning_rate": 1.2067701692195443e-06, "loss": 1.4180166721343994, "step": 14088 }, { "epoch": 2.564667334122144, "grad_norm": 9.4375, "learning_rate": 1.2064310416435532e-06, "loss": 1.1897733211517334, "step": 14090 }, { "epoch": 2.565031400746337, "grad_norm": 7.1875, "learning_rate": 1.2060921772665378e-06, "loss": 1.3457527160644531, "step": 14092 }, { "epoch": 2.5653954673705286, "grad_norm": 22.125, "learning_rate": 1.2057535761382253e-06, "loss": 1.5147616863250732, "step": 14094 }, { "epoch": 2.5657595339947212, "grad_norm": 16.125, "learning_rate": 1.205415238308304e-06, "loss": 1.2961853742599487, "step": 14096 }, { "epoch": 2.5661236006189134, "grad_norm": 23.5, "learning_rate": 1.205077163826424e-06, "loss": 1.6319491863250732, "step": 14098 }, { "epoch": 2.5664876672431056, "grad_norm": 21.0, "learning_rate": 1.2047393527421956e-06, "loss": 1.7514996528625488, "step": 14100 }, { "epoch": 2.566851733867298, "grad_norm": 28.125, "learning_rate": 1.204401805105192e-06, "loss": 1.10114324092865, "step": 14102 }, { "epoch": 2.56721580049149, "grad_norm": 7.34375, "learning_rate": 1.2040645209649462e-06, "loss": 1.4271674156188965, "step": 14104 }, { "epoch": 2.567579867115682, "grad_norm": 7.75, "learning_rate": 1.2037275003709539e-06, "loss": 1.4360320568084717, "step": 14106 }, { "epoch": 2.5679439337398744, "grad_norm": 6.1875, "learning_rate": 1.203390743372671e-06, "loss": 1.0118153095245361, "step": 14108 }, { "epoch": 2.5683080003640666, "grad_norm": 6.09375, "learning_rate": 1.2030542500195159e-06, "loss": 1.2163078784942627, "step": 14110 }, { "epoch": 2.568672066988259, "grad_norm": 28.875, "learning_rate": 1.2027180203608668e-06, "loss": 1.2504976987838745, "step": 14112 }, { "epoch": 2.569036133612451, "grad_norm": 21.125, "learning_rate": 1.2023820544460647e-06, "loss": 1.2607107162475586, "step": 14114 }, { "epoch": 2.569400200236643, "grad_norm": 7.09375, "learning_rate": 1.2020463523244112e-06, "loss": 1.0573214292526245, "step": 14116 }, { "epoch": 2.5697642668608354, "grad_norm": 13.375, "learning_rate": 1.2017109140451687e-06, "loss": 1.6065982580184937, "step": 14118 }, { "epoch": 2.5701283334850276, "grad_norm": 18.25, "learning_rate": 1.2013757396575619e-06, "loss": 0.9907183647155762, "step": 14120 }, { "epoch": 2.57049240010922, "grad_norm": 11.5, "learning_rate": 1.201040829210776e-06, "loss": 1.2247848510742188, "step": 14122 }, { "epoch": 2.570856466733412, "grad_norm": 6.21875, "learning_rate": 1.200706182753958e-06, "loss": 1.1839265823364258, "step": 14124 }, { "epoch": 2.5712205333576046, "grad_norm": 20.125, "learning_rate": 1.2003718003362155e-06, "loss": 1.9839324951171875, "step": 14126 }, { "epoch": 2.571584599981797, "grad_norm": 9.875, "learning_rate": 1.2000376820066183e-06, "loss": 1.9553616046905518, "step": 14128 }, { "epoch": 2.571948666605989, "grad_norm": 3.1875, "learning_rate": 1.1997038278141966e-06, "loss": 1.0969654321670532, "step": 14130 }, { "epoch": 2.572312733230181, "grad_norm": 8.8125, "learning_rate": 1.1993702378079422e-06, "loss": 1.151517391204834, "step": 14132 }, { "epoch": 2.5726767998543734, "grad_norm": 15.625, "learning_rate": 1.1990369120368082e-06, "loss": 1.244920253753662, "step": 14134 }, { "epoch": 2.5730408664785656, "grad_norm": 12.375, "learning_rate": 1.1987038505497088e-06, "loss": 1.6495258808135986, "step": 14136 }, { "epoch": 2.5734049331027578, "grad_norm": 10.875, "learning_rate": 1.198371053395519e-06, "loss": 1.46974778175354, "step": 14138 }, { "epoch": 2.57376899972695, "grad_norm": 3.828125, "learning_rate": 1.198038520623076e-06, "loss": 1.18740975856781, "step": 14140 }, { "epoch": 2.574133066351142, "grad_norm": 16.25, "learning_rate": 1.1977062522811768e-06, "loss": 0.8307124972343445, "step": 14142 }, { "epoch": 2.5744971329753343, "grad_norm": 19.5, "learning_rate": 1.1973742484185818e-06, "loss": 1.0607904195785522, "step": 14144 }, { "epoch": 2.5748611995995265, "grad_norm": 7.90625, "learning_rate": 1.1970425090840099e-06, "loss": 1.2869579792022705, "step": 14146 }, { "epoch": 2.575225266223719, "grad_norm": 9.5, "learning_rate": 1.1967110343261434e-06, "loss": 1.5717499256134033, "step": 14148 }, { "epoch": 2.575589332847911, "grad_norm": 31.0, "learning_rate": 1.1963798241936245e-06, "loss": 1.4820665121078491, "step": 14150 }, { "epoch": 2.5759533994721036, "grad_norm": 32.75, "learning_rate": 1.1960488787350566e-06, "loss": 0.6237311363220215, "step": 14152 }, { "epoch": 2.5763174660962957, "grad_norm": 11.25, "learning_rate": 1.1957181979990054e-06, "loss": 1.5681191682815552, "step": 14154 }, { "epoch": 2.576681532720488, "grad_norm": 9.3125, "learning_rate": 1.1953877820339965e-06, "loss": 1.417885422706604, "step": 14156 }, { "epoch": 2.57704559934468, "grad_norm": 6.75, "learning_rate": 1.1950576308885166e-06, "loss": 1.273742914199829, "step": 14158 }, { "epoch": 2.5774096659688723, "grad_norm": 8.875, "learning_rate": 1.194727744611015e-06, "loss": 1.3932478427886963, "step": 14160 }, { "epoch": 2.5777737325930645, "grad_norm": 14.375, "learning_rate": 1.1943981232499008e-06, "loss": 1.3501145839691162, "step": 14162 }, { "epoch": 2.5781377992172567, "grad_norm": 12.5625, "learning_rate": 1.1940687668535444e-06, "loss": 1.8123904466629028, "step": 14164 }, { "epoch": 2.578501865841449, "grad_norm": 12.375, "learning_rate": 1.1937396754702777e-06, "loss": 1.1665410995483398, "step": 14166 }, { "epoch": 2.578865932465641, "grad_norm": 11.9375, "learning_rate": 1.1934108491483938e-06, "loss": 1.5544352531433105, "step": 14168 }, { "epoch": 2.5792299990898333, "grad_norm": 16.25, "learning_rate": 1.1930822879361458e-06, "loss": 1.2436751127243042, "step": 14170 }, { "epoch": 2.5795940657140255, "grad_norm": 5.8125, "learning_rate": 1.1927539918817496e-06, "loss": 1.2847809791564941, "step": 14172 }, { "epoch": 2.579958132338218, "grad_norm": 3.625, "learning_rate": 1.1924259610333806e-06, "loss": 1.0977860689163208, "step": 14174 }, { "epoch": 2.58032219896241, "grad_norm": 19.625, "learning_rate": 1.192098195439177e-06, "loss": 1.4552431106567383, "step": 14176 }, { "epoch": 2.5806862655866025, "grad_norm": 6.46875, "learning_rate": 1.1917706951472358e-06, "loss": 1.06253182888031, "step": 14178 }, { "epoch": 2.5810503322107947, "grad_norm": 14.375, "learning_rate": 1.191443460205617e-06, "loss": 1.0235005617141724, "step": 14180 }, { "epoch": 2.581414398834987, "grad_norm": 18.25, "learning_rate": 1.1911164906623415e-06, "loss": 0.6101502180099487, "step": 14182 }, { "epoch": 2.581778465459179, "grad_norm": 34.25, "learning_rate": 1.1907897865653897e-06, "loss": 1.621131181716919, "step": 14184 }, { "epoch": 2.5821425320833713, "grad_norm": 32.75, "learning_rate": 1.190463347962705e-06, "loss": 1.3554693460464478, "step": 14186 }, { "epoch": 2.5825065987075635, "grad_norm": 17.375, "learning_rate": 1.1901371749021905e-06, "loss": 1.155472993850708, "step": 14188 }, { "epoch": 2.5828706653317557, "grad_norm": 9.5, "learning_rate": 1.1898112674317106e-06, "loss": 1.6012673377990723, "step": 14190 }, { "epoch": 2.583234731955948, "grad_norm": 11.0, "learning_rate": 1.1894856255990914e-06, "loss": 1.8098478317260742, "step": 14192 }, { "epoch": 2.58359879858014, "grad_norm": 5.34375, "learning_rate": 1.1891602494521192e-06, "loss": 1.045157551765442, "step": 14194 }, { "epoch": 2.5839628652043323, "grad_norm": 37.25, "learning_rate": 1.1888351390385417e-06, "loss": 1.7236558198928833, "step": 14196 }, { "epoch": 2.5843269318285245, "grad_norm": 12.125, "learning_rate": 1.1885102944060676e-06, "loss": 1.464908480644226, "step": 14198 }, { "epoch": 2.584690998452717, "grad_norm": 41.25, "learning_rate": 1.1881857156023665e-06, "loss": 1.8662400245666504, "step": 14200 }, { "epoch": 2.585055065076909, "grad_norm": 14.75, "learning_rate": 1.1878614026750688e-06, "loss": 1.4441614151000977, "step": 14202 }, { "epoch": 2.5854191317011015, "grad_norm": 6.625, "learning_rate": 1.1875373556717665e-06, "loss": 1.3337925672531128, "step": 14204 }, { "epoch": 2.5857831983252937, "grad_norm": 15.875, "learning_rate": 1.187213574640012e-06, "loss": 1.5422848463058472, "step": 14206 }, { "epoch": 2.586147264949486, "grad_norm": 9.5, "learning_rate": 1.1868900596273186e-06, "loss": 1.3169329166412354, "step": 14208 }, { "epoch": 2.586511331573678, "grad_norm": 13.5, "learning_rate": 1.1865668106811611e-06, "loss": 1.3868608474731445, "step": 14210 }, { "epoch": 2.5868753981978703, "grad_norm": 13.0, "learning_rate": 1.186243827848975e-06, "loss": 1.5084577798843384, "step": 14212 }, { "epoch": 2.5872394648220625, "grad_norm": 12.125, "learning_rate": 1.1859211111781568e-06, "loss": 1.442775845527649, "step": 14214 }, { "epoch": 2.5876035314462547, "grad_norm": 20.75, "learning_rate": 1.1855986607160636e-06, "loss": 1.5960636138916016, "step": 14216 }, { "epoch": 2.587967598070447, "grad_norm": 18.5, "learning_rate": 1.185276476510014e-06, "loss": 1.8336906433105469, "step": 14218 }, { "epoch": 2.588331664694639, "grad_norm": 15.1875, "learning_rate": 1.1849545586072866e-06, "loss": 1.584119439125061, "step": 14220 }, { "epoch": 2.5886957313188312, "grad_norm": 11.0, "learning_rate": 1.184632907055122e-06, "loss": 1.3604992628097534, "step": 14222 }, { "epoch": 2.5890597979430234, "grad_norm": 6.25, "learning_rate": 1.1843115219007217e-06, "loss": 1.135657548904419, "step": 14224 }, { "epoch": 2.589423864567216, "grad_norm": 36.25, "learning_rate": 1.1839904031912472e-06, "loss": 1.3711371421813965, "step": 14226 }, { "epoch": 2.589787931191408, "grad_norm": 6.5625, "learning_rate": 1.1836695509738211e-06, "loss": 1.2336375713348389, "step": 14228 }, { "epoch": 2.5901519978156005, "grad_norm": 16.875, "learning_rate": 1.1833489652955275e-06, "loss": 1.243660807609558, "step": 14230 }, { "epoch": 2.590516064439792, "grad_norm": 17.5, "learning_rate": 1.1830286462034112e-06, "loss": 0.7745146751403809, "step": 14232 }, { "epoch": 2.590880131063985, "grad_norm": 8.0, "learning_rate": 1.182708593744477e-06, "loss": 1.038374423980713, "step": 14234 }, { "epoch": 2.591244197688177, "grad_norm": 9.0, "learning_rate": 1.1823888079656926e-06, "loss": 1.5681627988815308, "step": 14236 }, { "epoch": 2.5916082643123692, "grad_norm": 13.0625, "learning_rate": 1.1820692889139838e-06, "loss": 1.5277447700500488, "step": 14238 }, { "epoch": 2.5919723309365614, "grad_norm": 19.875, "learning_rate": 1.1817500366362398e-06, "loss": 1.6709702014923096, "step": 14240 }, { "epoch": 2.5923363975607536, "grad_norm": 15.8125, "learning_rate": 1.1814310511793092e-06, "loss": 1.6598896980285645, "step": 14242 }, { "epoch": 2.592700464184946, "grad_norm": 20.0, "learning_rate": 1.1811123325900017e-06, "loss": 1.7732157707214355, "step": 14244 }, { "epoch": 2.593064530809138, "grad_norm": 12.6875, "learning_rate": 1.1807938809150883e-06, "loss": 1.7412445545196533, "step": 14246 }, { "epoch": 2.59342859743333, "grad_norm": 12.1875, "learning_rate": 1.1804756962013e-06, "loss": 1.464808702468872, "step": 14248 }, { "epoch": 2.5937926640575224, "grad_norm": 13.5625, "learning_rate": 1.1801577784953295e-06, "loss": 1.4241043329238892, "step": 14250 }, { "epoch": 2.5941567306817146, "grad_norm": 14.5, "learning_rate": 1.1798401278438298e-06, "loss": 1.095261812210083, "step": 14252 }, { "epoch": 2.594520797305907, "grad_norm": 13.25, "learning_rate": 1.179522744293415e-06, "loss": 0.671078085899353, "step": 14254 }, { "epoch": 2.5948848639300994, "grad_norm": 8.625, "learning_rate": 1.1792056278906594e-06, "loss": 1.6016740798950195, "step": 14256 }, { "epoch": 2.595248930554291, "grad_norm": 13.875, "learning_rate": 1.1788887786820993e-06, "loss": 1.1386425495147705, "step": 14258 }, { "epoch": 2.595612997178484, "grad_norm": 13.875, "learning_rate": 1.1785721967142304e-06, "loss": 1.4968576431274414, "step": 14260 }, { "epoch": 2.595977063802676, "grad_norm": 34.0, "learning_rate": 1.17825588203351e-06, "loss": 1.8564518690109253, "step": 14262 }, { "epoch": 2.596341130426868, "grad_norm": 6.21875, "learning_rate": 1.177939834686356e-06, "loss": 0.9061320424079895, "step": 14264 }, { "epoch": 2.5967051970510604, "grad_norm": 6.03125, "learning_rate": 1.1776240547191475e-06, "loss": 1.1914856433868408, "step": 14266 }, { "epoch": 2.5970692636752526, "grad_norm": 12.125, "learning_rate": 1.177308542178223e-06, "loss": 1.5928727388381958, "step": 14268 }, { "epoch": 2.597433330299445, "grad_norm": 19.0, "learning_rate": 1.1769932971098835e-06, "loss": 1.5279734134674072, "step": 14270 }, { "epoch": 2.597797396923637, "grad_norm": 18.625, "learning_rate": 1.1766783195603895e-06, "loss": 0.8941159248352051, "step": 14272 }, { "epoch": 2.598161463547829, "grad_norm": 16.875, "learning_rate": 1.1763636095759628e-06, "loss": 1.8265458345413208, "step": 14274 }, { "epoch": 2.5985255301720214, "grad_norm": 7.59375, "learning_rate": 1.176049167202786e-06, "loss": 1.4179325103759766, "step": 14276 }, { "epoch": 2.5988895967962136, "grad_norm": 51.0, "learning_rate": 1.1757349924870021e-06, "loss": 1.2888375520706177, "step": 14278 }, { "epoch": 2.5992536634204058, "grad_norm": 40.75, "learning_rate": 1.175421085474715e-06, "loss": 1.3022136688232422, "step": 14280 }, { "epoch": 2.5996177300445984, "grad_norm": 8.625, "learning_rate": 1.175107446211989e-06, "loss": 1.5855441093444824, "step": 14282 }, { "epoch": 2.59998179666879, "grad_norm": 6.3125, "learning_rate": 1.1747940747448497e-06, "loss": 1.3258237838745117, "step": 14284 }, { "epoch": 2.600345863292983, "grad_norm": 35.0, "learning_rate": 1.174480971119283e-06, "loss": 1.7382245063781738, "step": 14286 }, { "epoch": 2.600709929917175, "grad_norm": 15.3125, "learning_rate": 1.1741681353812358e-06, "loss": 1.469666600227356, "step": 14288 }, { "epoch": 2.601073996541367, "grad_norm": 8.0625, "learning_rate": 1.173855567576615e-06, "loss": 1.1094344854354858, "step": 14290 }, { "epoch": 2.6014380631655594, "grad_norm": 7.71875, "learning_rate": 1.173543267751289e-06, "loss": 1.4580005407333374, "step": 14292 }, { "epoch": 2.6018021297897516, "grad_norm": 10.625, "learning_rate": 1.1732312359510867e-06, "loss": 1.124633550643921, "step": 14294 }, { "epoch": 2.6021661964139438, "grad_norm": 9.625, "learning_rate": 1.1729194722217972e-06, "loss": 1.5115344524383545, "step": 14296 }, { "epoch": 2.602530263038136, "grad_norm": 10.5, "learning_rate": 1.1726079766091706e-06, "loss": 1.4737446308135986, "step": 14298 }, { "epoch": 2.602894329662328, "grad_norm": 24.0, "learning_rate": 1.1722967491589175e-06, "loss": 1.6549428701400757, "step": 14300 }, { "epoch": 2.6032583962865203, "grad_norm": 11.5, "learning_rate": 1.1719857899167096e-06, "loss": 1.7363176345825195, "step": 14302 }, { "epoch": 2.6036224629107125, "grad_norm": 3.640625, "learning_rate": 1.1716750989281787e-06, "loss": 0.9746482372283936, "step": 14304 }, { "epoch": 2.6039865295349047, "grad_norm": 3.796875, "learning_rate": 1.1713646762389174e-06, "loss": 0.9989664554595947, "step": 14306 }, { "epoch": 2.6043505961590974, "grad_norm": 11.125, "learning_rate": 1.1710545218944795e-06, "loss": 1.170009732246399, "step": 14308 }, { "epoch": 2.604714662783289, "grad_norm": 23.25, "learning_rate": 1.170744635940378e-06, "loss": 1.514284372329712, "step": 14310 }, { "epoch": 2.6050787294074818, "grad_norm": 19.625, "learning_rate": 1.170435018422088e-06, "loss": 1.518493413925171, "step": 14312 }, { "epoch": 2.605442796031674, "grad_norm": 18.125, "learning_rate": 1.1701256693850443e-06, "loss": 1.315392017364502, "step": 14314 }, { "epoch": 2.605806862655866, "grad_norm": 26.625, "learning_rate": 1.1698165888746427e-06, "loss": 1.8009157180786133, "step": 14316 }, { "epoch": 2.6061709292800583, "grad_norm": 33.0, "learning_rate": 1.1695077769362397e-06, "loss": 1.5634257793426514, "step": 14318 }, { "epoch": 2.6065349959042505, "grad_norm": 40.0, "learning_rate": 1.1691992336151524e-06, "loss": 2.0482845306396484, "step": 14320 }, { "epoch": 2.6068990625284427, "grad_norm": 16.5, "learning_rate": 1.1688909589566578e-06, "loss": 1.777584433555603, "step": 14322 }, { "epoch": 2.607263129152635, "grad_norm": 17.25, "learning_rate": 1.168582953005994e-06, "loss": 1.4168450832366943, "step": 14324 }, { "epoch": 2.607627195776827, "grad_norm": 11.9375, "learning_rate": 1.1682752158083598e-06, "loss": 1.4832003116607666, "step": 14326 }, { "epoch": 2.6079912624010193, "grad_norm": 31.875, "learning_rate": 1.1679677474089146e-06, "loss": 1.8173999786376953, "step": 14328 }, { "epoch": 2.6083553290252115, "grad_norm": 11.875, "learning_rate": 1.1676605478527777e-06, "loss": 1.5967960357666016, "step": 14330 }, { "epoch": 2.6087193956494037, "grad_norm": 3.234375, "learning_rate": 1.1673536171850295e-06, "loss": 0.9845655560493469, "step": 14332 }, { "epoch": 2.6090834622735963, "grad_norm": 36.0, "learning_rate": 1.1670469554507109e-06, "loss": 0.9247639179229736, "step": 14334 }, { "epoch": 2.609447528897788, "grad_norm": 17.625, "learning_rate": 1.1667405626948234e-06, "loss": 0.6797451972961426, "step": 14336 }, { "epoch": 2.6098115955219807, "grad_norm": 11.4375, "learning_rate": 1.166434438962329e-06, "loss": 1.5167579650878906, "step": 14338 }, { "epoch": 2.610175662146173, "grad_norm": 22.125, "learning_rate": 1.1661285842981495e-06, "loss": 1.6723637580871582, "step": 14340 }, { "epoch": 2.610539728770365, "grad_norm": 11.4375, "learning_rate": 1.1658229987471683e-06, "loss": 1.4314948320388794, "step": 14342 }, { "epoch": 2.6109037953945573, "grad_norm": 8.4375, "learning_rate": 1.165517682354229e-06, "loss": 1.2646710872650146, "step": 14344 }, { "epoch": 2.6112678620187495, "grad_norm": 51.0, "learning_rate": 1.165212635164135e-06, "loss": 1.2827895879745483, "step": 14346 }, { "epoch": 2.6116319286429417, "grad_norm": 18.375, "learning_rate": 1.1649078572216512e-06, "loss": 1.4106487035751343, "step": 14348 }, { "epoch": 2.611995995267134, "grad_norm": 11.3125, "learning_rate": 1.1646033485715023e-06, "loss": 1.4193886518478394, "step": 14350 }, { "epoch": 2.612360061891326, "grad_norm": 9.3125, "learning_rate": 1.164299109258374e-06, "loss": 1.5058352947235107, "step": 14352 }, { "epoch": 2.6127241285155183, "grad_norm": 8.1875, "learning_rate": 1.1639951393269118e-06, "loss": 1.394639492034912, "step": 14354 }, { "epoch": 2.6130881951397105, "grad_norm": 9.875, "learning_rate": 1.1636914388217224e-06, "loss": 1.3609979152679443, "step": 14356 }, { "epoch": 2.6134522617639027, "grad_norm": 15.1875, "learning_rate": 1.1633880077873721e-06, "loss": 1.588275671005249, "step": 14358 }, { "epoch": 2.613816328388095, "grad_norm": 11.5625, "learning_rate": 1.1630848462683885e-06, "loss": 1.5709130764007568, "step": 14360 }, { "epoch": 2.614180395012287, "grad_norm": 21.625, "learning_rate": 1.1627819543092597e-06, "loss": 1.4891860485076904, "step": 14362 }, { "epoch": 2.6145444616364797, "grad_norm": 26.5, "learning_rate": 1.1624793319544335e-06, "loss": 1.5109132528305054, "step": 14364 }, { "epoch": 2.6149085282606714, "grad_norm": 5.4375, "learning_rate": 1.1621769792483182e-06, "loss": 0.9553173780441284, "step": 14366 }, { "epoch": 2.615272594884864, "grad_norm": 6.375, "learning_rate": 1.1618748962352833e-06, "loss": 1.087977647781372, "step": 14368 }, { "epoch": 2.6156366615090563, "grad_norm": 25.75, "learning_rate": 1.1615730829596583e-06, "loss": 1.0443356037139893, "step": 14370 }, { "epoch": 2.6160007281332485, "grad_norm": 54.75, "learning_rate": 1.1612715394657326e-06, "loss": 1.5442872047424316, "step": 14372 }, { "epoch": 2.6163647947574407, "grad_norm": 12.5, "learning_rate": 1.1609702657977568e-06, "loss": 1.707930564880371, "step": 14374 }, { "epoch": 2.616728861381633, "grad_norm": 11.75, "learning_rate": 1.1606692619999418e-06, "loss": 1.2924145460128784, "step": 14376 }, { "epoch": 2.617092928005825, "grad_norm": 17.5, "learning_rate": 1.1603685281164585e-06, "loss": 1.6354949474334717, "step": 14378 }, { "epoch": 2.6174569946300172, "grad_norm": 13.6875, "learning_rate": 1.160068064191438e-06, "loss": 1.9317681789398193, "step": 14380 }, { "epoch": 2.6178210612542094, "grad_norm": 16.75, "learning_rate": 1.1597678702689724e-06, "loss": 1.2470465898513794, "step": 14382 }, { "epoch": 2.6181851278784016, "grad_norm": 9.0625, "learning_rate": 1.1594679463931142e-06, "loss": 0.6711374521255493, "step": 14384 }, { "epoch": 2.618549194502594, "grad_norm": 14.5, "learning_rate": 1.1591682926078762e-06, "loss": 1.3027068376541138, "step": 14386 }, { "epoch": 2.618913261126786, "grad_norm": 12.0625, "learning_rate": 1.1588689089572306e-06, "loss": 1.6903302669525146, "step": 14388 }, { "epoch": 2.6192773277509787, "grad_norm": 11.8125, "learning_rate": 1.1585697954851113e-06, "loss": 0.8386199474334717, "step": 14390 }, { "epoch": 2.6196413943751704, "grad_norm": 6.53125, "learning_rate": 1.1582709522354117e-06, "loss": 1.4591509103775024, "step": 14392 }, { "epoch": 2.620005460999363, "grad_norm": 3.234375, "learning_rate": 1.157972379251986e-06, "loss": 0.9996927380561829, "step": 14394 }, { "epoch": 2.6203695276235552, "grad_norm": 13.25, "learning_rate": 1.1576740765786488e-06, "loss": 1.0451319217681885, "step": 14396 }, { "epoch": 2.6207335942477474, "grad_norm": 6.625, "learning_rate": 1.1573760442591741e-06, "loss": 1.190625786781311, "step": 14398 }, { "epoch": 2.6210976608719396, "grad_norm": 7.65625, "learning_rate": 1.157078282337298e-06, "loss": 1.2719480991363525, "step": 14400 }, { "epoch": 2.621461727496132, "grad_norm": 12.625, "learning_rate": 1.1567807908567148e-06, "loss": 1.1912826299667358, "step": 14402 }, { "epoch": 2.621825794120324, "grad_norm": 7.9375, "learning_rate": 1.1564835698610808e-06, "loss": 1.4906561374664307, "step": 14404 }, { "epoch": 2.622189860744516, "grad_norm": 4.78125, "learning_rate": 1.1561866193940119e-06, "loss": 1.1690740585327148, "step": 14406 }, { "epoch": 2.6225539273687084, "grad_norm": 3.84375, "learning_rate": 1.1558899394990841e-06, "loss": 0.8879704475402832, "step": 14408 }, { "epoch": 2.6229179939929006, "grad_norm": 6.46875, "learning_rate": 1.1555935302198343e-06, "loss": 1.193251609802246, "step": 14410 }, { "epoch": 2.623282060617093, "grad_norm": 17.625, "learning_rate": 1.1552973915997592e-06, "loss": 1.4749467372894287, "step": 14412 }, { "epoch": 2.623646127241285, "grad_norm": 7.3125, "learning_rate": 1.1550015236823158e-06, "loss": 1.5314549207687378, "step": 14414 }, { "epoch": 2.6240101938654776, "grad_norm": 16.75, "learning_rate": 1.154705926510922e-06, "loss": 1.4068033695220947, "step": 14416 }, { "epoch": 2.6243742604896694, "grad_norm": 7.46875, "learning_rate": 1.1544106001289549e-06, "loss": 1.1362931728363037, "step": 14418 }, { "epoch": 2.624738327113862, "grad_norm": 9.375, "learning_rate": 1.1541155445797527e-06, "loss": 0.8063347339630127, "step": 14420 }, { "epoch": 2.625102393738054, "grad_norm": 6.875, "learning_rate": 1.1538207599066138e-06, "loss": 1.5680792331695557, "step": 14422 }, { "epoch": 2.6254664603622464, "grad_norm": 15.5625, "learning_rate": 1.1535262461527962e-06, "loss": 1.4349617958068848, "step": 14424 }, { "epoch": 2.6258305269864386, "grad_norm": 23.375, "learning_rate": 1.1532320033615191e-06, "loss": 1.9161278009414673, "step": 14426 }, { "epoch": 2.626194593610631, "grad_norm": 14.4375, "learning_rate": 1.152938031575961e-06, "loss": 1.3714648485183716, "step": 14428 }, { "epoch": 2.626558660234823, "grad_norm": 26.5, "learning_rate": 1.1526443308392615e-06, "loss": 1.9736828804016113, "step": 14430 }, { "epoch": 2.626922726859015, "grad_norm": 17.875, "learning_rate": 1.15235090119452e-06, "loss": 1.324831247329712, "step": 14432 }, { "epoch": 2.6272867934832074, "grad_norm": 10.4375, "learning_rate": 1.1520577426847952e-06, "loss": 1.5042481422424316, "step": 14434 }, { "epoch": 2.6276508601073996, "grad_norm": 21.25, "learning_rate": 1.151764855353108e-06, "loss": 1.3443495035171509, "step": 14436 }, { "epoch": 2.6280149267315918, "grad_norm": 5.90625, "learning_rate": 1.151472239242438e-06, "loss": 0.8775898814201355, "step": 14438 }, { "epoch": 2.628378993355784, "grad_norm": 8.8125, "learning_rate": 1.1511798943957255e-06, "loss": 1.1691288948059082, "step": 14440 }, { "epoch": 2.6287430599799766, "grad_norm": 10.125, "learning_rate": 1.150887820855871e-06, "loss": 1.4335007667541504, "step": 14442 }, { "epoch": 2.6291071266041683, "grad_norm": 13.6875, "learning_rate": 1.1505960186657349e-06, "loss": 1.2584631443023682, "step": 14444 }, { "epoch": 2.629471193228361, "grad_norm": 10.375, "learning_rate": 1.150304487868138e-06, "loss": 1.5659459829330444, "step": 14446 }, { "epoch": 2.629835259852553, "grad_norm": 9.9375, "learning_rate": 1.1500132285058613e-06, "loss": 1.5534355640411377, "step": 14448 }, { "epoch": 2.6301993264767454, "grad_norm": 13.0625, "learning_rate": 1.1497222406216463e-06, "loss": 1.129992127418518, "step": 14450 }, { "epoch": 2.6305633931009376, "grad_norm": 49.25, "learning_rate": 1.1494315242581936e-06, "loss": 1.502547025680542, "step": 14452 }, { "epoch": 2.6309274597251298, "grad_norm": 7.78125, "learning_rate": 1.149141079458165e-06, "loss": 1.3004871606826782, "step": 14454 }, { "epoch": 2.631291526349322, "grad_norm": 3.671875, "learning_rate": 1.1488509062641825e-06, "loss": 1.029909610748291, "step": 14456 }, { "epoch": 2.631655592973514, "grad_norm": 7.3125, "learning_rate": 1.148561004718827e-06, "loss": 1.453866720199585, "step": 14458 }, { "epoch": 2.6320196595977063, "grad_norm": 13.875, "learning_rate": 1.148271374864641e-06, "loss": 1.7987303733825684, "step": 14460 }, { "epoch": 2.6323837262218985, "grad_norm": 13.9375, "learning_rate": 1.1479820167441265e-06, "loss": 1.6310635805130005, "step": 14462 }, { "epoch": 2.6327477928460907, "grad_norm": 11.75, "learning_rate": 1.1476929303997454e-06, "loss": 1.784327745437622, "step": 14464 }, { "epoch": 2.633111859470283, "grad_norm": 11.75, "learning_rate": 1.1474041158739201e-06, "loss": 1.5094164609909058, "step": 14466 }, { "epoch": 2.6334759260944756, "grad_norm": 19.0, "learning_rate": 1.1471155732090325e-06, "loss": 1.506624460220337, "step": 14468 }, { "epoch": 2.6338399927186673, "grad_norm": 7.71875, "learning_rate": 1.1468273024474258e-06, "loss": 1.3636056184768677, "step": 14470 }, { "epoch": 2.63420405934286, "grad_norm": 5.25, "learning_rate": 1.1465393036314022e-06, "loss": 1.4712549448013306, "step": 14472 }, { "epoch": 2.6345681259670517, "grad_norm": 7.1875, "learning_rate": 1.146251576803224e-06, "loss": 0.996986448764801, "step": 14474 }, { "epoch": 2.6349321925912443, "grad_norm": 120.5, "learning_rate": 1.1459641220051148e-06, "loss": 1.3135651350021362, "step": 14476 }, { "epoch": 2.6352962592154365, "grad_norm": 9.6875, "learning_rate": 1.1456769392792568e-06, "loss": 0.8766427636146545, "step": 14478 }, { "epoch": 2.6356603258396287, "grad_norm": 9.875, "learning_rate": 1.145390028667793e-06, "loss": 1.4558296203613281, "step": 14480 }, { "epoch": 2.636024392463821, "grad_norm": 3.984375, "learning_rate": 1.1451033902128264e-06, "loss": 1.076955795288086, "step": 14482 }, { "epoch": 2.636388459088013, "grad_norm": 18.5, "learning_rate": 1.1448170239564201e-06, "loss": 1.2234370708465576, "step": 14484 }, { "epoch": 2.6367525257122053, "grad_norm": 16.25, "learning_rate": 1.1445309299405971e-06, "loss": 1.508989691734314, "step": 14486 }, { "epoch": 2.6371165923363975, "grad_norm": 20.625, "learning_rate": 1.1442451082073405e-06, "loss": 1.3836790323257446, "step": 14488 }, { "epoch": 2.6374806589605897, "grad_norm": 13.8125, "learning_rate": 1.1439595587985937e-06, "loss": 1.1410776376724243, "step": 14490 }, { "epoch": 2.637844725584782, "grad_norm": 5.125, "learning_rate": 1.1436742817562595e-06, "loss": 1.3450950384140015, "step": 14492 }, { "epoch": 2.638208792208974, "grad_norm": 12.8125, "learning_rate": 1.1433892771222018e-06, "loss": 1.4257886409759521, "step": 14494 }, { "epoch": 2.6385728588331663, "grad_norm": 12.75, "learning_rate": 1.1431045449382432e-06, "loss": 1.494485855102539, "step": 14496 }, { "epoch": 2.638936925457359, "grad_norm": 7.46875, "learning_rate": 1.1428200852461675e-06, "loss": 1.257080316543579, "step": 14498 }, { "epoch": 2.6393009920815507, "grad_norm": 9.125, "learning_rate": 1.1425358980877177e-06, "loss": 1.38213312625885, "step": 14500 }, { "epoch": 2.6396650587057433, "grad_norm": 15.8125, "learning_rate": 1.142251983504597e-06, "loss": 1.309072732925415, "step": 14502 }, { "epoch": 2.6400291253299355, "grad_norm": 9.75, "learning_rate": 1.1419683415384693e-06, "loss": 1.4315125942230225, "step": 14504 }, { "epoch": 2.6403931919541277, "grad_norm": 20.5, "learning_rate": 1.1416849722309574e-06, "loss": 1.6254199743270874, "step": 14506 }, { "epoch": 2.64075725857832, "grad_norm": 16.0, "learning_rate": 1.1414018756236446e-06, "loss": 1.2900720834732056, "step": 14508 }, { "epoch": 2.641121325202512, "grad_norm": 38.25, "learning_rate": 1.1411190517580745e-06, "loss": 1.8353610038757324, "step": 14510 }, { "epoch": 2.6414853918267043, "grad_norm": 16.0, "learning_rate": 1.1408365006757504e-06, "loss": 1.5915122032165527, "step": 14512 }, { "epoch": 2.6418494584508965, "grad_norm": 8.625, "learning_rate": 1.140554222418135e-06, "loss": 1.2189075946807861, "step": 14514 }, { "epoch": 2.6422135250750887, "grad_norm": 5.78125, "learning_rate": 1.1402722170266517e-06, "loss": 1.345229148864746, "step": 14516 }, { "epoch": 2.642577591699281, "grad_norm": 9.125, "learning_rate": 1.1399904845426837e-06, "loss": 1.4504570960998535, "step": 14518 }, { "epoch": 2.642941658323473, "grad_norm": 10.25, "learning_rate": 1.1397090250075743e-06, "loss": 1.26963210105896, "step": 14520 }, { "epoch": 2.6433057249476652, "grad_norm": 16.25, "learning_rate": 1.1394278384626263e-06, "loss": 1.1671103239059448, "step": 14522 }, { "epoch": 2.643669791571858, "grad_norm": 13.8125, "learning_rate": 1.1391469249491027e-06, "loss": 1.461940884590149, "step": 14524 }, { "epoch": 2.6440338581960496, "grad_norm": 16.5, "learning_rate": 1.1388662845082267e-06, "loss": 1.5407819747924805, "step": 14526 }, { "epoch": 2.6443979248202423, "grad_norm": 6.25, "learning_rate": 1.1385859171811806e-06, "loss": 1.3907628059387207, "step": 14528 }, { "epoch": 2.6447619914444345, "grad_norm": 10.75, "learning_rate": 1.1383058230091077e-06, "loss": 1.573671579360962, "step": 14530 }, { "epoch": 2.6451260580686267, "grad_norm": 13.4375, "learning_rate": 1.1380260020331108e-06, "loss": 1.3575526475906372, "step": 14532 }, { "epoch": 2.645490124692819, "grad_norm": 9.125, "learning_rate": 1.1377464542942517e-06, "loss": 1.273698329925537, "step": 14534 }, { "epoch": 2.645854191317011, "grad_norm": 9.0625, "learning_rate": 1.1374671798335534e-06, "loss": 1.4671393632888794, "step": 14536 }, { "epoch": 2.6462182579412032, "grad_norm": 12.5, "learning_rate": 1.1371881786919986e-06, "loss": 1.550858736038208, "step": 14538 }, { "epoch": 2.6465823245653954, "grad_norm": 11.6875, "learning_rate": 1.1369094509105293e-06, "loss": 1.6266849040985107, "step": 14540 }, { "epoch": 2.6469463911895876, "grad_norm": 5.1875, "learning_rate": 1.1366309965300474e-06, "loss": 1.3505311012268066, "step": 14542 }, { "epoch": 2.64731045781378, "grad_norm": 9.0, "learning_rate": 1.136352815591415e-06, "loss": 1.3523355722427368, "step": 14544 }, { "epoch": 2.647674524437972, "grad_norm": 14.8125, "learning_rate": 1.1360749081354546e-06, "loss": 1.112898588180542, "step": 14546 }, { "epoch": 2.648038591062164, "grad_norm": 19.0, "learning_rate": 1.1357972742029472e-06, "loss": 1.0763049125671387, "step": 14548 }, { "epoch": 2.648402657686357, "grad_norm": 7.375, "learning_rate": 1.135519913834635e-06, "loss": 0.8174800872802734, "step": 14550 }, { "epoch": 2.6487667243105486, "grad_norm": 6.0, "learning_rate": 1.1352428270712198e-06, "loss": 1.0621155500411987, "step": 14552 }, { "epoch": 2.6491307909347412, "grad_norm": 23.625, "learning_rate": 1.134966013953362e-06, "loss": 1.335787296295166, "step": 14554 }, { "epoch": 2.6494948575589334, "grad_norm": 16.875, "learning_rate": 1.1346894745216836e-06, "loss": 1.3539605140686035, "step": 14556 }, { "epoch": 2.6498589241831256, "grad_norm": 9.875, "learning_rate": 1.1344132088167652e-06, "loss": 1.3322436809539795, "step": 14558 }, { "epoch": 2.650222990807318, "grad_norm": 19.375, "learning_rate": 1.1341372168791482e-06, "loss": 1.0502952337265015, "step": 14560 }, { "epoch": 2.65058705743151, "grad_norm": 7.21875, "learning_rate": 1.1338614987493323e-06, "loss": 0.9821246266365051, "step": 14562 }, { "epoch": 2.650951124055702, "grad_norm": 19.125, "learning_rate": 1.133586054467779e-06, "loss": 1.519580364227295, "step": 14564 }, { "epoch": 2.6513151906798944, "grad_norm": 11.1875, "learning_rate": 1.1333108840749085e-06, "loss": 1.449804663658142, "step": 14566 }, { "epoch": 2.6516792573040866, "grad_norm": 15.75, "learning_rate": 1.1330359876111006e-06, "loss": 1.5614076852798462, "step": 14568 }, { "epoch": 2.652043323928279, "grad_norm": 21.375, "learning_rate": 1.132761365116695e-06, "loss": 1.7726774215698242, "step": 14570 }, { "epoch": 2.652407390552471, "grad_norm": 10.9375, "learning_rate": 1.1324870166319923e-06, "loss": 1.3080321550369263, "step": 14572 }, { "epoch": 2.652771457176663, "grad_norm": 13.3125, "learning_rate": 1.1322129421972514e-06, "loss": 1.405094861984253, "step": 14574 }, { "epoch": 2.653135523800856, "grad_norm": 49.0, "learning_rate": 1.1319391418526917e-06, "loss": 1.595885157585144, "step": 14576 }, { "epoch": 2.6534995904250476, "grad_norm": 13.375, "learning_rate": 1.1316656156384926e-06, "loss": 1.574186086654663, "step": 14578 }, { "epoch": 2.65386365704924, "grad_norm": 13.625, "learning_rate": 1.131392363594792e-06, "loss": 1.7120325565338135, "step": 14580 }, { "epoch": 2.6542277236734324, "grad_norm": 57.75, "learning_rate": 1.1311193857616901e-06, "loss": 1.4646236896514893, "step": 14582 }, { "epoch": 2.6545917902976246, "grad_norm": 6.03125, "learning_rate": 1.130846682179244e-06, "loss": 1.0188984870910645, "step": 14584 }, { "epoch": 2.654955856921817, "grad_norm": 73.5, "learning_rate": 1.1305742528874724e-06, "loss": 1.4415225982666016, "step": 14586 }, { "epoch": 2.655319923546009, "grad_norm": 7.84375, "learning_rate": 1.130302097926353e-06, "loss": 1.1592278480529785, "step": 14588 }, { "epoch": 2.655683990170201, "grad_norm": 8.6875, "learning_rate": 1.1300302173358232e-06, "loss": 1.4418576955795288, "step": 14590 }, { "epoch": 2.6560480567943934, "grad_norm": 10.1875, "learning_rate": 1.1297586111557813e-06, "loss": 1.396946907043457, "step": 14592 }, { "epoch": 2.6564121234185856, "grad_norm": 30.25, "learning_rate": 1.1294872794260835e-06, "loss": 1.6139272451400757, "step": 14594 }, { "epoch": 2.6567761900427778, "grad_norm": 25.125, "learning_rate": 1.1292162221865468e-06, "loss": 1.820549488067627, "step": 14596 }, { "epoch": 2.65714025666697, "grad_norm": 7.96875, "learning_rate": 1.128945439476948e-06, "loss": 1.3808941841125488, "step": 14598 }, { "epoch": 2.657504323291162, "grad_norm": 3.703125, "learning_rate": 1.1286749313370229e-06, "loss": 1.07881760597229, "step": 14600 }, { "epoch": 2.6578683899153543, "grad_norm": 5.625, "learning_rate": 1.1284046978064684e-06, "loss": 0.8214249014854431, "step": 14602 }, { "epoch": 2.6582324565395465, "grad_norm": 10.0625, "learning_rate": 1.1281347389249393e-06, "loss": 1.4165815114974976, "step": 14604 }, { "epoch": 2.658596523163739, "grad_norm": 16.375, "learning_rate": 1.1278650547320513e-06, "loss": 1.6547558307647705, "step": 14606 }, { "epoch": 2.658960589787931, "grad_norm": 22.0, "learning_rate": 1.1275956452673793e-06, "loss": 1.5943236351013184, "step": 14608 }, { "epoch": 2.6593246564121236, "grad_norm": 8.6875, "learning_rate": 1.1273265105704579e-06, "loss": 0.21635861694812775, "step": 14610 }, { "epoch": 2.6596887230363158, "grad_norm": 23.75, "learning_rate": 1.1270576506807825e-06, "loss": 1.3442341089248657, "step": 14612 }, { "epoch": 2.660052789660508, "grad_norm": 19.625, "learning_rate": 1.126789065637806e-06, "loss": 1.7877488136291504, "step": 14614 }, { "epoch": 2.6604168562847, "grad_norm": 18.375, "learning_rate": 1.1265207554809431e-06, "loss": 1.4619274139404297, "step": 14616 }, { "epoch": 2.6607809229088923, "grad_norm": 10.125, "learning_rate": 1.1262527202495663e-06, "loss": 1.626192331314087, "step": 14618 }, { "epoch": 2.6611449895330845, "grad_norm": 10.0, "learning_rate": 1.1259849599830091e-06, "loss": 1.585963249206543, "step": 14620 }, { "epoch": 2.6615090561572767, "grad_norm": 14.0, "learning_rate": 1.1257174747205645e-06, "loss": 1.6999564170837402, "step": 14622 }, { "epoch": 2.661873122781469, "grad_norm": 12.5, "learning_rate": 1.1254502645014849e-06, "loss": 1.8844951391220093, "step": 14624 }, { "epoch": 2.662237189405661, "grad_norm": 9.9375, "learning_rate": 1.1251833293649819e-06, "loss": 1.475205421447754, "step": 14626 }, { "epoch": 2.6626012560298533, "grad_norm": 12.1875, "learning_rate": 1.1249166693502274e-06, "loss": 1.9800291061401367, "step": 14628 }, { "epoch": 2.6629653226540455, "grad_norm": 19.375, "learning_rate": 1.1246502844963523e-06, "loss": 1.4416791200637817, "step": 14630 }, { "epoch": 2.663329389278238, "grad_norm": 15.125, "learning_rate": 1.1243841748424486e-06, "loss": 1.2385917901992798, "step": 14632 }, { "epoch": 2.66369345590243, "grad_norm": 11.25, "learning_rate": 1.1241183404275653e-06, "loss": 0.8826155662536621, "step": 14634 }, { "epoch": 2.6640575225266225, "grad_norm": 25.75, "learning_rate": 1.1238527812907136e-06, "loss": 1.5413708686828613, "step": 14636 }, { "epoch": 2.6644215891508147, "grad_norm": 12.125, "learning_rate": 1.1235874974708626e-06, "loss": 2.031757116317749, "step": 14638 }, { "epoch": 2.664785655775007, "grad_norm": 10.4375, "learning_rate": 1.123322489006942e-06, "loss": 1.492416501045227, "step": 14640 }, { "epoch": 2.665149722399199, "grad_norm": 8.4375, "learning_rate": 1.123057755937841e-06, "loss": 1.468606948852539, "step": 14642 }, { "epoch": 2.6655137890233913, "grad_norm": 12.1875, "learning_rate": 1.1227932983024076e-06, "loss": 1.4281564950942993, "step": 14644 }, { "epoch": 2.6658778556475835, "grad_norm": 14.5625, "learning_rate": 1.1225291161394498e-06, "loss": 1.9869191646575928, "step": 14646 }, { "epoch": 2.6662419222717757, "grad_norm": 9.8125, "learning_rate": 1.1222652094877357e-06, "loss": 1.6500223875045776, "step": 14648 }, { "epoch": 2.666605988895968, "grad_norm": 3.296875, "learning_rate": 1.1220015783859917e-06, "loss": 0.9932403564453125, "step": 14650 }, { "epoch": 2.66697005552016, "grad_norm": 8.75, "learning_rate": 1.121738222872906e-06, "loss": 1.2444219589233398, "step": 14652 }, { "epoch": 2.6673341221443523, "grad_norm": 8.625, "learning_rate": 1.121475142987124e-06, "loss": 1.322481393814087, "step": 14654 }, { "epoch": 2.6676981887685445, "grad_norm": 12.9375, "learning_rate": 1.1212123387672516e-06, "loss": 1.8623713254928589, "step": 14656 }, { "epoch": 2.668062255392737, "grad_norm": 83.5, "learning_rate": 1.1209498102518544e-06, "loss": 1.9126713275909424, "step": 14658 }, { "epoch": 2.668426322016929, "grad_norm": 18.625, "learning_rate": 1.1206875574794573e-06, "loss": 1.214752197265625, "step": 14660 }, { "epoch": 2.6687903886411215, "grad_norm": 13.625, "learning_rate": 1.1204255804885455e-06, "loss": 1.4213049411773682, "step": 14662 }, { "epoch": 2.6691544552653137, "grad_norm": 23.5, "learning_rate": 1.1201638793175618e-06, "loss": 1.2736343145370483, "step": 14664 }, { "epoch": 2.669518521889506, "grad_norm": 12.875, "learning_rate": 1.1199024540049108e-06, "loss": 1.472684621810913, "step": 14666 }, { "epoch": 2.669882588513698, "grad_norm": 9.9375, "learning_rate": 1.1196413045889553e-06, "loss": 1.4619131088256836, "step": 14668 }, { "epoch": 2.6702466551378903, "grad_norm": 6.96875, "learning_rate": 1.119380431108018e-06, "loss": 1.5982983112335205, "step": 14670 }, { "epoch": 2.6706107217620825, "grad_norm": 14.625, "learning_rate": 1.119119833600381e-06, "loss": 1.685892105102539, "step": 14672 }, { "epoch": 2.6709747883862747, "grad_norm": 13.8125, "learning_rate": 1.118859512104286e-06, "loss": 2.2336182594299316, "step": 14674 }, { "epoch": 2.671338855010467, "grad_norm": 12.3125, "learning_rate": 1.1185994666579336e-06, "loss": 1.7420251369476318, "step": 14676 }, { "epoch": 2.671702921634659, "grad_norm": 24.625, "learning_rate": 1.1183396972994853e-06, "loss": 1.7761002779006958, "step": 14678 }, { "epoch": 2.6720669882588513, "grad_norm": 9.3125, "learning_rate": 1.1180802040670601e-06, "loss": 0.8746968507766724, "step": 14680 }, { "epoch": 2.6724310548830434, "grad_norm": 10.0, "learning_rate": 1.1178209869987387e-06, "loss": 1.4348918199539185, "step": 14682 }, { "epoch": 2.672795121507236, "grad_norm": 9.3125, "learning_rate": 1.1175620461325595e-06, "loss": 0.6703236103057861, "step": 14684 }, { "epoch": 2.673159188131428, "grad_norm": 14.5625, "learning_rate": 1.117303381506521e-06, "loss": 1.5436615943908691, "step": 14686 }, { "epoch": 2.6735232547556205, "grad_norm": 17.375, "learning_rate": 1.1170449931585816e-06, "loss": 1.6233363151550293, "step": 14688 }, { "epoch": 2.6738873213798127, "grad_norm": 22.875, "learning_rate": 1.1167868811266583e-06, "loss": 1.157469630241394, "step": 14690 }, { "epoch": 2.674251388004005, "grad_norm": 37.5, "learning_rate": 1.1165290454486287e-06, "loss": 0.5947645306587219, "step": 14692 }, { "epoch": 2.674615454628197, "grad_norm": 16.75, "learning_rate": 1.116271486162328e-06, "loss": 1.547325849533081, "step": 14694 }, { "epoch": 2.6749795212523892, "grad_norm": 8.9375, "learning_rate": 1.116014203305553e-06, "loss": 1.7868096828460693, "step": 14696 }, { "epoch": 2.6753435878765814, "grad_norm": 10.125, "learning_rate": 1.1157571969160586e-06, "loss": 1.553477168083191, "step": 14698 }, { "epoch": 2.6757076545007736, "grad_norm": 10.75, "learning_rate": 1.1155004670315588e-06, "loss": 1.070258617401123, "step": 14700 }, { "epoch": 2.676071721124966, "grad_norm": 19.375, "learning_rate": 1.115244013689729e-06, "loss": 1.1477819681167603, "step": 14702 }, { "epoch": 2.676435787749158, "grad_norm": 31.0, "learning_rate": 1.1149878369282018e-06, "loss": 1.0180714130401611, "step": 14704 }, { "epoch": 2.67679985437335, "grad_norm": 21.0, "learning_rate": 1.1147319367845699e-06, "loss": 1.2511703968048096, "step": 14706 }, { "epoch": 2.6771639209975424, "grad_norm": 14.5, "learning_rate": 1.1144763132963862e-06, "loss": 1.5910966396331787, "step": 14708 }, { "epoch": 2.677527987621735, "grad_norm": 35.5, "learning_rate": 1.1142209665011615e-06, "loss": 2.2161030769348145, "step": 14710 }, { "epoch": 2.677892054245927, "grad_norm": 5.59375, "learning_rate": 1.1139658964363687e-06, "loss": 1.056174397468567, "step": 14712 }, { "epoch": 2.6782561208701194, "grad_norm": 8.1875, "learning_rate": 1.1137111031394366e-06, "loss": 1.4566659927368164, "step": 14714 }, { "epoch": 2.678620187494311, "grad_norm": 11.75, "learning_rate": 1.1134565866477556e-06, "loss": 1.432720422744751, "step": 14716 }, { "epoch": 2.678984254118504, "grad_norm": 11.1875, "learning_rate": 1.1132023469986752e-06, "loss": 1.5778437852859497, "step": 14718 }, { "epoch": 2.679348320742696, "grad_norm": 6.5625, "learning_rate": 1.1129483842295035e-06, "loss": 1.4382703304290771, "step": 14720 }, { "epoch": 2.679712387366888, "grad_norm": 6.0, "learning_rate": 1.1126946983775094e-06, "loss": 1.1238818168640137, "step": 14722 }, { "epoch": 2.6800764539910804, "grad_norm": 8.5, "learning_rate": 1.1124412894799192e-06, "loss": 1.2095158100128174, "step": 14724 }, { "epoch": 2.6804405206152726, "grad_norm": 7.5625, "learning_rate": 1.1121881575739208e-06, "loss": 0.6999382972717285, "step": 14726 }, { "epoch": 2.680804587239465, "grad_norm": 38.25, "learning_rate": 1.111935302696659e-06, "loss": 1.3341186046600342, "step": 14728 }, { "epoch": 2.681168653863657, "grad_norm": 10.4375, "learning_rate": 1.1116827248852397e-06, "loss": 0.7850853800773621, "step": 14730 }, { "epoch": 2.681532720487849, "grad_norm": 15.3125, "learning_rate": 1.1114304241767287e-06, "loss": 1.4966394901275635, "step": 14732 }, { "epoch": 2.6818967871120414, "grad_norm": 36.25, "learning_rate": 1.111178400608149e-06, "loss": 1.0849692821502686, "step": 14734 }, { "epoch": 2.6822608537362336, "grad_norm": 14.375, "learning_rate": 1.1109266542164838e-06, "loss": 1.555729627609253, "step": 14736 }, { "epoch": 2.6826249203604258, "grad_norm": 7.03125, "learning_rate": 1.110675185038677e-06, "loss": 1.0224976539611816, "step": 14738 }, { "epoch": 2.6829889869846184, "grad_norm": 10.25, "learning_rate": 1.1104239931116293e-06, "loss": 1.3707863092422485, "step": 14740 }, { "epoch": 2.68335305360881, "grad_norm": 11.125, "learning_rate": 1.1101730784722034e-06, "loss": 1.4015088081359863, "step": 14742 }, { "epoch": 2.683717120233003, "grad_norm": 8.3125, "learning_rate": 1.1099224411572192e-06, "loss": 1.3800102472305298, "step": 14744 }, { "epoch": 2.684081186857195, "grad_norm": 12.6875, "learning_rate": 1.109672081203457e-06, "loss": 1.4732944965362549, "step": 14746 }, { "epoch": 2.684445253481387, "grad_norm": 10.125, "learning_rate": 1.109421998647656e-06, "loss": 1.2888832092285156, "step": 14748 }, { "epoch": 2.6848093201055794, "grad_norm": 5.75, "learning_rate": 1.1091721935265148e-06, "loss": 1.0861753225326538, "step": 14750 }, { "epoch": 2.6851733867297716, "grad_norm": 14.4375, "learning_rate": 1.1089226658766916e-06, "loss": 1.341196060180664, "step": 14752 }, { "epoch": 2.6855374533539638, "grad_norm": 15.1875, "learning_rate": 1.1086734157348033e-06, "loss": 1.214228868484497, "step": 14754 }, { "epoch": 2.685901519978156, "grad_norm": 11.875, "learning_rate": 1.1084244431374261e-06, "loss": 1.1609221696853638, "step": 14756 }, { "epoch": 2.686265586602348, "grad_norm": 15.1875, "learning_rate": 1.1081757481210962e-06, "loss": 1.769871711730957, "step": 14758 }, { "epoch": 2.6866296532265403, "grad_norm": 10.3125, "learning_rate": 1.107927330722308e-06, "loss": 1.7308299541473389, "step": 14760 }, { "epoch": 2.6869937198507325, "grad_norm": 4.9375, "learning_rate": 1.1076791909775168e-06, "loss": 1.2495229244232178, "step": 14762 }, { "epoch": 2.6873577864749247, "grad_norm": 7.65625, "learning_rate": 1.107431328923135e-06, "loss": 1.0096275806427002, "step": 14764 }, { "epoch": 2.6877218530991174, "grad_norm": 12.375, "learning_rate": 1.1071837445955356e-06, "loss": 1.4926071166992188, "step": 14766 }, { "epoch": 2.688085919723309, "grad_norm": 27.75, "learning_rate": 1.106936438031051e-06, "loss": 1.8004555702209473, "step": 14768 }, { "epoch": 2.6884499863475018, "grad_norm": 20.0, "learning_rate": 1.1066894092659719e-06, "loss": 1.40166175365448, "step": 14770 }, { "epoch": 2.688814052971694, "grad_norm": 7.21875, "learning_rate": 1.1064426583365498e-06, "loss": 1.55739426612854, "step": 14772 }, { "epoch": 2.689178119595886, "grad_norm": 18.75, "learning_rate": 1.1061961852789933e-06, "loss": 1.456763744354248, "step": 14774 }, { "epoch": 2.6895421862200783, "grad_norm": 10.25, "learning_rate": 1.1059499901294713e-06, "loss": 1.6596550941467285, "step": 14776 }, { "epoch": 2.6899062528442705, "grad_norm": 9.625, "learning_rate": 1.1057040729241127e-06, "loss": 1.402900218963623, "step": 14778 }, { "epoch": 2.6902703194684627, "grad_norm": 6.09375, "learning_rate": 1.1054584336990043e-06, "loss": 1.3532071113586426, "step": 14780 }, { "epoch": 2.690634386092655, "grad_norm": 9.125, "learning_rate": 1.1052130724901932e-06, "loss": 1.2240444421768188, "step": 14782 }, { "epoch": 2.690998452716847, "grad_norm": 33.5, "learning_rate": 1.1049679893336846e-06, "loss": 1.4172601699829102, "step": 14784 }, { "epoch": 2.6913625193410393, "grad_norm": 6.0625, "learning_rate": 1.1047231842654436e-06, "loss": 1.1519123315811157, "step": 14786 }, { "epoch": 2.6917265859652315, "grad_norm": 10.5625, "learning_rate": 1.1044786573213945e-06, "loss": 1.2620350122451782, "step": 14788 }, { "epoch": 2.6920906525894237, "grad_norm": 14.0625, "learning_rate": 1.1042344085374202e-06, "loss": 1.023705244064331, "step": 14790 }, { "epoch": 2.6924547192136163, "grad_norm": 34.25, "learning_rate": 1.1039904379493643e-06, "loss": 1.3071603775024414, "step": 14792 }, { "epoch": 2.692818785837808, "grad_norm": 33.25, "learning_rate": 1.1037467455930272e-06, "loss": 1.8074480295181274, "step": 14794 }, { "epoch": 2.6931828524620007, "grad_norm": 13.5625, "learning_rate": 1.1035033315041705e-06, "loss": 1.2304755449295044, "step": 14796 }, { "epoch": 2.693546919086193, "grad_norm": 11.3125, "learning_rate": 1.1032601957185143e-06, "loss": 0.8926444053649902, "step": 14798 }, { "epoch": 2.693910985710385, "grad_norm": 9.0, "learning_rate": 1.1030173382717371e-06, "loss": 1.5995372533798218, "step": 14800 }, { "epoch": 2.6942750523345773, "grad_norm": 3.625, "learning_rate": 1.1027747591994782e-06, "loss": 1.3425649404525757, "step": 14802 }, { "epoch": 2.6946391189587695, "grad_norm": 7.5625, "learning_rate": 1.1025324585373344e-06, "loss": 1.5030573606491089, "step": 14804 }, { "epoch": 2.6950031855829617, "grad_norm": 27.0, "learning_rate": 1.1022904363208627e-06, "loss": 1.4105416536331177, "step": 14806 }, { "epoch": 2.695367252207154, "grad_norm": 12.6875, "learning_rate": 1.1020486925855785e-06, "loss": 1.6102200746536255, "step": 14808 }, { "epoch": 2.695731318831346, "grad_norm": 17.125, "learning_rate": 1.1018072273669567e-06, "loss": 1.4038678407669067, "step": 14810 }, { "epoch": 2.6960953854555383, "grad_norm": 48.75, "learning_rate": 1.1015660407004323e-06, "loss": 1.6581690311431885, "step": 14812 }, { "epoch": 2.6964594520797305, "grad_norm": 10.0625, "learning_rate": 1.1013251326213972e-06, "loss": 1.4166088104248047, "step": 14814 }, { "epoch": 2.6968235187039227, "grad_norm": 7.625, "learning_rate": 1.1010845031652046e-06, "loss": 1.1834051609039307, "step": 14816 }, { "epoch": 2.6971875853281153, "grad_norm": 9.5, "learning_rate": 1.1008441523671653e-06, "loss": 0.860220730304718, "step": 14818 }, { "epoch": 2.697551651952307, "grad_norm": 12.6875, "learning_rate": 1.1006040802625496e-06, "loss": 1.2868974208831787, "step": 14820 }, { "epoch": 2.6979157185764997, "grad_norm": 9.3125, "learning_rate": 1.1003642868865882e-06, "loss": 1.3573637008666992, "step": 14822 }, { "epoch": 2.6982797852006914, "grad_norm": 12.5625, "learning_rate": 1.100124772274469e-06, "loss": 1.1661683320999146, "step": 14824 }, { "epoch": 2.698643851824884, "grad_norm": 16.125, "learning_rate": 1.09988553646134e-06, "loss": 0.8377955555915833, "step": 14826 }, { "epoch": 2.6990079184490763, "grad_norm": 46.25, "learning_rate": 1.0996465794823077e-06, "loss": 0.7688472270965576, "step": 14828 }, { "epoch": 2.6993719850732685, "grad_norm": 4.53125, "learning_rate": 1.0994079013724385e-06, "loss": 1.2073286771774292, "step": 14830 }, { "epoch": 2.6997360516974607, "grad_norm": 17.25, "learning_rate": 1.0991695021667576e-06, "loss": 1.6766215562820435, "step": 14832 }, { "epoch": 2.700100118321653, "grad_norm": 6.9375, "learning_rate": 1.0989313819002488e-06, "loss": 1.5042650699615479, "step": 14834 }, { "epoch": 2.700464184945845, "grad_norm": 8.625, "learning_rate": 1.0986935406078552e-06, "loss": 1.2722575664520264, "step": 14836 }, { "epoch": 2.7008282515700373, "grad_norm": 15.875, "learning_rate": 1.0984559783244792e-06, "loss": 1.2128934860229492, "step": 14838 }, { "epoch": 2.7011923181942294, "grad_norm": 7.96875, "learning_rate": 1.098218695084982e-06, "loss": 1.174805760383606, "step": 14840 }, { "epoch": 2.7015563848184216, "grad_norm": 7.125, "learning_rate": 1.0979816909241845e-06, "loss": 1.3532884120941162, "step": 14842 }, { "epoch": 2.701920451442614, "grad_norm": 12.8125, "learning_rate": 1.0977449658768653e-06, "loss": 1.617944598197937, "step": 14844 }, { "epoch": 2.702284518066806, "grad_norm": 17.375, "learning_rate": 1.0975085199777633e-06, "loss": 1.5966460704803467, "step": 14846 }, { "epoch": 2.7026485846909987, "grad_norm": 22.5, "learning_rate": 1.0972723532615759e-06, "loss": 1.6074533462524414, "step": 14848 }, { "epoch": 2.7030126513151904, "grad_norm": 13.125, "learning_rate": 1.097036465762959e-06, "loss": 1.2419157028198242, "step": 14850 }, { "epoch": 2.703376717939383, "grad_norm": 6.25, "learning_rate": 1.0968008575165297e-06, "loss": 1.4943106174468994, "step": 14852 }, { "epoch": 2.7037407845635753, "grad_norm": 12.125, "learning_rate": 1.096565528556861e-06, "loss": 1.5701483488082886, "step": 14854 }, { "epoch": 2.7041048511877674, "grad_norm": 15.375, "learning_rate": 1.0963304789184872e-06, "loss": 1.543341040611267, "step": 14856 }, { "epoch": 2.7044689178119596, "grad_norm": 22.0, "learning_rate": 1.0960957086359005e-06, "loss": 1.6615351438522339, "step": 14858 }, { "epoch": 2.704832984436152, "grad_norm": 7.0, "learning_rate": 1.0958612177435526e-06, "loss": 1.0224934816360474, "step": 14860 }, { "epoch": 2.705197051060344, "grad_norm": 62.0, "learning_rate": 1.0956270062758548e-06, "loss": 1.6262320280075073, "step": 14862 }, { "epoch": 2.705561117684536, "grad_norm": 10.875, "learning_rate": 1.0953930742671758e-06, "loss": 1.477679967880249, "step": 14864 }, { "epoch": 2.7059251843087284, "grad_norm": 11.6875, "learning_rate": 1.0951594217518442e-06, "loss": 1.423030138015747, "step": 14866 }, { "epoch": 2.7062892509329206, "grad_norm": 25.125, "learning_rate": 1.094926048764148e-06, "loss": 1.366789698600769, "step": 14868 }, { "epoch": 2.706653317557113, "grad_norm": 4.78125, "learning_rate": 1.0946929553383334e-06, "loss": 1.3657417297363281, "step": 14870 }, { "epoch": 2.707017384181305, "grad_norm": 7.34375, "learning_rate": 1.0944601415086064e-06, "loss": 1.2840315103530884, "step": 14872 }, { "epoch": 2.7073814508054976, "grad_norm": 41.5, "learning_rate": 1.0942276073091312e-06, "loss": 1.3589062690734863, "step": 14874 }, { "epoch": 2.7077455174296894, "grad_norm": 16.0, "learning_rate": 1.0939953527740309e-06, "loss": 1.4740675687789917, "step": 14876 }, { "epoch": 2.708109584053882, "grad_norm": 35.5, "learning_rate": 1.0937633779373883e-06, "loss": 1.9843084812164307, "step": 14878 }, { "epoch": 2.708473650678074, "grad_norm": 19.25, "learning_rate": 1.0935316828332442e-06, "loss": 1.3780264854431152, "step": 14880 }, { "epoch": 2.7088377173022664, "grad_norm": 7.78125, "learning_rate": 1.0933002674956005e-06, "loss": 1.1791101694107056, "step": 14882 }, { "epoch": 2.7092017839264586, "grad_norm": 7.625, "learning_rate": 1.0930691319584147e-06, "loss": 1.2423118352890015, "step": 14884 }, { "epoch": 2.709565850550651, "grad_norm": 8.5, "learning_rate": 1.0928382762556056e-06, "loss": 1.3711953163146973, "step": 14886 }, { "epoch": 2.709929917174843, "grad_norm": 19.75, "learning_rate": 1.0926077004210507e-06, "loss": 1.5114002227783203, "step": 14888 }, { "epoch": 2.710293983799035, "grad_norm": 6.59375, "learning_rate": 1.0923774044885854e-06, "loss": 1.085712194442749, "step": 14890 }, { "epoch": 2.7106580504232274, "grad_norm": 15.1875, "learning_rate": 1.0921473884920053e-06, "loss": 0.7910588979721069, "step": 14892 }, { "epoch": 2.7110221170474196, "grad_norm": 8.5, "learning_rate": 1.0919176524650642e-06, "loss": 1.142046570777893, "step": 14894 }, { "epoch": 2.7113861836716118, "grad_norm": 22.5, "learning_rate": 1.0916881964414749e-06, "loss": 1.6150672435760498, "step": 14896 }, { "epoch": 2.711750250295804, "grad_norm": 9.8125, "learning_rate": 1.0914590204549086e-06, "loss": 1.270698070526123, "step": 14898 }, { "epoch": 2.7121143169199966, "grad_norm": 6.65625, "learning_rate": 1.0912301245389965e-06, "loss": 1.5403876304626465, "step": 14900 }, { "epoch": 2.7124783835441884, "grad_norm": 7.125, "learning_rate": 1.0910015087273285e-06, "loss": 1.2519055604934692, "step": 14902 }, { "epoch": 2.712842450168381, "grad_norm": 14.25, "learning_rate": 1.0907731730534524e-06, "loss": 1.5530656576156616, "step": 14904 }, { "epoch": 2.713206516792573, "grad_norm": 10.3125, "learning_rate": 1.0905451175508756e-06, "loss": 1.0416339635849, "step": 14906 }, { "epoch": 2.7135705834167654, "grad_norm": 21.5, "learning_rate": 1.0903173422530644e-06, "loss": 1.3582653999328613, "step": 14908 }, { "epoch": 2.7139346500409576, "grad_norm": 12.5, "learning_rate": 1.090089847193444e-06, "loss": 0.950001060962677, "step": 14910 }, { "epoch": 2.7142987166651498, "grad_norm": 15.5625, "learning_rate": 1.0898626324053986e-06, "loss": 1.4787170886993408, "step": 14912 }, { "epoch": 2.714662783289342, "grad_norm": 10.0625, "learning_rate": 1.0896356979222706e-06, "loss": 1.2867422103881836, "step": 14914 }, { "epoch": 2.715026849913534, "grad_norm": 15.9375, "learning_rate": 1.089409043777362e-06, "loss": 1.447325348854065, "step": 14916 }, { "epoch": 2.7153909165377264, "grad_norm": 18.125, "learning_rate": 1.089182670003933e-06, "loss": 1.357314109802246, "step": 14918 }, { "epoch": 2.7157549831619185, "grad_norm": 7.09375, "learning_rate": 1.0889565766352035e-06, "loss": 0.9751088619232178, "step": 14920 }, { "epoch": 2.7161190497861107, "grad_norm": 9.5, "learning_rate": 1.0887307637043517e-06, "loss": 1.7251536846160889, "step": 14922 }, { "epoch": 2.716483116410303, "grad_norm": 11.5625, "learning_rate": 1.0885052312445143e-06, "loss": 1.4645901918411255, "step": 14924 }, { "epoch": 2.7168471830344956, "grad_norm": 9.6875, "learning_rate": 1.088279979288788e-06, "loss": 1.4059804677963257, "step": 14926 }, { "epoch": 2.7172112496586873, "grad_norm": 4.9375, "learning_rate": 1.088055007870227e-06, "loss": 0.921481728553772, "step": 14928 }, { "epoch": 2.71757531628288, "grad_norm": 10.8125, "learning_rate": 1.087830317021845e-06, "loss": 0.9975974559783936, "step": 14930 }, { "epoch": 2.717939382907072, "grad_norm": 13.125, "learning_rate": 1.0876059067766149e-06, "loss": 0.6959519386291504, "step": 14932 }, { "epoch": 2.7183034495312643, "grad_norm": 15.9375, "learning_rate": 1.0873817771674678e-06, "loss": 1.6531071662902832, "step": 14934 }, { "epoch": 2.7186675161554565, "grad_norm": 30.0, "learning_rate": 1.0871579282272937e-06, "loss": 1.711456298828125, "step": 14936 }, { "epoch": 2.7190315827796487, "grad_norm": 7.40625, "learning_rate": 1.0869343599889414e-06, "loss": 1.3563625812530518, "step": 14938 }, { "epoch": 2.719395649403841, "grad_norm": 8.3125, "learning_rate": 1.0867110724852184e-06, "loss": 1.3335464000701904, "step": 14940 }, { "epoch": 2.719759716028033, "grad_norm": 36.25, "learning_rate": 1.0864880657488924e-06, "loss": 1.4002331495285034, "step": 14942 }, { "epoch": 2.7201237826522253, "grad_norm": 8.75, "learning_rate": 1.0862653398126877e-06, "loss": 1.7840296030044556, "step": 14944 }, { "epoch": 2.7204878492764175, "grad_norm": 34.5, "learning_rate": 1.0860428947092885e-06, "loss": 1.1636714935302734, "step": 14946 }, { "epoch": 2.7208519159006097, "grad_norm": 7.21875, "learning_rate": 1.0858207304713381e-06, "loss": 1.1873703002929688, "step": 14948 }, { "epoch": 2.721215982524802, "grad_norm": 16.375, "learning_rate": 1.0855988471314377e-06, "loss": 1.2008424997329712, "step": 14950 }, { "epoch": 2.7215800491489945, "grad_norm": 7.09375, "learning_rate": 1.0853772447221484e-06, "loss": 1.6064014434814453, "step": 14952 }, { "epoch": 2.7219441157731863, "grad_norm": 19.375, "learning_rate": 1.0851559232759893e-06, "loss": 1.1498956680297852, "step": 14954 }, { "epoch": 2.722308182397379, "grad_norm": 19.375, "learning_rate": 1.084934882825438e-06, "loss": 1.8827636241912842, "step": 14956 }, { "epoch": 2.7226722490215707, "grad_norm": 15.0625, "learning_rate": 1.0847141234029317e-06, "loss": 1.5877909660339355, "step": 14958 }, { "epoch": 2.7230363156457633, "grad_norm": 108.5, "learning_rate": 1.0844936450408656e-06, "loss": 1.1987026929855347, "step": 14960 }, { "epoch": 2.7234003822699555, "grad_norm": 16.375, "learning_rate": 1.0842734477715946e-06, "loss": 0.9726529121398926, "step": 14962 }, { "epoch": 2.7237644488941477, "grad_norm": 4.875, "learning_rate": 1.084053531627431e-06, "loss": 1.2493902444839478, "step": 14964 }, { "epoch": 2.72412851551834, "grad_norm": 14.4375, "learning_rate": 1.0838338966406473e-06, "loss": 1.2756197452545166, "step": 14966 }, { "epoch": 2.724492582142532, "grad_norm": 44.25, "learning_rate": 1.0836145428434736e-06, "loss": 1.310465931892395, "step": 14968 }, { "epoch": 2.7248566487667243, "grad_norm": 7.65625, "learning_rate": 1.083395470268099e-06, "loss": 1.5246061086654663, "step": 14970 }, { "epoch": 2.7252207153909165, "grad_norm": 3.109375, "learning_rate": 1.0831766789466724e-06, "loss": 1.0762794017791748, "step": 14972 }, { "epoch": 2.7255847820151087, "grad_norm": 22.625, "learning_rate": 1.0829581689112996e-06, "loss": 1.4710791110992432, "step": 14974 }, { "epoch": 2.725948848639301, "grad_norm": 10.25, "learning_rate": 1.0827399401940464e-06, "loss": 1.3621971607208252, "step": 14976 }, { "epoch": 2.726312915263493, "grad_norm": 16.5, "learning_rate": 1.082521992826937e-06, "loss": 1.884993314743042, "step": 14978 }, { "epoch": 2.7266769818876853, "grad_norm": 13.9375, "learning_rate": 1.0823043268419542e-06, "loss": 1.5959687232971191, "step": 14980 }, { "epoch": 2.727041048511878, "grad_norm": 12.875, "learning_rate": 1.08208694227104e-06, "loss": 1.381267786026001, "step": 14982 }, { "epoch": 2.7274051151360696, "grad_norm": 11.125, "learning_rate": 1.0818698391460943e-06, "loss": 1.377220869064331, "step": 14984 }, { "epoch": 2.7277691817602623, "grad_norm": 40.0, "learning_rate": 1.0816530174989762e-06, "loss": 1.6236605644226074, "step": 14986 }, { "epoch": 2.7281332483844545, "grad_norm": 12.0, "learning_rate": 1.0814364773615033e-06, "loss": 1.6138181686401367, "step": 14988 }, { "epoch": 2.7284973150086467, "grad_norm": 12.375, "learning_rate": 1.081220218765452e-06, "loss": 1.669021487236023, "step": 14990 }, { "epoch": 2.728861381632839, "grad_norm": 43.25, "learning_rate": 1.081004241742558e-06, "loss": 1.7873477935791016, "step": 14992 }, { "epoch": 2.729225448257031, "grad_norm": 11.0, "learning_rate": 1.080788546324514e-06, "loss": 1.5181097984313965, "step": 14994 }, { "epoch": 2.7295895148812233, "grad_norm": 8.625, "learning_rate": 1.0805731325429731e-06, "loss": 1.1909656524658203, "step": 14996 }, { "epoch": 2.7299535815054154, "grad_norm": 11.5625, "learning_rate": 1.0803580004295463e-06, "loss": 1.3072558641433716, "step": 14998 }, { "epoch": 2.7303176481296076, "grad_norm": 22.75, "learning_rate": 1.080143150015803e-06, "loss": 1.3752968311309814, "step": 15000 }, { "epoch": 2.7306817147538, "grad_norm": 11.75, "learning_rate": 1.0799285813332724e-06, "loss": 1.7476496696472168, "step": 15002 }, { "epoch": 2.731045781377992, "grad_norm": 12.625, "learning_rate": 1.0797142944134408e-06, "loss": 1.6604036092758179, "step": 15004 }, { "epoch": 2.7314098480021842, "grad_norm": 576.0, "learning_rate": 1.0795002892877543e-06, "loss": 0.6702216863632202, "step": 15006 }, { "epoch": 2.731773914626377, "grad_norm": 11.625, "learning_rate": 1.0792865659876172e-06, "loss": 0.9962186813354492, "step": 15008 }, { "epoch": 2.7321379812505686, "grad_norm": 26.375, "learning_rate": 1.0790731245443924e-06, "loss": 1.5872361660003662, "step": 15010 }, { "epoch": 2.7325020478747613, "grad_norm": 25.75, "learning_rate": 1.0788599649894022e-06, "loss": 2.004768133163452, "step": 15012 }, { "epoch": 2.7328661144989534, "grad_norm": 4.59375, "learning_rate": 1.0786470873539263e-06, "loss": 1.2689882516860962, "step": 15014 }, { "epoch": 2.7332301811231456, "grad_norm": 9.5625, "learning_rate": 1.0784344916692039e-06, "loss": 1.0874770879745483, "step": 15016 }, { "epoch": 2.733594247747338, "grad_norm": 11.0, "learning_rate": 1.0782221779664322e-06, "loss": 1.4245054721832275, "step": 15018 }, { "epoch": 2.73395831437153, "grad_norm": 7.5, "learning_rate": 1.0780101462767672e-06, "loss": 0.8740946054458618, "step": 15020 }, { "epoch": 2.7343223809957222, "grad_norm": 7.1875, "learning_rate": 1.077798396631325e-06, "loss": 1.290091633796692, "step": 15022 }, { "epoch": 2.7346864476199144, "grad_norm": 7.4375, "learning_rate": 1.0775869290611774e-06, "loss": 0.5480546355247498, "step": 15024 }, { "epoch": 2.7350505142441066, "grad_norm": 7.9375, "learning_rate": 1.0773757435973573e-06, "loss": 1.5005695819854736, "step": 15026 }, { "epoch": 2.735414580868299, "grad_norm": 8.625, "learning_rate": 1.077164840270855e-06, "loss": 1.4684122800827026, "step": 15028 }, { "epoch": 2.735778647492491, "grad_norm": 8.375, "learning_rate": 1.0769542191126199e-06, "loss": 1.3569011688232422, "step": 15030 }, { "epoch": 2.736142714116683, "grad_norm": 7.90625, "learning_rate": 1.07674388015356e-06, "loss": 1.455420732498169, "step": 15032 }, { "epoch": 2.736506780740876, "grad_norm": 12.8125, "learning_rate": 1.0765338234245411e-06, "loss": 1.492271900177002, "step": 15034 }, { "epoch": 2.7368708473650676, "grad_norm": 15.6875, "learning_rate": 1.0763240489563885e-06, "loss": 1.520097017288208, "step": 15036 }, { "epoch": 2.73723491398926, "grad_norm": 100.0, "learning_rate": 1.076114556779886e-06, "loss": 1.4418143033981323, "step": 15038 }, { "epoch": 2.7375989806134524, "grad_norm": 10.375, "learning_rate": 1.0759053469257747e-06, "loss": 1.2542225122451782, "step": 15040 }, { "epoch": 2.7379630472376446, "grad_norm": 11.75, "learning_rate": 1.0756964194247571e-06, "loss": 1.473730206489563, "step": 15042 }, { "epoch": 2.738327113861837, "grad_norm": 18.875, "learning_rate": 1.075487774307491e-06, "loss": 2.000340461730957, "step": 15044 }, { "epoch": 2.738691180486029, "grad_norm": 7.4375, "learning_rate": 1.0752794116045944e-06, "loss": 1.027376651763916, "step": 15046 }, { "epoch": 2.739055247110221, "grad_norm": 11.875, "learning_rate": 1.0750713313466444e-06, "loss": 1.216172218322754, "step": 15048 }, { "epoch": 2.7394193137344134, "grad_norm": 15.6875, "learning_rate": 1.074863533564175e-06, "loss": 1.9682786464691162, "step": 15050 }, { "epoch": 2.7397833803586056, "grad_norm": 11.4375, "learning_rate": 1.0746560182876805e-06, "loss": 1.5438538789749146, "step": 15052 }, { "epoch": 2.7401474469827978, "grad_norm": 14.75, "learning_rate": 1.0744487855476126e-06, "loss": 1.5282137393951416, "step": 15054 }, { "epoch": 2.74051151360699, "grad_norm": 15.75, "learning_rate": 1.0742418353743816e-06, "loss": 1.5225894451141357, "step": 15056 }, { "epoch": 2.740875580231182, "grad_norm": 4.90625, "learning_rate": 1.0740351677983567e-06, "loss": 1.446022629737854, "step": 15058 }, { "epoch": 2.741239646855375, "grad_norm": 12.5625, "learning_rate": 1.0738287828498656e-06, "loss": 1.17795991897583, "step": 15060 }, { "epoch": 2.7416037134795666, "grad_norm": 9.0625, "learning_rate": 1.0736226805591951e-06, "loss": 1.3711397647857666, "step": 15062 }, { "epoch": 2.741967780103759, "grad_norm": 15.875, "learning_rate": 1.0734168609565892e-06, "loss": 1.5267786979675293, "step": 15064 }, { "epoch": 2.742331846727951, "grad_norm": 19.75, "learning_rate": 1.073211324072251e-06, "loss": 1.615204930305481, "step": 15066 }, { "epoch": 2.7426959133521436, "grad_norm": 18.375, "learning_rate": 1.0730060699363427e-06, "loss": 1.579131841659546, "step": 15068 }, { "epoch": 2.7430599799763358, "grad_norm": 16.75, "learning_rate": 1.0728010985789835e-06, "loss": 1.4976472854614258, "step": 15070 }, { "epoch": 2.743424046600528, "grad_norm": 9.875, "learning_rate": 1.0725964100302535e-06, "loss": 0.5477147102355957, "step": 15072 }, { "epoch": 2.74378811322472, "grad_norm": 12.5625, "learning_rate": 1.0723920043201891e-06, "loss": 1.427178144454956, "step": 15074 }, { "epoch": 2.7441521798489124, "grad_norm": 10.125, "learning_rate": 1.072187881478786e-06, "loss": 1.7375693321228027, "step": 15076 }, { "epoch": 2.7445162464731045, "grad_norm": 4.9375, "learning_rate": 1.071984041535999e-06, "loss": 1.363765001296997, "step": 15078 }, { "epoch": 2.7448803130972967, "grad_norm": 16.375, "learning_rate": 1.0717804845217395e-06, "loss": 1.5048247575759888, "step": 15080 }, { "epoch": 2.745244379721489, "grad_norm": 15.3125, "learning_rate": 1.0715772104658801e-06, "loss": 2.018571376800537, "step": 15082 }, { "epoch": 2.745608446345681, "grad_norm": 9.6875, "learning_rate": 1.0713742193982497e-06, "loss": 1.3345913887023926, "step": 15084 }, { "epoch": 2.7459725129698733, "grad_norm": 7.625, "learning_rate": 1.0711715113486368e-06, "loss": 1.6039857864379883, "step": 15086 }, { "epoch": 2.7463365795940655, "grad_norm": 19.5, "learning_rate": 1.0709690863467874e-06, "loss": 1.3996527194976807, "step": 15088 }, { "epoch": 2.746700646218258, "grad_norm": 17.375, "learning_rate": 1.070766944422407e-06, "loss": 1.4645377397537231, "step": 15090 }, { "epoch": 2.74706471284245, "grad_norm": 11.875, "learning_rate": 1.0705650856051591e-06, "loss": 1.4853307008743286, "step": 15092 }, { "epoch": 2.7474287794666425, "grad_norm": 7.96875, "learning_rate": 1.0703635099246657e-06, "loss": 1.4188501834869385, "step": 15094 }, { "epoch": 2.7477928460908347, "grad_norm": 6.34375, "learning_rate": 1.0701622174105069e-06, "loss": 1.3596289157867432, "step": 15096 }, { "epoch": 2.748156912715027, "grad_norm": 9.3125, "learning_rate": 1.0699612080922218e-06, "loss": 0.9225339889526367, "step": 15098 }, { "epoch": 2.748520979339219, "grad_norm": 80.5, "learning_rate": 1.0697604819993075e-06, "loss": 1.1935278177261353, "step": 15100 }, { "epoch": 2.7488850459634113, "grad_norm": 6.125, "learning_rate": 1.0695600391612201e-06, "loss": 0.844068169593811, "step": 15102 }, { "epoch": 2.7492491125876035, "grad_norm": 12.5625, "learning_rate": 1.0693598796073734e-06, "loss": 1.4681004285812378, "step": 15104 }, { "epoch": 2.7496131792117957, "grad_norm": 39.25, "learning_rate": 1.06916000336714e-06, "loss": 1.627410888671875, "step": 15106 }, { "epoch": 2.749977245835988, "grad_norm": 9.0, "learning_rate": 1.0689604104698515e-06, "loss": 1.5033308267593384, "step": 15108 }, { "epoch": 2.75034131246018, "grad_norm": 8.8125, "learning_rate": 1.0687611009447966e-06, "loss": 1.4627599716186523, "step": 15110 }, { "epoch": 2.7507053790843723, "grad_norm": 8.4375, "learning_rate": 1.068562074821224e-06, "loss": 1.2369627952575684, "step": 15112 }, { "epoch": 2.7510694457085645, "grad_norm": 37.75, "learning_rate": 1.0683633321283392e-06, "loss": 1.7903141975402832, "step": 15114 }, { "epoch": 2.751433512332757, "grad_norm": 16.875, "learning_rate": 1.068164872895307e-06, "loss": 1.864713430404663, "step": 15116 }, { "epoch": 2.751797578956949, "grad_norm": 6.3125, "learning_rate": 1.0679666971512508e-06, "loss": 1.2925167083740234, "step": 15118 }, { "epoch": 2.7521616455811415, "grad_norm": 5.21875, "learning_rate": 1.0677688049252517e-06, "loss": 0.8857063055038452, "step": 15120 }, { "epoch": 2.7525257122053337, "grad_norm": 9.9375, "learning_rate": 1.0675711962463503e-06, "loss": 1.0740444660186768, "step": 15122 }, { "epoch": 2.752889778829526, "grad_norm": 19.125, "learning_rate": 1.0673738711435443e-06, "loss": 1.4283254146575928, "step": 15124 }, { "epoch": 2.753253845453718, "grad_norm": 7.28125, "learning_rate": 1.0671768296457902e-06, "loss": 1.4395685195922852, "step": 15126 }, { "epoch": 2.7536179120779103, "grad_norm": 6.53125, "learning_rate": 1.0669800717820034e-06, "loss": 1.183334469795227, "step": 15128 }, { "epoch": 2.7539819787021025, "grad_norm": 17.5, "learning_rate": 1.066783597581057e-06, "loss": 1.7554134130477905, "step": 15130 }, { "epoch": 2.7543460453262947, "grad_norm": 14.25, "learning_rate": 1.0665874070717838e-06, "loss": 1.4719607830047607, "step": 15132 }, { "epoch": 2.754710111950487, "grad_norm": 9.875, "learning_rate": 1.0663915002829727e-06, "loss": 1.3063950538635254, "step": 15134 }, { "epoch": 2.755074178574679, "grad_norm": 14.875, "learning_rate": 1.0661958772433725e-06, "loss": 1.6672691106796265, "step": 15136 }, { "epoch": 2.7554382451988713, "grad_norm": 8.75, "learning_rate": 1.0660005379816906e-06, "loss": 0.9045916199684143, "step": 15138 }, { "epoch": 2.7558023118230635, "grad_norm": 56.0, "learning_rate": 1.0658054825265913e-06, "loss": 1.2428569793701172, "step": 15140 }, { "epoch": 2.756166378447256, "grad_norm": 8.375, "learning_rate": 1.0656107109066995e-06, "loss": 1.813591480255127, "step": 15142 }, { "epoch": 2.756530445071448, "grad_norm": 5.875, "learning_rate": 1.0654162231505966e-06, "loss": 1.2263996601104736, "step": 15144 }, { "epoch": 2.7568945116956405, "grad_norm": 11.875, "learning_rate": 1.0652220192868224e-06, "loss": 1.3955984115600586, "step": 15146 }, { "epoch": 2.7572585783198327, "grad_norm": 96.5, "learning_rate": 1.0650280993438758e-06, "loss": 1.4035351276397705, "step": 15148 }, { "epoch": 2.757622644944025, "grad_norm": 28.375, "learning_rate": 1.064834463350214e-06, "loss": 1.8159451484680176, "step": 15150 }, { "epoch": 2.757986711568217, "grad_norm": 25.125, "learning_rate": 1.0646411113342524e-06, "loss": 1.4774194955825806, "step": 15152 }, { "epoch": 2.7583507781924093, "grad_norm": 22.875, "learning_rate": 1.064448043324364e-06, "loss": 1.5671591758728027, "step": 15154 }, { "epoch": 2.7587148448166015, "grad_norm": 202.0, "learning_rate": 1.0642552593488814e-06, "loss": 1.3637635707855225, "step": 15156 }, { "epoch": 2.7590789114407936, "grad_norm": 5.9375, "learning_rate": 1.0640627594360947e-06, "loss": 1.3500739336013794, "step": 15158 }, { "epoch": 2.759442978064986, "grad_norm": 8.75, "learning_rate": 1.0638705436142518e-06, "loss": 1.2351429462432861, "step": 15160 }, { "epoch": 2.759807044689178, "grad_norm": 13.0625, "learning_rate": 1.0636786119115609e-06, "loss": 1.5977540016174316, "step": 15162 }, { "epoch": 2.7601711113133702, "grad_norm": 9.75, "learning_rate": 1.063486964356186e-06, "loss": 1.422680139541626, "step": 15164 }, { "epoch": 2.7605351779375624, "grad_norm": 8.8125, "learning_rate": 1.0632956009762514e-06, "loss": 1.2810008525848389, "step": 15166 }, { "epoch": 2.760899244561755, "grad_norm": 17.0, "learning_rate": 1.0631045217998384e-06, "loss": 1.2463493347167969, "step": 15168 }, { "epoch": 2.761263311185947, "grad_norm": 15.125, "learning_rate": 1.062913726854987e-06, "loss": 1.984569787979126, "step": 15170 }, { "epoch": 2.7616273778101394, "grad_norm": 16.0, "learning_rate": 1.0627232161696964e-06, "loss": 1.5968174934387207, "step": 15172 }, { "epoch": 2.7619914444343316, "grad_norm": 18.5, "learning_rate": 1.0625329897719226e-06, "loss": 1.5111920833587646, "step": 15174 }, { "epoch": 2.762355511058524, "grad_norm": 7.59375, "learning_rate": 1.0623430476895805e-06, "loss": 1.2464303970336914, "step": 15176 }, { "epoch": 2.762719577682716, "grad_norm": 8.6875, "learning_rate": 1.0621533899505437e-06, "loss": 1.359215497970581, "step": 15178 }, { "epoch": 2.7630836443069082, "grad_norm": 11.4375, "learning_rate": 1.0619640165826431e-06, "loss": 1.0701346397399902, "step": 15180 }, { "epoch": 2.7634477109311004, "grad_norm": 13.0625, "learning_rate": 1.0617749276136696e-06, "loss": 1.4837589263916016, "step": 15182 }, { "epoch": 2.7638117775552926, "grad_norm": 7.78125, "learning_rate": 1.06158612307137e-06, "loss": 1.0844717025756836, "step": 15184 }, { "epoch": 2.764175844179485, "grad_norm": 12.5625, "learning_rate": 1.0613976029834513e-06, "loss": 1.6679472923278809, "step": 15186 }, { "epoch": 2.764539910803677, "grad_norm": 17.625, "learning_rate": 1.061209367377578e-06, "loss": 1.5979485511779785, "step": 15188 }, { "epoch": 2.764903977427869, "grad_norm": 15.5, "learning_rate": 1.0610214162813723e-06, "loss": 1.7798595428466797, "step": 15190 }, { "epoch": 2.7652680440520614, "grad_norm": 15.125, "learning_rate": 1.0608337497224164e-06, "loss": 1.5039747953414917, "step": 15192 }, { "epoch": 2.765632110676254, "grad_norm": 7.75, "learning_rate": 1.0606463677282487e-06, "loss": 1.4596741199493408, "step": 15194 }, { "epoch": 2.765996177300446, "grad_norm": 30.875, "learning_rate": 1.060459270326367e-06, "loss": 1.54854416847229, "step": 15196 }, { "epoch": 2.7663602439246384, "grad_norm": 12.0625, "learning_rate": 1.0602724575442271e-06, "loss": 1.1845885515213013, "step": 15198 }, { "epoch": 2.76672431054883, "grad_norm": 9.25, "learning_rate": 1.060085929409243e-06, "loss": 1.4231187105178833, "step": 15200 }, { "epoch": 2.767088377173023, "grad_norm": 9.1875, "learning_rate": 1.059899685948787e-06, "loss": 1.2150213718414307, "step": 15202 }, { "epoch": 2.767452443797215, "grad_norm": 24.0, "learning_rate": 1.05971372719019e-06, "loss": 1.0649685859680176, "step": 15204 }, { "epoch": 2.767816510421407, "grad_norm": 8.25, "learning_rate": 1.0595280531607397e-06, "loss": 1.265956163406372, "step": 15206 }, { "epoch": 2.7681805770455994, "grad_norm": 44.0, "learning_rate": 1.0593426638876836e-06, "loss": 1.1646571159362793, "step": 15208 }, { "epoch": 2.7685446436697916, "grad_norm": 12.25, "learning_rate": 1.0591575593982267e-06, "loss": 1.683388352394104, "step": 15210 }, { "epoch": 2.7689087102939838, "grad_norm": 27.375, "learning_rate": 1.058972739719533e-06, "loss": 1.9732575416564941, "step": 15212 }, { "epoch": 2.769272776918176, "grad_norm": 5.0, "learning_rate": 1.058788204878723e-06, "loss": 1.3566139936447144, "step": 15214 }, { "epoch": 2.769636843542368, "grad_norm": 15.875, "learning_rate": 1.0586039549028768e-06, "loss": 1.5855283737182617, "step": 15216 }, { "epoch": 2.7700009101665604, "grad_norm": 14.0, "learning_rate": 1.0584199898190325e-06, "loss": 1.4462519884109497, "step": 15218 }, { "epoch": 2.7703649767907526, "grad_norm": 28.5, "learning_rate": 1.058236309654186e-06, "loss": 1.0793297290802002, "step": 15220 }, { "epoch": 2.7707290434149447, "grad_norm": 60.0, "learning_rate": 1.0580529144352923e-06, "loss": 0.5910310745239258, "step": 15222 }, { "epoch": 2.7710931100391374, "grad_norm": 11.625, "learning_rate": 1.0578698041892632e-06, "loss": 1.5671952962875366, "step": 15224 }, { "epoch": 2.771457176663329, "grad_norm": 11.0, "learning_rate": 1.0576869789429692e-06, "loss": 1.503175973892212, "step": 15226 }, { "epoch": 2.7718212432875218, "grad_norm": 17.25, "learning_rate": 1.0575044387232398e-06, "loss": 1.6208741664886475, "step": 15228 }, { "epoch": 2.772185309911714, "grad_norm": 5.09375, "learning_rate": 1.0573221835568614e-06, "loss": 1.2335257530212402, "step": 15230 }, { "epoch": 2.772549376535906, "grad_norm": 7.84375, "learning_rate": 1.0571402134705805e-06, "loss": 1.325007438659668, "step": 15232 }, { "epoch": 2.7729134431600984, "grad_norm": 11.0, "learning_rate": 1.0569585284910988e-06, "loss": 1.2743703126907349, "step": 15234 }, { "epoch": 2.7732775097842906, "grad_norm": 34.0, "learning_rate": 1.056777128645079e-06, "loss": 1.4732047319412231, "step": 15236 }, { "epoch": 2.7736415764084827, "grad_norm": 24.25, "learning_rate": 1.0565960139591399e-06, "loss": 1.7495557069778442, "step": 15238 }, { "epoch": 2.774005643032675, "grad_norm": 8.75, "learning_rate": 1.0564151844598599e-06, "loss": 1.5108754634857178, "step": 15240 }, { "epoch": 2.774369709656867, "grad_norm": 68.0, "learning_rate": 1.0562346401737754e-06, "loss": 1.63310968875885, "step": 15242 }, { "epoch": 2.7747337762810593, "grad_norm": 14.5, "learning_rate": 1.0560543811273797e-06, "loss": 1.6279914379119873, "step": 15244 }, { "epoch": 2.7750978429052515, "grad_norm": 12.0625, "learning_rate": 1.0558744073471255e-06, "loss": 1.22107994556427, "step": 15246 }, { "epoch": 2.7754619095294437, "grad_norm": 8.0625, "learning_rate": 1.0556947188594231e-06, "loss": 1.3569004535675049, "step": 15248 }, { "epoch": 2.7758259761536364, "grad_norm": 26.5, "learning_rate": 1.055515315690641e-06, "loss": 1.5827465057373047, "step": 15250 }, { "epoch": 2.776190042777828, "grad_norm": 12.3125, "learning_rate": 1.0553361978671064e-06, "loss": 1.9765727519989014, "step": 15252 }, { "epoch": 2.7765541094020207, "grad_norm": 22.75, "learning_rate": 1.0551573654151036e-06, "loss": 1.3594779968261719, "step": 15254 }, { "epoch": 2.776918176026213, "grad_norm": 9.375, "learning_rate": 1.0549788183608755e-06, "loss": 1.1901259422302246, "step": 15256 }, { "epoch": 2.777282242650405, "grad_norm": 13.25, "learning_rate": 1.0548005567306235e-06, "loss": 1.3841781616210938, "step": 15258 }, { "epoch": 2.7776463092745973, "grad_norm": 26.75, "learning_rate": 1.0546225805505062e-06, "loss": 1.4691760540008545, "step": 15260 }, { "epoch": 2.7780103758987895, "grad_norm": 8.9375, "learning_rate": 1.054444889846642e-06, "loss": 1.2579461336135864, "step": 15262 }, { "epoch": 2.7783744425229817, "grad_norm": 7.15625, "learning_rate": 1.0542674846451052e-06, "loss": 0.9277184009552002, "step": 15264 }, { "epoch": 2.778738509147174, "grad_norm": 9.25, "learning_rate": 1.0540903649719294e-06, "loss": 1.3653194904327393, "step": 15266 }, { "epoch": 2.779102575771366, "grad_norm": 6.34375, "learning_rate": 1.0539135308531067e-06, "loss": 1.3270657062530518, "step": 15268 }, { "epoch": 2.7794666423955583, "grad_norm": 30.375, "learning_rate": 1.0537369823145866e-06, "loss": 1.225717544555664, "step": 15270 }, { "epoch": 2.7798307090197505, "grad_norm": 181.0, "learning_rate": 1.0535607193822769e-06, "loss": 1.7292187213897705, "step": 15272 }, { "epoch": 2.7801947756439427, "grad_norm": 27.25, "learning_rate": 1.053384742082043e-06, "loss": 1.31608247756958, "step": 15274 }, { "epoch": 2.7805588422681353, "grad_norm": 10.625, "learning_rate": 1.0532090504397098e-06, "loss": 1.0656460523605347, "step": 15276 }, { "epoch": 2.780922908892327, "grad_norm": 5.5, "learning_rate": 1.0530336444810586e-06, "loss": 1.230137825012207, "step": 15278 }, { "epoch": 2.7812869755165197, "grad_norm": 11.0, "learning_rate": 1.0528585242318293e-06, "loss": 1.2619385719299316, "step": 15280 }, { "epoch": 2.781651042140712, "grad_norm": 13.5625, "learning_rate": 1.0526836897177212e-06, "loss": 1.7867231369018555, "step": 15282 }, { "epoch": 2.782015108764904, "grad_norm": 5.625, "learning_rate": 1.0525091409643894e-06, "loss": 1.3512623310089111, "step": 15284 }, { "epoch": 2.7823791753890963, "grad_norm": 5.03125, "learning_rate": 1.0523348779974487e-06, "loss": 1.3322995901107788, "step": 15286 }, { "epoch": 2.7827432420132885, "grad_norm": 11.0, "learning_rate": 1.0521609008424716e-06, "loss": 1.5845931768417358, "step": 15288 }, { "epoch": 2.7831073086374807, "grad_norm": 8.5, "learning_rate": 1.051987209524988e-06, "loss": 1.4134056568145752, "step": 15290 }, { "epoch": 2.783471375261673, "grad_norm": 13.1875, "learning_rate": 1.0518138040704873e-06, "loss": 1.5484111309051514, "step": 15292 }, { "epoch": 2.783835441885865, "grad_norm": 8.5, "learning_rate": 1.0516406845044153e-06, "loss": 1.4702564477920532, "step": 15294 }, { "epoch": 2.7841995085100573, "grad_norm": 12.1875, "learning_rate": 1.051467850852177e-06, "loss": 1.244093894958496, "step": 15296 }, { "epoch": 2.7845635751342495, "grad_norm": 12.1875, "learning_rate": 1.0512953031391347e-06, "loss": 1.359610676765442, "step": 15298 }, { "epoch": 2.7849276417584417, "grad_norm": 7.625, "learning_rate": 1.0511230413906093e-06, "loss": 1.1750068664550781, "step": 15300 }, { "epoch": 2.7852917083826343, "grad_norm": 18.25, "learning_rate": 1.0509510656318796e-06, "loss": 1.401893973350525, "step": 15302 }, { "epoch": 2.785655775006826, "grad_norm": 7.34375, "learning_rate": 1.0507793758881822e-06, "loss": 1.1192243099212646, "step": 15304 }, { "epoch": 2.7860198416310187, "grad_norm": 25.875, "learning_rate": 1.0506079721847117e-06, "loss": 1.3272150754928589, "step": 15306 }, { "epoch": 2.7863839082552104, "grad_norm": 8.75, "learning_rate": 1.0504368545466213e-06, "loss": 1.481140375137329, "step": 15308 }, { "epoch": 2.786747974879403, "grad_norm": 13.25, "learning_rate": 1.0502660229990213e-06, "loss": 1.6706256866455078, "step": 15310 }, { "epoch": 2.7871120415035953, "grad_norm": 32.0, "learning_rate": 1.0500954775669813e-06, "loss": 1.7830562591552734, "step": 15312 }, { "epoch": 2.7874761081277875, "grad_norm": 6.625, "learning_rate": 1.0499252182755274e-06, "loss": 0.9861568212509155, "step": 15314 }, { "epoch": 2.7878401747519796, "grad_norm": 6.90625, "learning_rate": 1.0497552451496447e-06, "loss": 1.2062160968780518, "step": 15316 }, { "epoch": 2.788204241376172, "grad_norm": 11.5625, "learning_rate": 1.0495855582142763e-06, "loss": 1.2470710277557373, "step": 15318 }, { "epoch": 2.788568308000364, "grad_norm": 39.25, "learning_rate": 1.0494161574943224e-06, "loss": 0.9131478071212769, "step": 15320 }, { "epoch": 2.7889323746245562, "grad_norm": 11.8125, "learning_rate": 1.0492470430146432e-06, "loss": 1.005089521408081, "step": 15322 }, { "epoch": 2.7892964412487484, "grad_norm": 6.65625, "learning_rate": 1.049078214800054e-06, "loss": 1.2499432563781738, "step": 15324 }, { "epoch": 2.7896605078729406, "grad_norm": 8.0, "learning_rate": 1.0489096728753308e-06, "loss": 1.538610816001892, "step": 15326 }, { "epoch": 2.790024574497133, "grad_norm": 9.4375, "learning_rate": 1.048741417265206e-06, "loss": 0.9799821376800537, "step": 15328 }, { "epoch": 2.790388641121325, "grad_norm": 15.25, "learning_rate": 1.0485734479943702e-06, "loss": 1.5890527963638306, "step": 15330 }, { "epoch": 2.7907527077455176, "grad_norm": 15.0625, "learning_rate": 1.0484057650874727e-06, "loss": 1.3033627271652222, "step": 15332 }, { "epoch": 2.7911167743697094, "grad_norm": 17.125, "learning_rate": 1.04823836856912e-06, "loss": 1.0314157009124756, "step": 15334 }, { "epoch": 2.791480840993902, "grad_norm": 12.9375, "learning_rate": 1.0480712584638769e-06, "loss": 1.7576978206634521, "step": 15336 }, { "epoch": 2.7918449076180942, "grad_norm": 3.828125, "learning_rate": 1.0479044347962662e-06, "loss": 0.9735898971557617, "step": 15338 }, { "epoch": 2.7922089742422864, "grad_norm": 7.8125, "learning_rate": 1.047737897590768e-06, "loss": 1.4962714910507202, "step": 15340 }, { "epoch": 2.7925730408664786, "grad_norm": 42.25, "learning_rate": 1.0475716468718224e-06, "loss": 1.4702708721160889, "step": 15342 }, { "epoch": 2.792937107490671, "grad_norm": 14.3125, "learning_rate": 1.0474056826638248e-06, "loss": 1.3507142066955566, "step": 15344 }, { "epoch": 2.793301174114863, "grad_norm": 9.3125, "learning_rate": 1.0472400049911302e-06, "loss": 1.5258738994598389, "step": 15346 }, { "epoch": 2.793665240739055, "grad_norm": 9.9375, "learning_rate": 1.0470746138780507e-06, "loss": 1.4927797317504883, "step": 15348 }, { "epoch": 2.7940293073632474, "grad_norm": 10.25, "learning_rate": 1.0469095093488568e-06, "loss": 1.3006072044372559, "step": 15350 }, { "epoch": 2.7943933739874396, "grad_norm": 9.8125, "learning_rate": 1.046744691427778e-06, "loss": 1.3229343891143799, "step": 15352 }, { "epoch": 2.794757440611632, "grad_norm": 30.875, "learning_rate": 1.0465801601389997e-06, "loss": 1.4262301921844482, "step": 15354 }, { "epoch": 2.795121507235824, "grad_norm": 6.40625, "learning_rate": 1.0464159155066662e-06, "loss": 1.4256393909454346, "step": 15356 }, { "epoch": 2.7954855738600166, "grad_norm": 7.65625, "learning_rate": 1.0462519575548798e-06, "loss": 1.3325252532958984, "step": 15358 }, { "epoch": 2.7958496404842084, "grad_norm": 7.625, "learning_rate": 1.0460882863077007e-06, "loss": 1.3819911479949951, "step": 15360 }, { "epoch": 2.796213707108401, "grad_norm": 13.625, "learning_rate": 1.0459249017891474e-06, "loss": 1.2774745225906372, "step": 15362 }, { "epoch": 2.796577773732593, "grad_norm": 10.375, "learning_rate": 1.0457618040231953e-06, "loss": 1.5341606140136719, "step": 15364 }, { "epoch": 2.7969418403567854, "grad_norm": 12.5625, "learning_rate": 1.045598993033779e-06, "loss": 1.3138628005981445, "step": 15366 }, { "epoch": 2.7973059069809776, "grad_norm": 7.6875, "learning_rate": 1.04543646884479e-06, "loss": 1.5500141382217407, "step": 15368 }, { "epoch": 2.79766997360517, "grad_norm": 34.75, "learning_rate": 1.0452742314800775e-06, "loss": 1.4380184412002563, "step": 15370 }, { "epoch": 2.798034040229362, "grad_norm": 7.84375, "learning_rate": 1.0451122809634502e-06, "loss": 1.3354018926620483, "step": 15372 }, { "epoch": 2.798398106853554, "grad_norm": 10.375, "learning_rate": 1.044950617318673e-06, "loss": 1.4029438495635986, "step": 15374 }, { "epoch": 2.7987621734777464, "grad_norm": 21.0, "learning_rate": 1.0447892405694696e-06, "loss": 1.5707744359970093, "step": 15376 }, { "epoch": 2.7991262401019386, "grad_norm": 23.0, "learning_rate": 1.0446281507395213e-06, "loss": 1.2131977081298828, "step": 15378 }, { "epoch": 2.7994903067261308, "grad_norm": 15.0625, "learning_rate": 1.0444673478524675e-06, "loss": 0.7167494297027588, "step": 15380 }, { "epoch": 2.799854373350323, "grad_norm": 16.0, "learning_rate": 1.0443068319319054e-06, "loss": 1.2614479064941406, "step": 15382 }, { "epoch": 2.8002184399745156, "grad_norm": 8.5, "learning_rate": 1.04414660300139e-06, "loss": 1.4775824546813965, "step": 15384 }, { "epoch": 2.8005825065987073, "grad_norm": 6.09375, "learning_rate": 1.0439866610844342e-06, "loss": 1.256793737411499, "step": 15386 }, { "epoch": 2.8009465732229, "grad_norm": 17.25, "learning_rate": 1.043827006204509e-06, "loss": 1.2728979587554932, "step": 15388 }, { "epoch": 2.801310639847092, "grad_norm": 12.1875, "learning_rate": 1.0436676383850425e-06, "loss": 1.5430599451065063, "step": 15390 }, { "epoch": 2.8016747064712844, "grad_norm": 13.1875, "learning_rate": 1.0435085576494221e-06, "loss": 1.3391112089157104, "step": 15392 }, { "epoch": 2.8020387730954766, "grad_norm": 99.5, "learning_rate": 1.0433497640209921e-06, "loss": 1.2824134826660156, "step": 15394 }, { "epoch": 2.8024028397196687, "grad_norm": 11.5, "learning_rate": 1.043191257523054e-06, "loss": 0.9084633588790894, "step": 15396 }, { "epoch": 2.802766906343861, "grad_norm": 11.3125, "learning_rate": 1.0430330381788692e-06, "loss": 1.7296411991119385, "step": 15398 }, { "epoch": 2.803130972968053, "grad_norm": 9.25, "learning_rate": 1.0428751060116547e-06, "loss": 1.0605690479278564, "step": 15400 }, { "epoch": 2.8034950395922453, "grad_norm": 9.375, "learning_rate": 1.0427174610445873e-06, "loss": 0.7418138980865479, "step": 15402 }, { "epoch": 2.8038591062164375, "grad_norm": 5.5, "learning_rate": 1.0425601033008e-06, "loss": 1.2067638635635376, "step": 15404 }, { "epoch": 2.8042231728406297, "grad_norm": 28.125, "learning_rate": 1.042403032803385e-06, "loss": 1.207100510597229, "step": 15406 }, { "epoch": 2.804587239464822, "grad_norm": 3.296875, "learning_rate": 1.042246249575391e-06, "loss": 1.1736596822738647, "step": 15408 }, { "epoch": 2.8049513060890146, "grad_norm": 11.125, "learning_rate": 1.0420897536398262e-06, "loss": 1.1369788646697998, "step": 15410 }, { "epoch": 2.8053153727132063, "grad_norm": 5.9375, "learning_rate": 1.0419335450196554e-06, "loss": 1.498537540435791, "step": 15412 }, { "epoch": 2.805679439337399, "grad_norm": 48.75, "learning_rate": 1.0417776237378014e-06, "loss": 1.3380601406097412, "step": 15414 }, { "epoch": 2.806043505961591, "grad_norm": 23.875, "learning_rate": 1.0416219898171451e-06, "loss": 1.2410119771957397, "step": 15416 }, { "epoch": 2.8064075725857833, "grad_norm": 20.625, "learning_rate": 1.0414666432805252e-06, "loss": 1.8276417255401611, "step": 15418 }, { "epoch": 2.8067716392099755, "grad_norm": 22.75, "learning_rate": 1.041311584150738e-06, "loss": 1.3166488409042358, "step": 15420 }, { "epoch": 2.8071357058341677, "grad_norm": 12.0625, "learning_rate": 1.0411568124505384e-06, "loss": 1.2247083187103271, "step": 15422 }, { "epoch": 2.80749977245836, "grad_norm": 7.28125, "learning_rate": 1.0410023282026376e-06, "loss": 1.3944164514541626, "step": 15424 }, { "epoch": 2.807863839082552, "grad_norm": 5.875, "learning_rate": 1.0408481314297062e-06, "loss": 1.3214515447616577, "step": 15426 }, { "epoch": 2.8082279057067443, "grad_norm": 3.3125, "learning_rate": 1.0406942221543718e-06, "loss": 1.055877685546875, "step": 15428 }, { "epoch": 2.8085919723309365, "grad_norm": 39.5, "learning_rate": 1.0405406003992197e-06, "loss": 1.6331040859222412, "step": 15430 }, { "epoch": 2.8089560389551287, "grad_norm": 21.0, "learning_rate": 1.0403872661867938e-06, "loss": 1.712215542793274, "step": 15432 }, { "epoch": 2.809320105579321, "grad_norm": 11.3125, "learning_rate": 1.0402342195395949e-06, "loss": 1.2522261142730713, "step": 15434 }, { "epoch": 2.8096841722035135, "grad_norm": 5.15625, "learning_rate": 1.040081460480082e-06, "loss": 1.5624516010284424, "step": 15436 }, { "epoch": 2.8100482388277053, "grad_norm": 5.90625, "learning_rate": 1.039928989030672e-06, "loss": 0.9424973130226135, "step": 15438 }, { "epoch": 2.810412305451898, "grad_norm": 12.5, "learning_rate": 1.039776805213739e-06, "loss": 1.0278855562210083, "step": 15440 }, { "epoch": 2.8107763720760897, "grad_norm": 13.875, "learning_rate": 1.0396249090516163e-06, "loss": 0.761257529258728, "step": 15442 }, { "epoch": 2.8111404387002823, "grad_norm": 5.78125, "learning_rate": 1.0394733005665931e-06, "loss": 0.9988175630569458, "step": 15444 }, { "epoch": 2.8115045053244745, "grad_norm": 12.1875, "learning_rate": 1.039321979780918e-06, "loss": 1.3274415731430054, "step": 15446 }, { "epoch": 2.8118685719486667, "grad_norm": 20.875, "learning_rate": 1.0391709467167961e-06, "loss": 2.0050301551818848, "step": 15448 }, { "epoch": 2.812232638572859, "grad_norm": 12.625, "learning_rate": 1.0390202013963913e-06, "loss": 1.9951343536376953, "step": 15450 }, { "epoch": 2.812596705197051, "grad_norm": 84.0, "learning_rate": 1.0388697438418251e-06, "loss": 2.048008680343628, "step": 15452 }, { "epoch": 2.8129607718212433, "grad_norm": 8.75, "learning_rate": 1.038719574075176e-06, "loss": 1.5148104429244995, "step": 15454 }, { "epoch": 2.8133248384454355, "grad_norm": 6.53125, "learning_rate": 1.0385696921184813e-06, "loss": 1.4777487516403198, "step": 15456 }, { "epoch": 2.8136889050696277, "grad_norm": 12.75, "learning_rate": 1.0384200979937349e-06, "loss": 1.4203996658325195, "step": 15458 }, { "epoch": 2.81405297169382, "grad_norm": 9.125, "learning_rate": 1.0382707917228894e-06, "loss": 1.5079729557037354, "step": 15460 }, { "epoch": 2.814417038318012, "grad_norm": 7.96875, "learning_rate": 1.0381217733278555e-06, "loss": 1.5309672355651855, "step": 15462 }, { "epoch": 2.8147811049422042, "grad_norm": 12.5, "learning_rate": 1.0379730428305004e-06, "loss": 1.1129004955291748, "step": 15464 }, { "epoch": 2.815145171566397, "grad_norm": 14.75, "learning_rate": 1.03782460025265e-06, "loss": 1.3766101598739624, "step": 15466 }, { "epoch": 2.8155092381905886, "grad_norm": 28.0, "learning_rate": 1.0376764456160873e-06, "loss": 1.8709042072296143, "step": 15468 }, { "epoch": 2.8158733048147813, "grad_norm": 10.625, "learning_rate": 1.0375285789425534e-06, "loss": 1.3518832921981812, "step": 15470 }, { "epoch": 2.8162373714389735, "grad_norm": 8.5, "learning_rate": 1.037381000253748e-06, "loss": 1.6706080436706543, "step": 15472 }, { "epoch": 2.8166014380631657, "grad_norm": 9.125, "learning_rate": 1.0372337095713265e-06, "loss": 1.367620825767517, "step": 15474 }, { "epoch": 2.816965504687358, "grad_norm": 14.3125, "learning_rate": 1.037086706916904e-06, "loss": 1.1302152872085571, "step": 15476 }, { "epoch": 2.81732957131155, "grad_norm": 18.625, "learning_rate": 1.0369399923120518e-06, "loss": 0.6499367952346802, "step": 15478 }, { "epoch": 2.8176936379357422, "grad_norm": 75.0, "learning_rate": 1.0367935657783005e-06, "loss": 1.5066099166870117, "step": 15480 }, { "epoch": 2.8180577045599344, "grad_norm": 22.25, "learning_rate": 1.0366474273371373e-06, "loss": 1.5841988325119019, "step": 15482 }, { "epoch": 2.8184217711841266, "grad_norm": 27.5, "learning_rate": 1.0365015770100071e-06, "loss": 1.6245061159133911, "step": 15484 }, { "epoch": 2.818785837808319, "grad_norm": 14.0625, "learning_rate": 1.0363560148183135e-06, "loss": 1.731898307800293, "step": 15486 }, { "epoch": 2.819149904432511, "grad_norm": 13.375, "learning_rate": 1.0362107407834165e-06, "loss": 1.5783600807189941, "step": 15488 }, { "epoch": 2.819513971056703, "grad_norm": 6.59375, "learning_rate": 1.0360657549266346e-06, "loss": 1.3789788484573364, "step": 15490 }, { "epoch": 2.819878037680896, "grad_norm": 5.90625, "learning_rate": 1.0359210572692442e-06, "loss": 1.0591803789138794, "step": 15492 }, { "epoch": 2.8202421043050876, "grad_norm": 18.75, "learning_rate": 1.0357766478324792e-06, "loss": 1.4972491264343262, "step": 15494 }, { "epoch": 2.8206061709292802, "grad_norm": 18.5, "learning_rate": 1.0356325266375305e-06, "loss": 1.6652246713638306, "step": 15496 }, { "epoch": 2.8209702375534724, "grad_norm": 12.6875, "learning_rate": 1.0354886937055478e-06, "loss": 0.5180586576461792, "step": 15498 }, { "epoch": 2.8213343041776646, "grad_norm": 15.25, "learning_rate": 1.0353451490576375e-06, "loss": 1.503953218460083, "step": 15500 }, { "epoch": 2.821698370801857, "grad_norm": 21.25, "learning_rate": 1.035201892714865e-06, "loss": 1.2995222806930542, "step": 15502 }, { "epoch": 2.822062437426049, "grad_norm": 13.625, "learning_rate": 1.035058924698252e-06, "loss": 1.3643341064453125, "step": 15504 }, { "epoch": 2.822426504050241, "grad_norm": 16.5, "learning_rate": 1.0349162450287781e-06, "loss": 1.4070734977722168, "step": 15506 }, { "epoch": 2.8227905706744334, "grad_norm": 26.375, "learning_rate": 1.034773853727382e-06, "loss": 1.3273887634277344, "step": 15508 }, { "epoch": 2.8231546372986256, "grad_norm": 15.1875, "learning_rate": 1.0346317508149581e-06, "loss": 0.47858723998069763, "step": 15510 }, { "epoch": 2.823518703922818, "grad_norm": 27.125, "learning_rate": 1.0344899363123603e-06, "loss": 1.2646902799606323, "step": 15512 }, { "epoch": 2.82388277054701, "grad_norm": 7.75, "learning_rate": 1.0343484102403984e-06, "loss": 1.2462102174758911, "step": 15514 }, { "epoch": 2.824246837171202, "grad_norm": 11.1875, "learning_rate": 1.0342071726198415e-06, "loss": 1.6582112312316895, "step": 15516 }, { "epoch": 2.824610903795395, "grad_norm": 8.0625, "learning_rate": 1.0340662234714155e-06, "loss": 1.49208402633667, "step": 15518 }, { "epoch": 2.8249749704195866, "grad_norm": 38.5, "learning_rate": 1.0339255628158034e-06, "loss": 1.167761206626892, "step": 15520 }, { "epoch": 2.825339037043779, "grad_norm": 8.3125, "learning_rate": 1.0337851906736476e-06, "loss": 1.338670253753662, "step": 15522 }, { "epoch": 2.8257031036679714, "grad_norm": 9.5625, "learning_rate": 1.0336451070655466e-06, "loss": 1.16083824634552, "step": 15524 }, { "epoch": 2.8260671702921636, "grad_norm": 10.5, "learning_rate": 1.033505312012057e-06, "loss": 1.3998286724090576, "step": 15526 }, { "epoch": 2.826431236916356, "grad_norm": 25.125, "learning_rate": 1.0333658055336937e-06, "loss": 1.4924664497375488, "step": 15528 }, { "epoch": 2.826795303540548, "grad_norm": 9.9375, "learning_rate": 1.033226587650928e-06, "loss": 1.5790613889694214, "step": 15530 }, { "epoch": 2.82715937016474, "grad_norm": 7.03125, "learning_rate": 1.0330876583841902e-06, "loss": 1.2886697053909302, "step": 15532 }, { "epoch": 2.8275234367889324, "grad_norm": 10.75, "learning_rate": 1.0329490177538673e-06, "loss": 1.3453556299209595, "step": 15534 }, { "epoch": 2.8278875034131246, "grad_norm": 10.8125, "learning_rate": 1.0328106657803045e-06, "loss": 1.1671477556228638, "step": 15536 }, { "epoch": 2.8282515700373168, "grad_norm": 8.1875, "learning_rate": 1.0326726024838036e-06, "loss": 0.9520739316940308, "step": 15538 }, { "epoch": 2.828615636661509, "grad_norm": 26.25, "learning_rate": 1.0325348278846258e-06, "loss": 1.2173244953155518, "step": 15540 }, { "epoch": 2.828979703285701, "grad_norm": 13.0625, "learning_rate": 1.0323973420029885e-06, "loss": 1.791285514831543, "step": 15542 }, { "epoch": 2.829343769909894, "grad_norm": 21.875, "learning_rate": 1.0322601448590673e-06, "loss": 1.4645047187805176, "step": 15544 }, { "epoch": 2.8297078365340855, "grad_norm": 15.3125, "learning_rate": 1.0321232364729953e-06, "loss": 1.399730920791626, "step": 15546 }, { "epoch": 2.830071903158278, "grad_norm": 9.875, "learning_rate": 1.0319866168648632e-06, "loss": 1.454708218574524, "step": 15548 }, { "epoch": 2.83043596978247, "grad_norm": 11.875, "learning_rate": 1.0318502860547193e-06, "loss": 1.4562010765075684, "step": 15550 }, { "epoch": 2.8308000364066626, "grad_norm": 4.125, "learning_rate": 1.03171424406257e-06, "loss": 0.979663610458374, "step": 15552 }, { "epoch": 2.8311641030308548, "grad_norm": 24.0, "learning_rate": 1.0315784909083781e-06, "loss": 1.3442797660827637, "step": 15554 }, { "epoch": 2.831528169655047, "grad_norm": 14.25, "learning_rate": 1.031443026612066e-06, "loss": 1.7458198070526123, "step": 15556 }, { "epoch": 2.831892236279239, "grad_norm": 8.3125, "learning_rate": 1.0313078511935114e-06, "loss": 1.3635425567626953, "step": 15558 }, { "epoch": 2.8322563029034313, "grad_norm": 8.9375, "learning_rate": 1.0311729646725513e-06, "loss": 1.3710309267044067, "step": 15560 }, { "epoch": 2.8326203695276235, "grad_norm": 14.0625, "learning_rate": 1.03103836706898e-06, "loss": 1.4832854270935059, "step": 15562 }, { "epoch": 2.8329844361518157, "grad_norm": 3.5, "learning_rate": 1.0309040584025482e-06, "loss": 1.207900881767273, "step": 15564 }, { "epoch": 2.833348502776008, "grad_norm": 26.5, "learning_rate": 1.0307700386929664e-06, "loss": 1.359100103378296, "step": 15566 }, { "epoch": 2.8337125694002, "grad_norm": 12.375, "learning_rate": 1.0306363079599007e-06, "loss": 1.9217147827148438, "step": 15568 }, { "epoch": 2.8340766360243923, "grad_norm": 26.375, "learning_rate": 1.0305028662229752e-06, "loss": 1.4839872121810913, "step": 15570 }, { "epoch": 2.8344407026485845, "grad_norm": 16.625, "learning_rate": 1.0303697135017733e-06, "loss": 1.5705583095550537, "step": 15572 }, { "epoch": 2.834804769272777, "grad_norm": 10.9375, "learning_rate": 1.030236849815833e-06, "loss": 1.2779827117919922, "step": 15574 }, { "epoch": 2.835168835896969, "grad_norm": 16.625, "learning_rate": 1.0301042751846524e-06, "loss": 1.885358214378357, "step": 15576 }, { "epoch": 2.8355329025211615, "grad_norm": 11.125, "learning_rate": 1.0299719896276864e-06, "loss": 1.866011619567871, "step": 15578 }, { "epoch": 2.8358969691453537, "grad_norm": 5.1875, "learning_rate": 1.0298399931643466e-06, "loss": 1.074000358581543, "step": 15580 }, { "epoch": 2.836261035769546, "grad_norm": 3.203125, "learning_rate": 1.029708285814004e-06, "loss": 1.0708781480789185, "step": 15582 }, { "epoch": 2.836625102393738, "grad_norm": 5.90625, "learning_rate": 1.029576867595985e-06, "loss": 0.9992654323577881, "step": 15584 }, { "epoch": 2.8369891690179303, "grad_norm": 9.125, "learning_rate": 1.0294457385295755e-06, "loss": 1.426224946975708, "step": 15586 }, { "epoch": 2.8373532356421225, "grad_norm": 12.0, "learning_rate": 1.029314898634018e-06, "loss": 1.972790002822876, "step": 15588 }, { "epoch": 2.8377173022663147, "grad_norm": 13.9375, "learning_rate": 1.0291843479285123e-06, "loss": 1.9864373207092285, "step": 15590 }, { "epoch": 2.838081368890507, "grad_norm": 7.125, "learning_rate": 1.0290540864322173e-06, "loss": 1.5132185220718384, "step": 15592 }, { "epoch": 2.838445435514699, "grad_norm": 10.6875, "learning_rate": 1.028924114164247e-06, "loss": 1.3053078651428223, "step": 15594 }, { "epoch": 2.8388095021388913, "grad_norm": 29.25, "learning_rate": 1.0287944311436748e-06, "loss": 1.3878569602966309, "step": 15596 }, { "epoch": 2.8391735687630835, "grad_norm": 22.875, "learning_rate": 1.0286650373895315e-06, "loss": 1.5359525680541992, "step": 15598 }, { "epoch": 2.839537635387276, "grad_norm": 15.375, "learning_rate": 1.0285359329208045e-06, "loss": 1.7001599073410034, "step": 15600 }, { "epoch": 2.839901702011468, "grad_norm": 31.5, "learning_rate": 1.02840711775644e-06, "loss": 1.0770227909088135, "step": 15602 }, { "epoch": 2.8402657686356605, "grad_norm": 3.375, "learning_rate": 1.0282785919153408e-06, "loss": 1.1072921752929688, "step": 15604 }, { "epoch": 2.8406298352598527, "grad_norm": 35.25, "learning_rate": 1.0281503554163675e-06, "loss": 1.1652566194534302, "step": 15606 }, { "epoch": 2.840993901884045, "grad_norm": 21.5, "learning_rate": 1.0280224082783383e-06, "loss": 1.7659976482391357, "step": 15608 }, { "epoch": 2.841357968508237, "grad_norm": 12.0625, "learning_rate": 1.0278947505200288e-06, "loss": 1.5059798955917358, "step": 15610 }, { "epoch": 2.8417220351324293, "grad_norm": 12.875, "learning_rate": 1.0277673821601728e-06, "loss": 1.3959065675735474, "step": 15612 }, { "epoch": 2.8420861017566215, "grad_norm": 9.6875, "learning_rate": 1.0276403032174604e-06, "loss": 1.7684130668640137, "step": 15614 }, { "epoch": 2.8424501683808137, "grad_norm": 12.5, "learning_rate": 1.0275135137105403e-06, "loss": 1.6079057455062866, "step": 15616 }, { "epoch": 2.842814235005006, "grad_norm": 12.25, "learning_rate": 1.0273870136580185e-06, "loss": 1.6850941181182861, "step": 15618 }, { "epoch": 2.843178301629198, "grad_norm": 31.375, "learning_rate": 1.0272608030784576e-06, "loss": 2.0558536052703857, "step": 15620 }, { "epoch": 2.8435423682533902, "grad_norm": 14.125, "learning_rate": 1.0271348819903798e-06, "loss": 1.3582508563995361, "step": 15622 }, { "epoch": 2.8439064348775824, "grad_norm": 15.375, "learning_rate": 1.027009250412262e-06, "loss": 0.9238548278808594, "step": 15624 }, { "epoch": 2.844270501501775, "grad_norm": 25.875, "learning_rate": 1.0268839083625413e-06, "loss": 1.4568922519683838, "step": 15626 }, { "epoch": 2.844634568125967, "grad_norm": 10.4375, "learning_rate": 1.0267588558596107e-06, "loss": 1.2882832288742065, "step": 15628 }, { "epoch": 2.8449986347501595, "grad_norm": 10.1875, "learning_rate": 1.026634092921821e-06, "loss": 1.344896912574768, "step": 15630 }, { "epoch": 2.8453627013743517, "grad_norm": 9.875, "learning_rate": 1.0265096195674808e-06, "loss": 1.427192211151123, "step": 15632 }, { "epoch": 2.845726767998544, "grad_norm": 12.0625, "learning_rate": 1.026385435814856e-06, "loss": 1.5665194988250732, "step": 15634 }, { "epoch": 2.846090834622736, "grad_norm": 22.375, "learning_rate": 1.0262615416821704e-06, "loss": 1.610897421836853, "step": 15636 }, { "epoch": 2.8464549012469282, "grad_norm": 30.125, "learning_rate": 1.0261379371876045e-06, "loss": 0.5919798612594604, "step": 15638 }, { "epoch": 2.8468189678711204, "grad_norm": 49.75, "learning_rate": 1.0260146223492972e-06, "loss": 1.4704358577728271, "step": 15640 }, { "epoch": 2.8471830344953126, "grad_norm": 11.25, "learning_rate": 1.025891597185344e-06, "loss": 1.2458178997039795, "step": 15642 }, { "epoch": 2.847547101119505, "grad_norm": 11.0625, "learning_rate": 1.0257688617137985e-06, "loss": 1.385305404663086, "step": 15644 }, { "epoch": 2.847911167743697, "grad_norm": 15.9375, "learning_rate": 1.0256464159526718e-06, "loss": 1.2866665124893188, "step": 15646 }, { "epoch": 2.848275234367889, "grad_norm": 29.625, "learning_rate": 1.0255242599199322e-06, "loss": 1.521550178527832, "step": 15648 }, { "epoch": 2.8486393009920814, "grad_norm": 14.125, "learning_rate": 1.0254023936335055e-06, "loss": 1.831729769706726, "step": 15650 }, { "epoch": 2.849003367616274, "grad_norm": 4.59375, "learning_rate": 1.0252808171112755e-06, "loss": 0.8389082551002502, "step": 15652 }, { "epoch": 2.849367434240466, "grad_norm": 6.75, "learning_rate": 1.0251595303710823e-06, "loss": 0.9912675023078918, "step": 15654 }, { "epoch": 2.8497315008646584, "grad_norm": 15.0, "learning_rate": 1.025038533430725e-06, "loss": 1.6578004360198975, "step": 15656 }, { "epoch": 2.8500955674888506, "grad_norm": 36.0, "learning_rate": 1.024917826307959e-06, "loss": 1.6915128231048584, "step": 15658 }, { "epoch": 2.850459634113043, "grad_norm": 12.375, "learning_rate": 1.0247974090204977e-06, "loss": 1.5055975914001465, "step": 15660 }, { "epoch": 2.850823700737235, "grad_norm": 11.8125, "learning_rate": 1.0246772815860117e-06, "loss": 1.5027272701263428, "step": 15662 }, { "epoch": 2.851187767361427, "grad_norm": 28.25, "learning_rate": 1.0245574440221295e-06, "loss": 1.7784786224365234, "step": 15664 }, { "epoch": 2.8515518339856194, "grad_norm": 26.0, "learning_rate": 1.0244378963464366e-06, "loss": 1.3188923597335815, "step": 15666 }, { "epoch": 2.8519159006098116, "grad_norm": 14.5625, "learning_rate": 1.0243186385764762e-06, "loss": 1.3407018184661865, "step": 15668 }, { "epoch": 2.852279967234004, "grad_norm": 10.375, "learning_rate": 1.0241996707297485e-06, "loss": 1.4565585851669312, "step": 15670 }, { "epoch": 2.852644033858196, "grad_norm": 8.125, "learning_rate": 1.0240809928237126e-06, "loss": 1.3527569770812988, "step": 15672 }, { "epoch": 2.853008100482388, "grad_norm": 2.671875, "learning_rate": 1.0239626048757829e-06, "loss": 1.1230276823043823, "step": 15674 }, { "epoch": 2.8533721671065804, "grad_norm": 11.0625, "learning_rate": 1.0238445069033328e-06, "loss": 1.0557066202163696, "step": 15676 }, { "epoch": 2.853736233730773, "grad_norm": 35.25, "learning_rate": 1.0237266989236925e-06, "loss": 1.4573719501495361, "step": 15678 }, { "epoch": 2.8541003003549648, "grad_norm": 25.0, "learning_rate": 1.02360918095415e-06, "loss": 1.7462213039398193, "step": 15680 }, { "epoch": 2.8544643669791574, "grad_norm": 22.875, "learning_rate": 1.0234919530119507e-06, "loss": 2.002379894256592, "step": 15682 }, { "epoch": 2.854828433603349, "grad_norm": 17.75, "learning_rate": 1.0233750151142973e-06, "loss": 1.7310270071029663, "step": 15684 }, { "epoch": 2.855192500227542, "grad_norm": 18.875, "learning_rate": 1.0232583672783497e-06, "loss": 1.4237812757492065, "step": 15686 }, { "epoch": 2.855556566851734, "grad_norm": 17.875, "learning_rate": 1.0231420095212258e-06, "loss": 1.3627734184265137, "step": 15688 }, { "epoch": 2.855920633475926, "grad_norm": 14.9375, "learning_rate": 1.0230259418600003e-06, "loss": 1.5233899354934692, "step": 15690 }, { "epoch": 2.8562847001001184, "grad_norm": 10.3125, "learning_rate": 1.0229101643117062e-06, "loss": 1.369388222694397, "step": 15692 }, { "epoch": 2.8566487667243106, "grad_norm": 13.5625, "learning_rate": 1.0227946768933325e-06, "loss": 1.090606927871704, "step": 15694 }, { "epoch": 2.8570128333485028, "grad_norm": 7.40625, "learning_rate": 1.0226794796218276e-06, "loss": 1.486121654510498, "step": 15696 }, { "epoch": 2.857376899972695, "grad_norm": 50.75, "learning_rate": 1.0225645725140954e-06, "loss": 1.3467481136322021, "step": 15698 }, { "epoch": 2.857740966596887, "grad_norm": 46.25, "learning_rate": 1.0224499555869985e-06, "loss": 0.8996018171310425, "step": 15700 }, { "epoch": 2.8581050332210793, "grad_norm": 23.125, "learning_rate": 1.0223356288573564e-06, "loss": 0.4530978798866272, "step": 15702 }, { "epoch": 2.8584690998452715, "grad_norm": 9.5, "learning_rate": 1.0222215923419454e-06, "loss": 1.717079520225525, "step": 15704 }, { "epoch": 2.8588331664694637, "grad_norm": 11.5625, "learning_rate": 1.0221078460575013e-06, "loss": 1.354611873626709, "step": 15706 }, { "epoch": 2.8591972330936564, "grad_norm": 14.1875, "learning_rate": 1.0219943900207147e-06, "loss": 1.965169906616211, "step": 15708 }, { "epoch": 2.859561299717848, "grad_norm": 18.0, "learning_rate": 1.021881224248235e-06, "loss": 1.909058690071106, "step": 15710 }, { "epoch": 2.8599253663420408, "grad_norm": 10.4375, "learning_rate": 1.0217683487566692e-06, "loss": 1.3860423564910889, "step": 15712 }, { "epoch": 2.860289432966233, "grad_norm": 20.375, "learning_rate": 1.0216557635625813e-06, "loss": 1.5550782680511475, "step": 15714 }, { "epoch": 2.860653499590425, "grad_norm": 9.0, "learning_rate": 1.0215434686824924e-06, "loss": 1.426262378692627, "step": 15716 }, { "epoch": 2.8610175662146173, "grad_norm": 8.9375, "learning_rate": 1.0214314641328815e-06, "loss": 1.237898588180542, "step": 15718 }, { "epoch": 2.8613816328388095, "grad_norm": 9.9375, "learning_rate": 1.0213197499301847e-06, "loss": 1.406991958618164, "step": 15720 }, { "epoch": 2.8617456994630017, "grad_norm": 11.9375, "learning_rate": 1.0212083260907962e-06, "loss": 1.5383907556533813, "step": 15722 }, { "epoch": 2.862109766087194, "grad_norm": 10.3125, "learning_rate": 1.021097192631066e-06, "loss": 1.4900743961334229, "step": 15724 }, { "epoch": 2.862473832711386, "grad_norm": 7.78125, "learning_rate": 1.0209863495673033e-06, "loss": 1.2796670198440552, "step": 15726 }, { "epoch": 2.8628378993355783, "grad_norm": 15.9375, "learning_rate": 1.0208757969157734e-06, "loss": 1.0955740213394165, "step": 15728 }, { "epoch": 2.8632019659597705, "grad_norm": 19.0, "learning_rate": 1.0207655346926995e-06, "loss": 0.6655865907669067, "step": 15730 }, { "epoch": 2.8635660325839627, "grad_norm": 7.03125, "learning_rate": 1.0206555629142624e-06, "loss": 1.6001516580581665, "step": 15732 }, { "epoch": 2.8639300992081553, "grad_norm": 15.375, "learning_rate": 1.0205458815965997e-06, "loss": 1.538287878036499, "step": 15734 }, { "epoch": 2.864294165832347, "grad_norm": 10.5, "learning_rate": 1.020436490755807e-06, "loss": 1.4265754222869873, "step": 15736 }, { "epoch": 2.8646582324565397, "grad_norm": 16.5, "learning_rate": 1.020327390407937e-06, "loss": 1.2899627685546875, "step": 15738 }, { "epoch": 2.865022299080732, "grad_norm": 17.75, "learning_rate": 1.020218580568999e-06, "loss": 1.650212049484253, "step": 15740 }, { "epoch": 2.865386365704924, "grad_norm": 9.1875, "learning_rate": 1.0201100612549615e-06, "loss": 1.469498872756958, "step": 15742 }, { "epoch": 2.8657504323291163, "grad_norm": 15.875, "learning_rate": 1.0200018324817484e-06, "loss": 1.4468848705291748, "step": 15744 }, { "epoch": 2.8661144989533085, "grad_norm": 7.125, "learning_rate": 1.0198938942652425e-06, "loss": 1.1684160232543945, "step": 15746 }, { "epoch": 2.8664785655775007, "grad_norm": 7.21875, "learning_rate": 1.0197862466212826e-06, "loss": 1.2453705072402954, "step": 15748 }, { "epoch": 2.866842632201693, "grad_norm": 10.125, "learning_rate": 1.0196788895656657e-06, "loss": 1.5134919881820679, "step": 15750 }, { "epoch": 2.867206698825885, "grad_norm": 10.625, "learning_rate": 1.0195718231141467e-06, "loss": 1.5316256284713745, "step": 15752 }, { "epoch": 2.8675707654500773, "grad_norm": 5.8125, "learning_rate": 1.0194650472824367e-06, "loss": 1.3344790935516357, "step": 15754 }, { "epoch": 2.8679348320742695, "grad_norm": 11.9375, "learning_rate": 1.0193585620862044e-06, "loss": 1.2970019578933716, "step": 15756 }, { "epoch": 2.8682988986984617, "grad_norm": 10.6875, "learning_rate": 1.0192523675410762e-06, "loss": 1.759852647781372, "step": 15758 }, { "epoch": 2.8686629653226543, "grad_norm": 8.0, "learning_rate": 1.0191464636626358e-06, "loss": 1.201621413230896, "step": 15760 }, { "epoch": 2.869027031946846, "grad_norm": 12.4375, "learning_rate": 1.0190408504664245e-06, "loss": 1.4449375867843628, "step": 15762 }, { "epoch": 2.8693910985710387, "grad_norm": 8.4375, "learning_rate": 1.0189355279679398e-06, "loss": 1.209423542022705, "step": 15764 }, { "epoch": 2.869755165195231, "grad_norm": 18.375, "learning_rate": 1.018830496182638e-06, "loss": 1.356123685836792, "step": 15766 }, { "epoch": 2.870119231819423, "grad_norm": 8.75, "learning_rate": 1.018725755125932e-06, "loss": 1.2158262729644775, "step": 15768 }, { "epoch": 2.8704832984436153, "grad_norm": 12.125, "learning_rate": 1.018621304813192e-06, "loss": 1.4276056289672852, "step": 15770 }, { "epoch": 2.8708473650678075, "grad_norm": 13.0625, "learning_rate": 1.018517145259746e-06, "loss": 1.4981703758239746, "step": 15772 }, { "epoch": 2.8712114316919997, "grad_norm": 17.5, "learning_rate": 1.018413276480878e-06, "loss": 1.4588618278503418, "step": 15774 }, { "epoch": 2.871575498316192, "grad_norm": 8.5625, "learning_rate": 1.0183096984918315e-06, "loss": 1.37358820438385, "step": 15776 }, { "epoch": 2.871939564940384, "grad_norm": 9.4375, "learning_rate": 1.0182064113078055e-06, "loss": 1.370955467224121, "step": 15778 }, { "epoch": 2.8723036315645762, "grad_norm": 7.96875, "learning_rate": 1.0181034149439572e-06, "loss": 1.507272481918335, "step": 15780 }, { "epoch": 2.8726676981887684, "grad_norm": 12.0625, "learning_rate": 1.0180007094154008e-06, "loss": 1.3962452411651611, "step": 15782 }, { "epoch": 2.8730317648129606, "grad_norm": 9.9375, "learning_rate": 1.017898294737208e-06, "loss": 2.1494193077087402, "step": 15784 }, { "epoch": 2.8733958314371533, "grad_norm": 16.625, "learning_rate": 1.0177961709244076e-06, "loss": 1.1891204118728638, "step": 15786 }, { "epoch": 2.873759898061345, "grad_norm": 16.25, "learning_rate": 1.017694337991986e-06, "loss": 1.8845057487487793, "step": 15788 }, { "epoch": 2.8741239646855377, "grad_norm": 10.125, "learning_rate": 1.0175927959548865e-06, "loss": 1.6219645738601685, "step": 15790 }, { "epoch": 2.8744880313097294, "grad_norm": 5.0625, "learning_rate": 1.0174915448280105e-06, "loss": 1.3163926601409912, "step": 15792 }, { "epoch": 2.874852097933922, "grad_norm": 6.625, "learning_rate": 1.0173905846262156e-06, "loss": 1.3613336086273193, "step": 15794 }, { "epoch": 2.8752161645581142, "grad_norm": 16.125, "learning_rate": 1.017289915364318e-06, "loss": 1.3420993089675903, "step": 15796 }, { "epoch": 2.8755802311823064, "grad_norm": 9.125, "learning_rate": 1.01718953705709e-06, "loss": 0.9244269132614136, "step": 15798 }, { "epoch": 2.8759442978064986, "grad_norm": 6.09375, "learning_rate": 1.0170894497192613e-06, "loss": 1.4073948860168457, "step": 15800 }, { "epoch": 2.876308364430691, "grad_norm": 14.875, "learning_rate": 1.0169896533655206e-06, "loss": 1.3569610118865967, "step": 15802 }, { "epoch": 2.876672431054883, "grad_norm": 21.25, "learning_rate": 1.0168901480105113e-06, "loss": 1.7138009071350098, "step": 15804 }, { "epoch": 2.877036497679075, "grad_norm": 18.625, "learning_rate": 1.016790933668836e-06, "loss": 1.5243194103240967, "step": 15806 }, { "epoch": 2.8774005643032674, "grad_norm": 20.75, "learning_rate": 1.016692010355054e-06, "loss": 1.9013592004776, "step": 15808 }, { "epoch": 2.8777646309274596, "grad_norm": 15.6875, "learning_rate": 1.0165933780836818e-06, "loss": 1.7598347663879395, "step": 15810 }, { "epoch": 2.878128697551652, "grad_norm": 7.6875, "learning_rate": 1.0164950368691936e-06, "loss": 1.249074101448059, "step": 15812 }, { "epoch": 2.878492764175844, "grad_norm": 63.75, "learning_rate": 1.0163969867260199e-06, "loss": 1.0511736869812012, "step": 15814 }, { "epoch": 2.8788568308000366, "grad_norm": 11.9375, "learning_rate": 1.0162992276685497e-06, "loss": 1.3306910991668701, "step": 15816 }, { "epoch": 2.8792208974242284, "grad_norm": 12.1875, "learning_rate": 1.0162017597111287e-06, "loss": 1.8575962781906128, "step": 15818 }, { "epoch": 2.879584964048421, "grad_norm": 8.0625, "learning_rate": 1.0161045828680597e-06, "loss": 0.8926464319229126, "step": 15820 }, { "epoch": 2.879949030672613, "grad_norm": 10.625, "learning_rate": 1.0160076971536032e-06, "loss": 1.4648663997650146, "step": 15822 }, { "epoch": 2.8803130972968054, "grad_norm": 4.40625, "learning_rate": 1.0159111025819768e-06, "loss": 0.931498646736145, "step": 15824 }, { "epoch": 2.8806771639209976, "grad_norm": 7.3125, "learning_rate": 1.0158147991673554e-06, "loss": 1.1095563173294067, "step": 15826 }, { "epoch": 2.88104123054519, "grad_norm": 7.3125, "learning_rate": 1.0157187869238707e-06, "loss": 1.2842364311218262, "step": 15828 }, { "epoch": 2.881405297169382, "grad_norm": 13.75, "learning_rate": 1.0156230658656124e-06, "loss": 1.4239689111709595, "step": 15830 }, { "epoch": 2.881769363793574, "grad_norm": 15.8125, "learning_rate": 1.0155276360066275e-06, "loss": 0.9221434593200684, "step": 15832 }, { "epoch": 2.8821334304177664, "grad_norm": 9.375, "learning_rate": 1.0154324973609196e-06, "loss": 1.387025237083435, "step": 15834 }, { "epoch": 2.8824974970419586, "grad_norm": 11.375, "learning_rate": 1.01533764994245e-06, "loss": 0.856332540512085, "step": 15836 }, { "epoch": 2.8828615636661508, "grad_norm": 5.78125, "learning_rate": 1.015243093765137e-06, "loss": 1.2872095108032227, "step": 15838 }, { "epoch": 2.883225630290343, "grad_norm": 16.25, "learning_rate": 1.0151488288428564e-06, "loss": 1.6521106958389282, "step": 15840 }, { "epoch": 2.8835896969145356, "grad_norm": 27.75, "learning_rate": 1.0150548551894415e-06, "loss": 0.745322585105896, "step": 15842 }, { "epoch": 2.8839537635387273, "grad_norm": 11.6875, "learning_rate": 1.014961172818682e-06, "loss": 1.750633716583252, "step": 15844 }, { "epoch": 2.88431783016292, "grad_norm": 7.1875, "learning_rate": 1.014867781744326e-06, "loss": 1.5747134685516357, "step": 15846 }, { "epoch": 2.884681896787112, "grad_norm": 14.75, "learning_rate": 1.014774681980078e-06, "loss": 1.9590178728103638, "step": 15848 }, { "epoch": 2.8850459634113044, "grad_norm": 8.8125, "learning_rate": 1.0146818735395998e-06, "loss": 1.7739530801773071, "step": 15850 }, { "epoch": 2.8854100300354966, "grad_norm": 4.0625, "learning_rate": 1.0145893564365112e-06, "loss": 1.267188310623169, "step": 15852 }, { "epoch": 2.8857740966596888, "grad_norm": 5.84375, "learning_rate": 1.014497130684388e-06, "loss": 0.7861636877059937, "step": 15854 }, { "epoch": 2.886138163283881, "grad_norm": 8.3125, "learning_rate": 1.0144051962967645e-06, "loss": 0.9685366153717041, "step": 15856 }, { "epoch": 2.886502229908073, "grad_norm": 111.5, "learning_rate": 1.0143135532871316e-06, "loss": 1.0216107368469238, "step": 15858 }, { "epoch": 2.8868662965322653, "grad_norm": 7.71875, "learning_rate": 1.0142222016689372e-06, "loss": 1.340867042541504, "step": 15860 }, { "epoch": 2.8872303631564575, "grad_norm": 15.25, "learning_rate": 1.0141311414555876e-06, "loss": 1.485758662223816, "step": 15862 }, { "epoch": 2.8875944297806497, "grad_norm": 16.875, "learning_rate": 1.0140403726604444e-06, "loss": 1.4802769422531128, "step": 15864 }, { "epoch": 2.887958496404842, "grad_norm": 41.25, "learning_rate": 1.0139498952968283e-06, "loss": 1.6250853538513184, "step": 15866 }, { "epoch": 2.8883225630290346, "grad_norm": 9.5, "learning_rate": 1.0138597093780166e-06, "loss": 1.2184441089630127, "step": 15868 }, { "epoch": 2.8886866296532263, "grad_norm": 9.6875, "learning_rate": 1.0137698149172428e-06, "loss": 1.2361960411071777, "step": 15870 }, { "epoch": 2.889050696277419, "grad_norm": 5.15625, "learning_rate": 1.0136802119277e-06, "loss": 1.1188724040985107, "step": 15872 }, { "epoch": 2.889414762901611, "grad_norm": 15.125, "learning_rate": 1.0135909004225356e-06, "loss": 1.520588994026184, "step": 15874 }, { "epoch": 2.8897788295258033, "grad_norm": 12.5, "learning_rate": 1.0135018804148566e-06, "loss": 1.1083598136901855, "step": 15876 }, { "epoch": 2.8901428961499955, "grad_norm": 11.4375, "learning_rate": 1.013413151917726e-06, "loss": 1.5388174057006836, "step": 15878 }, { "epoch": 2.8905069627741877, "grad_norm": 22.875, "learning_rate": 1.0133247149441643e-06, "loss": 1.6115424633026123, "step": 15880 }, { "epoch": 2.89087102939838, "grad_norm": 6.34375, "learning_rate": 1.0132365695071498e-06, "loss": 1.2949028015136719, "step": 15882 }, { "epoch": 2.891235096022572, "grad_norm": 5.78125, "learning_rate": 1.0131487156196168e-06, "loss": 0.806056559085846, "step": 15884 }, { "epoch": 2.8915991626467643, "grad_norm": 18.0, "learning_rate": 1.0130611532944578e-06, "loss": 0.6471291780471802, "step": 15886 }, { "epoch": 2.8919632292709565, "grad_norm": 3.6875, "learning_rate": 1.0129738825445221e-06, "loss": 0.920612096786499, "step": 15888 }, { "epoch": 2.8923272958951487, "grad_norm": 8.8125, "learning_rate": 1.0128869033826165e-06, "loss": 1.324739694595337, "step": 15890 }, { "epoch": 2.892691362519341, "grad_norm": 32.25, "learning_rate": 1.012800215821505e-06, "loss": 1.339063048362732, "step": 15892 }, { "epoch": 2.8930554291435335, "grad_norm": 23.75, "learning_rate": 1.012713819873908e-06, "loss": 1.1131782531738281, "step": 15894 }, { "epoch": 2.8934194957677253, "grad_norm": 20.25, "learning_rate": 1.0126277155525045e-06, "loss": 1.2030948400497437, "step": 15896 }, { "epoch": 2.893783562391918, "grad_norm": 12.0625, "learning_rate": 1.0125419028699293e-06, "loss": 1.7994149923324585, "step": 15898 }, { "epoch": 2.89414762901611, "grad_norm": 56.0, "learning_rate": 1.0124563818387755e-06, "loss": 1.214991807937622, "step": 15900 }, { "epoch": 2.8945116956403023, "grad_norm": 7.5, "learning_rate": 1.0123711524715932e-06, "loss": 1.540924072265625, "step": 15902 }, { "epoch": 2.8948757622644945, "grad_norm": 4.21875, "learning_rate": 1.0122862147808888e-06, "loss": 1.4240427017211914, "step": 15904 }, { "epoch": 2.8952398288886867, "grad_norm": 25.625, "learning_rate": 1.012201568779127e-06, "loss": 1.1652765274047852, "step": 15906 }, { "epoch": 2.895603895512879, "grad_norm": 63.0, "learning_rate": 1.012117214478729e-06, "loss": 1.975993037223816, "step": 15908 }, { "epoch": 2.895967962137071, "grad_norm": 20.25, "learning_rate": 1.0120331518920736e-06, "loss": 1.4749317169189453, "step": 15910 }, { "epoch": 2.8963320287612633, "grad_norm": 13.75, "learning_rate": 1.0119493810314968e-06, "loss": 1.105260968208313, "step": 15912 }, { "epoch": 2.8966960953854555, "grad_norm": 9.9375, "learning_rate": 1.0118659019092912e-06, "loss": 1.5636228322982788, "step": 15914 }, { "epoch": 2.8970601620096477, "grad_norm": 6.625, "learning_rate": 1.0117827145377075e-06, "loss": 1.3447386026382446, "step": 15916 }, { "epoch": 2.89742422863384, "grad_norm": 10.4375, "learning_rate": 1.0116998189289529e-06, "loss": 1.7130296230316162, "step": 15918 }, { "epoch": 2.897788295258032, "grad_norm": 18.625, "learning_rate": 1.011617215095192e-06, "loss": 1.3692744970321655, "step": 15920 }, { "epoch": 2.8981523618822242, "grad_norm": 16.125, "learning_rate": 1.0115349030485467e-06, "loss": 1.5688625574111938, "step": 15922 }, { "epoch": 2.898516428506417, "grad_norm": 5.4375, "learning_rate": 1.0114528828010955e-06, "loss": 1.1722689867019653, "step": 15924 }, { "epoch": 2.8988804951306086, "grad_norm": 7.1875, "learning_rate": 1.011371154364875e-06, "loss": 1.241307258605957, "step": 15926 }, { "epoch": 2.8992445617548013, "grad_norm": 18.375, "learning_rate": 1.0112897177518786e-06, "loss": 1.655705213546753, "step": 15928 }, { "epoch": 2.8996086283789935, "grad_norm": 12.9375, "learning_rate": 1.0112085729740563e-06, "loss": 1.8025476932525635, "step": 15930 }, { "epoch": 2.8999726950031857, "grad_norm": 6.5625, "learning_rate": 1.0111277200433163e-06, "loss": 1.1286834478378296, "step": 15932 }, { "epoch": 2.900336761627378, "grad_norm": 4.96875, "learning_rate": 1.0110471589715228e-06, "loss": 1.2490546703338623, "step": 15934 }, { "epoch": 2.90070082825157, "grad_norm": 13.6875, "learning_rate": 1.0109668897704986e-06, "loss": 1.3304779529571533, "step": 15936 }, { "epoch": 2.9010648948757622, "grad_norm": 14.125, "learning_rate": 1.0108869124520224e-06, "loss": 1.341776967048645, "step": 15938 }, { "epoch": 2.9014289614999544, "grad_norm": 24.5, "learning_rate": 1.0108072270278305e-06, "loss": 1.9102349281311035, "step": 15940 }, { "epoch": 2.9017930281241466, "grad_norm": 11.375, "learning_rate": 1.0107278335096167e-06, "loss": 0.9950995445251465, "step": 15942 }, { "epoch": 2.902157094748339, "grad_norm": 20.75, "learning_rate": 1.0106487319090313e-06, "loss": 1.0460076332092285, "step": 15944 }, { "epoch": 2.902521161372531, "grad_norm": 10.5, "learning_rate": 1.0105699222376826e-06, "loss": 1.4198015928268433, "step": 15946 }, { "epoch": 2.902885227996723, "grad_norm": 44.5, "learning_rate": 1.0104914045071352e-06, "loss": 1.555677890777588, "step": 15948 }, { "epoch": 2.903249294620916, "grad_norm": 6.09375, "learning_rate": 1.0104131787289113e-06, "loss": 1.1894680261611938, "step": 15950 }, { "epoch": 2.9036133612451076, "grad_norm": 17.875, "learning_rate": 1.0103352449144905e-06, "loss": 1.3933613300323486, "step": 15952 }, { "epoch": 2.9039774278693002, "grad_norm": 13.5625, "learning_rate": 1.0102576030753092e-06, "loss": 1.3102720975875854, "step": 15954 }, { "epoch": 2.9043414944934924, "grad_norm": 13.6875, "learning_rate": 1.0101802532227607e-06, "loss": 0.9838279485702515, "step": 15956 }, { "epoch": 2.9047055611176846, "grad_norm": 16.5, "learning_rate": 1.010103195368196e-06, "loss": 1.1908875703811646, "step": 15958 }, { "epoch": 2.905069627741877, "grad_norm": 32.5, "learning_rate": 1.0100264295229233e-06, "loss": 1.504979133605957, "step": 15960 }, { "epoch": 2.905433694366069, "grad_norm": 16.75, "learning_rate": 1.0099499556982073e-06, "loss": 1.5941108465194702, "step": 15962 }, { "epoch": 2.905797760990261, "grad_norm": 8.6875, "learning_rate": 1.0098737739052702e-06, "loss": 1.4507976770401, "step": 15964 }, { "epoch": 2.9061618276144534, "grad_norm": 11.375, "learning_rate": 1.0097978841552916e-06, "loss": 1.430513620376587, "step": 15966 }, { "epoch": 2.9065258942386456, "grad_norm": 26.625, "learning_rate": 1.009722286459408e-06, "loss": 1.370814323425293, "step": 15968 }, { "epoch": 2.906889960862838, "grad_norm": 14.0625, "learning_rate": 1.0096469808287129e-06, "loss": 1.3203754425048828, "step": 15970 }, { "epoch": 2.90725402748703, "grad_norm": 19.0, "learning_rate": 1.009571967274257e-06, "loss": 2.059998035430908, "step": 15972 }, { "epoch": 2.907618094111222, "grad_norm": 15.1875, "learning_rate": 1.0094972458070484e-06, "loss": 1.158681035041809, "step": 15974 }, { "epoch": 2.907982160735415, "grad_norm": 15.5, "learning_rate": 1.0094228164380526e-06, "loss": 1.795695185661316, "step": 15976 }, { "epoch": 2.9083462273596066, "grad_norm": 15.75, "learning_rate": 1.009348679178191e-06, "loss": 1.4717373847961426, "step": 15978 }, { "epoch": 2.908710293983799, "grad_norm": 13.0, "learning_rate": 1.0092748340383435e-06, "loss": 1.9813203811645508, "step": 15980 }, { "epoch": 2.9090743606079914, "grad_norm": 12.8125, "learning_rate": 1.0092012810293464e-06, "loss": 1.4742377996444702, "step": 15982 }, { "epoch": 2.9094384272321836, "grad_norm": 19.875, "learning_rate": 1.0091280201619931e-06, "loss": 1.4038236141204834, "step": 15984 }, { "epoch": 2.909802493856376, "grad_norm": 40.75, "learning_rate": 1.0090550514470349e-06, "loss": 0.9446954727172852, "step": 15986 }, { "epoch": 2.910166560480568, "grad_norm": 37.25, "learning_rate": 1.0089823748951792e-06, "loss": 1.0276025533676147, "step": 15988 }, { "epoch": 2.91053062710476, "grad_norm": 17.0, "learning_rate": 1.0089099905170908e-06, "loss": 0.5614534020423889, "step": 15990 }, { "epoch": 2.9108946937289524, "grad_norm": 7.59375, "learning_rate": 1.0088378983233921e-06, "loss": 1.460458755493164, "step": 15992 }, { "epoch": 2.9112587603531446, "grad_norm": 34.5, "learning_rate": 1.0087660983246627e-06, "loss": 1.9154887199401855, "step": 15994 }, { "epoch": 2.9116228269773368, "grad_norm": 11.0, "learning_rate": 1.0086945905314385e-06, "loss": 1.2142599821090698, "step": 15996 }, { "epoch": 2.911986893601529, "grad_norm": 16.25, "learning_rate": 1.0086233749542132e-06, "loss": 1.4964649677276611, "step": 15998 }, { "epoch": 2.912350960225721, "grad_norm": 7.53125, "learning_rate": 1.0085524516034368e-06, "loss": 1.079470157623291, "step": 16000 }, { "epoch": 2.912715026849914, "grad_norm": 7.59375, "learning_rate": 1.008481820489518e-06, "loss": 1.4527021646499634, "step": 16002 }, { "epoch": 2.9130790934741055, "grad_norm": 7.59375, "learning_rate": 1.0084114816228208e-06, "loss": 1.2002918720245361, "step": 16004 }, { "epoch": 2.913443160098298, "grad_norm": 5.625, "learning_rate": 1.0083414350136677e-06, "loss": 1.3399319648742676, "step": 16006 }, { "epoch": 2.9138072267224904, "grad_norm": 8.625, "learning_rate": 1.0082716806723374e-06, "loss": 1.4220551252365112, "step": 16008 }, { "epoch": 2.9141712933466826, "grad_norm": 10.6875, "learning_rate": 1.0082022186090664e-06, "loss": 1.491031289100647, "step": 16010 }, { "epoch": 2.9145353599708748, "grad_norm": 10.4375, "learning_rate": 1.0081330488340475e-06, "loss": 1.3106815814971924, "step": 16012 }, { "epoch": 2.914899426595067, "grad_norm": 6.84375, "learning_rate": 1.0080641713574313e-06, "loss": 1.2100751399993896, "step": 16014 }, { "epoch": 2.915263493219259, "grad_norm": 60.25, "learning_rate": 1.0079955861893256e-06, "loss": 1.539379596710205, "step": 16016 }, { "epoch": 2.9156275598434513, "grad_norm": 32.25, "learning_rate": 1.0079272933397948e-06, "loss": 1.5998427867889404, "step": 16018 }, { "epoch": 2.9159916264676435, "grad_norm": 65.0, "learning_rate": 1.0078592928188603e-06, "loss": 1.5707483291625977, "step": 16020 }, { "epoch": 2.9163556930918357, "grad_norm": 9.0, "learning_rate": 1.0077915846365013e-06, "loss": 1.5759389400482178, "step": 16022 }, { "epoch": 2.916719759716028, "grad_norm": 15.9375, "learning_rate": 1.0077241688026534e-06, "loss": 1.4107835292816162, "step": 16024 }, { "epoch": 2.91708382634022, "grad_norm": 19.75, "learning_rate": 1.0076570453272097e-06, "loss": 1.554302453994751, "step": 16026 }, { "epoch": 2.9174478929644128, "grad_norm": 8.875, "learning_rate": 1.0075902142200206e-06, "loss": 1.4295486211776733, "step": 16028 }, { "epoch": 2.9178119595886045, "grad_norm": 5.8125, "learning_rate": 1.0075236754908925e-06, "loss": 1.2804522514343262, "step": 16030 }, { "epoch": 2.918176026212797, "grad_norm": 8.6875, "learning_rate": 1.0074574291495908e-06, "loss": 0.9572818279266357, "step": 16032 }, { "epoch": 2.918540092836989, "grad_norm": 15.75, "learning_rate": 1.0073914752058362e-06, "loss": 1.9047865867614746, "step": 16034 }, { "epoch": 2.9189041594611815, "grad_norm": 23.875, "learning_rate": 1.0073258136693072e-06, "loss": 1.2155386209487915, "step": 16036 }, { "epoch": 2.9192682260853737, "grad_norm": 4.03125, "learning_rate": 1.0072604445496392e-06, "loss": 0.8650633096694946, "step": 16038 }, { "epoch": 2.919632292709566, "grad_norm": 6.21875, "learning_rate": 1.0071953678564254e-06, "loss": 1.579742193222046, "step": 16040 }, { "epoch": 2.919996359333758, "grad_norm": 6.03125, "learning_rate": 1.0071305835992152e-06, "loss": 0.9558074474334717, "step": 16042 }, { "epoch": 2.9203604259579503, "grad_norm": 10.25, "learning_rate": 1.0070660917875153e-06, "loss": 1.2999343872070312, "step": 16044 }, { "epoch": 2.9207244925821425, "grad_norm": 5.8125, "learning_rate": 1.0070018924307899e-06, "loss": 1.242353081703186, "step": 16046 }, { "epoch": 2.9210885592063347, "grad_norm": 9.625, "learning_rate": 1.0069379855384598e-06, "loss": 1.256218433380127, "step": 16048 }, { "epoch": 2.921452625830527, "grad_norm": 3.875, "learning_rate": 1.0068743711199032e-06, "loss": 1.217787265777588, "step": 16050 }, { "epoch": 2.921816692454719, "grad_norm": 6.9375, "learning_rate": 1.0068110491844552e-06, "loss": 1.188781499862671, "step": 16052 }, { "epoch": 2.9221807590789113, "grad_norm": 13.8125, "learning_rate": 1.0067480197414082e-06, "loss": 1.4043807983398438, "step": 16054 }, { "epoch": 2.9225448257031035, "grad_norm": 25.125, "learning_rate": 1.0066852828000112e-06, "loss": 1.4675672054290771, "step": 16056 }, { "epoch": 2.922908892327296, "grad_norm": 13.5, "learning_rate": 1.0066228383694708e-06, "loss": 1.5992004871368408, "step": 16058 }, { "epoch": 2.923272958951488, "grad_norm": 13.6875, "learning_rate": 1.0065606864589501e-06, "loss": 0.5050632953643799, "step": 16060 }, { "epoch": 2.9236370255756805, "grad_norm": 13.375, "learning_rate": 1.0064988270775705e-06, "loss": 1.3865153789520264, "step": 16062 }, { "epoch": 2.9240010921998727, "grad_norm": 9.375, "learning_rate": 1.0064372602344086e-06, "loss": 1.027376651763916, "step": 16064 }, { "epoch": 2.924365158824065, "grad_norm": 4.5625, "learning_rate": 1.0063759859384998e-06, "loss": 0.9757024645805359, "step": 16066 }, { "epoch": 2.924729225448257, "grad_norm": 7.5625, "learning_rate": 1.0063150041988357e-06, "loss": 1.3136813640594482, "step": 16068 }, { "epoch": 2.9250932920724493, "grad_norm": 9.0625, "learning_rate": 1.0062543150243647e-06, "loss": 1.339347243309021, "step": 16070 }, { "epoch": 2.9254573586966415, "grad_norm": 7.46875, "learning_rate": 1.0061939184239933e-06, "loss": 1.0517849922180176, "step": 16072 }, { "epoch": 2.9258214253208337, "grad_norm": 9.625, "learning_rate": 1.0061338144065843e-06, "loss": 1.1386849880218506, "step": 16074 }, { "epoch": 2.926185491945026, "grad_norm": 12.625, "learning_rate": 1.0060740029809575e-06, "loss": 1.4157938957214355, "step": 16076 }, { "epoch": 2.926549558569218, "grad_norm": 13.75, "learning_rate": 1.00601448415589e-06, "loss": 1.2470686435699463, "step": 16078 }, { "epoch": 2.9269136251934103, "grad_norm": 14.0625, "learning_rate": 1.0059552579401157e-06, "loss": 1.7663663625717163, "step": 16080 }, { "epoch": 2.9272776918176024, "grad_norm": 10.0, "learning_rate": 1.0058963243423267e-06, "loss": 1.9873912334442139, "step": 16082 }, { "epoch": 2.927641758441795, "grad_norm": 6.78125, "learning_rate": 1.0058376833711702e-06, "loss": 1.3792403936386108, "step": 16084 }, { "epoch": 2.928005825065987, "grad_norm": 17.875, "learning_rate": 1.0057793350352525e-06, "loss": 1.184129238128662, "step": 16086 }, { "epoch": 2.9283698916901795, "grad_norm": 5.78125, "learning_rate": 1.0057212793431356e-06, "loss": 1.0965029001235962, "step": 16088 }, { "epoch": 2.9287339583143717, "grad_norm": 13.1875, "learning_rate": 1.0056635163033386e-06, "loss": 1.2313885688781738, "step": 16090 }, { "epoch": 2.929098024938564, "grad_norm": 15.75, "learning_rate": 1.005606045924338e-06, "loss": 1.3645853996276855, "step": 16092 }, { "epoch": 2.929462091562756, "grad_norm": 17.25, "learning_rate": 1.0055488682145678e-06, "loss": 1.8306810855865479, "step": 16094 }, { "epoch": 2.9298261581869482, "grad_norm": 6.21875, "learning_rate": 1.0054919831824183e-06, "loss": 1.362161636352539, "step": 16096 }, { "epoch": 2.9301902248111404, "grad_norm": 4.5625, "learning_rate": 1.0054353908362375e-06, "loss": 1.3486385345458984, "step": 16098 }, { "epoch": 2.9305542914353326, "grad_norm": 11.3125, "learning_rate": 1.0053790911843296e-06, "loss": 1.1928051710128784, "step": 16100 }, { "epoch": 2.930918358059525, "grad_norm": 8.375, "learning_rate": 1.0053230842349566e-06, "loss": 0.9413809776306152, "step": 16102 }, { "epoch": 2.931282424683717, "grad_norm": 13.0, "learning_rate": 1.0052673699963374e-06, "loss": 1.3937246799468994, "step": 16104 }, { "epoch": 2.931646491307909, "grad_norm": 14.125, "learning_rate": 1.0052119484766475e-06, "loss": 1.998185634613037, "step": 16106 }, { "epoch": 2.9320105579321014, "grad_norm": 5.46875, "learning_rate": 1.0051568196840203e-06, "loss": 1.1789721250534058, "step": 16108 }, { "epoch": 2.932374624556294, "grad_norm": 12.875, "learning_rate": 1.0051019836265452e-06, "loss": 1.2565393447875977, "step": 16110 }, { "epoch": 2.932738691180486, "grad_norm": 8.125, "learning_rate": 1.0050474403122695e-06, "loss": 1.640028953552246, "step": 16112 }, { "epoch": 2.9331027578046784, "grad_norm": 17.75, "learning_rate": 1.004993189749197e-06, "loss": 1.1269093751907349, "step": 16114 }, { "epoch": 2.9334668244288706, "grad_norm": 5.8125, "learning_rate": 1.0049392319452888e-06, "loss": 1.2945148944854736, "step": 16116 }, { "epoch": 2.933830891053063, "grad_norm": 7.65625, "learning_rate": 1.0048855669084632e-06, "loss": 0.9564671516418457, "step": 16118 }, { "epoch": 2.934194957677255, "grad_norm": 11.125, "learning_rate": 1.004832194646595e-06, "loss": 1.4331605434417725, "step": 16120 }, { "epoch": 2.934559024301447, "grad_norm": 8.625, "learning_rate": 1.0047791151675167e-06, "loss": 1.063494324684143, "step": 16122 }, { "epoch": 2.9349230909256394, "grad_norm": 12.75, "learning_rate": 1.0047263284790171e-06, "loss": 1.5241312980651855, "step": 16124 }, { "epoch": 2.9352871575498316, "grad_norm": 35.25, "learning_rate": 1.004673834588843e-06, "loss": 1.893446683883667, "step": 16126 }, { "epoch": 2.935651224174024, "grad_norm": 23.5, "learning_rate": 1.004621633504697e-06, "loss": 1.8110666275024414, "step": 16128 }, { "epoch": 2.936015290798216, "grad_norm": 8.9375, "learning_rate": 1.0045697252342396e-06, "loss": 1.48243248462677, "step": 16130 }, { "epoch": 2.936379357422408, "grad_norm": 13.3125, "learning_rate": 1.0045181097850886e-06, "loss": 1.653139352798462, "step": 16132 }, { "epoch": 2.9367434240466004, "grad_norm": 12.25, "learning_rate": 1.004466787164818e-06, "loss": 1.4307513236999512, "step": 16134 }, { "epoch": 2.937107490670793, "grad_norm": 5.34375, "learning_rate": 1.0044157573809594e-06, "loss": 0.9555176496505737, "step": 16136 }, { "epoch": 2.9374715572949848, "grad_norm": 4.78125, "learning_rate": 1.0043650204410005e-06, "loss": 0.9934687614440918, "step": 16138 }, { "epoch": 2.9378356239191774, "grad_norm": 6.875, "learning_rate": 1.0043145763523875e-06, "loss": 1.3215734958648682, "step": 16140 }, { "epoch": 2.9381996905433696, "grad_norm": 10.25, "learning_rate": 1.0042644251225226e-06, "loss": 1.3117142915725708, "step": 16142 }, { "epoch": 2.938563757167562, "grad_norm": 10.375, "learning_rate": 1.004214566758765e-06, "loss": 1.7948687076568604, "step": 16144 }, { "epoch": 2.938927823791754, "grad_norm": 7.59375, "learning_rate": 1.004165001268432e-06, "loss": 1.3431758880615234, "step": 16146 }, { "epoch": 2.939291890415946, "grad_norm": 18.75, "learning_rate": 1.0041157286587965e-06, "loss": 1.3410488367080688, "step": 16148 }, { "epoch": 2.9396559570401384, "grad_norm": 21.625, "learning_rate": 1.004066748937089e-06, "loss": 1.3845548629760742, "step": 16150 }, { "epoch": 2.9400200236643306, "grad_norm": 22.125, "learning_rate": 1.0040180621104973e-06, "loss": 1.2215200662612915, "step": 16152 }, { "epoch": 2.9403840902885228, "grad_norm": 10.125, "learning_rate": 1.0039696681861661e-06, "loss": 1.0278096199035645, "step": 16154 }, { "epoch": 2.940748156912715, "grad_norm": 16.25, "learning_rate": 1.0039215671711972e-06, "loss": 1.5213755369186401, "step": 16156 }, { "epoch": 2.941112223536907, "grad_norm": 20.75, "learning_rate": 1.0038737590726484e-06, "loss": 1.7307862043380737, "step": 16158 }, { "epoch": 2.9414762901610993, "grad_norm": 68.5, "learning_rate": 1.003826243897536e-06, "loss": 1.465663194656372, "step": 16160 }, { "epoch": 2.9418403567852915, "grad_norm": 9.6875, "learning_rate": 1.0037790216528327e-06, "loss": 1.5341862440109253, "step": 16162 }, { "epoch": 2.9422044234094837, "grad_norm": 7.5625, "learning_rate": 1.003732092345468e-06, "loss": 1.1823664903640747, "step": 16164 }, { "epoch": 2.9425684900336764, "grad_norm": 8.5625, "learning_rate": 1.0036854559823283e-06, "loss": 1.4220274686813354, "step": 16166 }, { "epoch": 2.942932556657868, "grad_norm": 12.125, "learning_rate": 1.0036391125702577e-06, "loss": 1.408041000366211, "step": 16168 }, { "epoch": 2.9432966232820608, "grad_norm": 11.0, "learning_rate": 1.0035930621160571e-06, "loss": 1.4193745851516724, "step": 16170 }, { "epoch": 2.943660689906253, "grad_norm": 17.75, "learning_rate": 1.0035473046264834e-06, "loss": 1.0945427417755127, "step": 16172 }, { "epoch": 2.944024756530445, "grad_norm": 19.5, "learning_rate": 1.0035018401082522e-06, "loss": 0.9122292995452881, "step": 16174 }, { "epoch": 2.9443888231546373, "grad_norm": 16.75, "learning_rate": 1.0034566685680346e-06, "loss": 1.3807919025421143, "step": 16176 }, { "epoch": 2.9447528897788295, "grad_norm": 7.625, "learning_rate": 1.0034117900124598e-06, "loss": 1.3316371440887451, "step": 16178 }, { "epoch": 2.9451169564030217, "grad_norm": 7.75, "learning_rate": 1.0033672044481133e-06, "loss": 1.2266490459442139, "step": 16180 }, { "epoch": 2.945481023027214, "grad_norm": 10.5625, "learning_rate": 1.0033229118815379e-06, "loss": 1.5142892599105835, "step": 16182 }, { "epoch": 2.945845089651406, "grad_norm": 14.125, "learning_rate": 1.0032789123192335e-06, "loss": 1.4251207113265991, "step": 16184 }, { "epoch": 2.9462091562755983, "grad_norm": 13.1875, "learning_rate": 1.0032352057676567e-06, "loss": 1.6307628154754639, "step": 16186 }, { "epoch": 2.9465732228997905, "grad_norm": 21.125, "learning_rate": 1.003191792233221e-06, "loss": 1.972518801689148, "step": 16188 }, { "epoch": 2.9469372895239827, "grad_norm": 23.125, "learning_rate": 1.0031486717222976e-06, "loss": 1.2268198728561401, "step": 16190 }, { "epoch": 2.9473013561481753, "grad_norm": 31.875, "learning_rate": 1.003105844241214e-06, "loss": 1.3296505212783813, "step": 16192 }, { "epoch": 2.947665422772367, "grad_norm": 10.6875, "learning_rate": 1.0030633097962552e-06, "loss": 1.8601598739624023, "step": 16194 }, { "epoch": 2.9480294893965597, "grad_norm": 8.9375, "learning_rate": 1.0030210683936627e-06, "loss": 1.3801432847976685, "step": 16196 }, { "epoch": 2.948393556020752, "grad_norm": 6.03125, "learning_rate": 1.0029791200396355e-06, "loss": 0.9664236903190613, "step": 16198 }, { "epoch": 2.948757622644944, "grad_norm": 11.375, "learning_rate": 1.002937464740329e-06, "loss": 1.6388170719146729, "step": 16200 }, { "epoch": 2.9491216892691363, "grad_norm": 8.875, "learning_rate": 1.0028961025018564e-06, "loss": 1.5815627574920654, "step": 16202 }, { "epoch": 2.9494857558933285, "grad_norm": 12.4375, "learning_rate": 1.0028550333302872e-06, "loss": 1.4404516220092773, "step": 16204 }, { "epoch": 2.9498498225175207, "grad_norm": 40.75, "learning_rate": 1.002814257231648e-06, "loss": 1.1763116121292114, "step": 16206 }, { "epoch": 2.950213889141713, "grad_norm": 23.5, "learning_rate": 1.0027737742119227e-06, "loss": 0.905163049697876, "step": 16208 }, { "epoch": 2.950577955765905, "grad_norm": 31.0, "learning_rate": 1.002733584277052e-06, "loss": 1.0610320568084717, "step": 16210 }, { "epoch": 2.9509420223900973, "grad_norm": 4.4375, "learning_rate": 1.0026936874329336e-06, "loss": 0.9330928325653076, "step": 16212 }, { "epoch": 2.9513060890142895, "grad_norm": 8.375, "learning_rate": 1.002654083685422e-06, "loss": 1.1678614616394043, "step": 16214 }, { "epoch": 2.9516701556384817, "grad_norm": 19.125, "learning_rate": 1.0026147730403294e-06, "loss": 1.4060074090957642, "step": 16216 }, { "epoch": 2.9520342222626743, "grad_norm": 9.625, "learning_rate": 1.0025757555034238e-06, "loss": 1.4334461688995361, "step": 16218 }, { "epoch": 2.952398288886866, "grad_norm": 7.96875, "learning_rate": 1.0025370310804316e-06, "loss": 1.3008579015731812, "step": 16220 }, { "epoch": 2.9527623555110587, "grad_norm": 24.0, "learning_rate": 1.002498599777035e-06, "loss": 1.5103909969329834, "step": 16222 }, { "epoch": 2.953126422135251, "grad_norm": 12.4375, "learning_rate": 1.0024604615988734e-06, "loss": 2.1457014083862305, "step": 16224 }, { "epoch": 2.953490488759443, "grad_norm": 11.375, "learning_rate": 1.002422616551544e-06, "loss": 1.4554740190505981, "step": 16226 }, { "epoch": 2.9538545553836353, "grad_norm": 9.5625, "learning_rate": 1.0023850646406002e-06, "loss": 1.5656893253326416, "step": 16228 }, { "epoch": 2.9542186220078275, "grad_norm": 12.0625, "learning_rate": 1.0023478058715524e-06, "loss": 1.4388329982757568, "step": 16230 }, { "epoch": 2.9545826886320197, "grad_norm": 9.1875, "learning_rate": 1.0023108402498684e-06, "loss": 1.5340616703033447, "step": 16232 }, { "epoch": 2.954946755256212, "grad_norm": 8.75, "learning_rate": 1.0022741677809728e-06, "loss": 1.5657023191452026, "step": 16234 }, { "epoch": 2.955310821880404, "grad_norm": 8.4375, "learning_rate": 1.0022377884702468e-06, "loss": 1.2961970567703247, "step": 16236 }, { "epoch": 2.9556748885045963, "grad_norm": 9.6875, "learning_rate": 1.002201702323029e-06, "loss": 0.9746873378753662, "step": 16238 }, { "epoch": 2.9560389551287884, "grad_norm": 13.6875, "learning_rate": 1.0021659093446152e-06, "loss": 1.7138454914093018, "step": 16240 }, { "epoch": 2.9564030217529806, "grad_norm": 14.125, "learning_rate": 1.002130409540258e-06, "loss": 1.7269600629806519, "step": 16242 }, { "epoch": 2.9567670883771733, "grad_norm": 16.75, "learning_rate": 1.002095202915166e-06, "loss": 1.586848497390747, "step": 16244 }, { "epoch": 2.957131155001365, "grad_norm": 76.0, "learning_rate": 1.0020602894745063e-06, "loss": 1.769135594367981, "step": 16246 }, { "epoch": 2.9574952216255577, "grad_norm": 20.0, "learning_rate": 1.0020256692234023e-06, "loss": 1.4723286628723145, "step": 16248 }, { "epoch": 2.95785928824975, "grad_norm": 18.75, "learning_rate": 1.0019913421669344e-06, "loss": 1.0270074605941772, "step": 16250 }, { "epoch": 2.958223354873942, "grad_norm": 20.875, "learning_rate": 1.0019573083101397e-06, "loss": 1.5859978199005127, "step": 16252 }, { "epoch": 2.9585874214981343, "grad_norm": 10.125, "learning_rate": 1.0019235676580124e-06, "loss": 1.4734545946121216, "step": 16254 }, { "epoch": 2.9589514881223264, "grad_norm": 10.3125, "learning_rate": 1.0018901202155043e-06, "loss": 1.4765737056732178, "step": 16256 }, { "epoch": 2.9593155547465186, "grad_norm": 3.265625, "learning_rate": 1.0018569659875233e-06, "loss": 1.059638500213623, "step": 16258 }, { "epoch": 2.959679621370711, "grad_norm": 9.5, "learning_rate": 1.001824104978935e-06, "loss": 1.0306956768035889, "step": 16260 }, { "epoch": 2.960043687994903, "grad_norm": 27.75, "learning_rate": 1.0017915371945611e-06, "loss": 1.518880844116211, "step": 16262 }, { "epoch": 2.960407754619095, "grad_norm": 16.375, "learning_rate": 1.0017592626391813e-06, "loss": 1.4115920066833496, "step": 16264 }, { "epoch": 2.9607718212432874, "grad_norm": 16.5, "learning_rate": 1.0017272813175315e-06, "loss": 1.361602544784546, "step": 16266 }, { "epoch": 2.9611358878674796, "grad_norm": 34.25, "learning_rate": 1.0016955932343049e-06, "loss": 1.4944262504577637, "step": 16268 }, { "epoch": 2.9614999544916722, "grad_norm": 7.71875, "learning_rate": 1.0016641983941513e-06, "loss": 1.5077896118164062, "step": 16270 }, { "epoch": 2.961864021115864, "grad_norm": 8.5, "learning_rate": 1.0016330968016784e-06, "loss": 1.0913604497909546, "step": 16272 }, { "epoch": 2.9622280877400566, "grad_norm": 28.0, "learning_rate": 1.0016022884614495e-06, "loss": 1.1831109523773193, "step": 16274 }, { "epoch": 2.9625921543642484, "grad_norm": 5.46875, "learning_rate": 1.0015717733779864e-06, "loss": 1.3958098888397217, "step": 16276 }, { "epoch": 2.962956220988441, "grad_norm": 43.25, "learning_rate": 1.0015415515557666e-06, "loss": 0.8568093776702881, "step": 16278 }, { "epoch": 2.963320287612633, "grad_norm": 13.125, "learning_rate": 1.001511622999225e-06, "loss": 1.001417636871338, "step": 16280 }, { "epoch": 2.9636843542368254, "grad_norm": 16.625, "learning_rate": 1.001481987712753e-06, "loss": 1.5871542692184448, "step": 16282 }, { "epoch": 2.9640484208610176, "grad_norm": 21.625, "learning_rate": 1.0014526457007007e-06, "loss": 1.882049322128296, "step": 16284 }, { "epoch": 2.96441248748521, "grad_norm": 7.0, "learning_rate": 1.001423596967373e-06, "loss": 0.986940860748291, "step": 16286 }, { "epoch": 2.964776554109402, "grad_norm": 47.25, "learning_rate": 1.001394841517033e-06, "loss": 1.0480201244354248, "step": 16288 }, { "epoch": 2.965140620733594, "grad_norm": 117.0, "learning_rate": 1.0013663793539003e-06, "loss": 1.4762556552886963, "step": 16290 }, { "epoch": 2.9655046873577864, "grad_norm": 8.875, "learning_rate": 1.0013382104821517e-06, "loss": 1.1868630647659302, "step": 16292 }, { "epoch": 2.9658687539819786, "grad_norm": 37.25, "learning_rate": 1.0013103349059209e-06, "loss": 1.320264220237732, "step": 16294 }, { "epoch": 2.9662328206061708, "grad_norm": 30.25, "learning_rate": 1.0012827526292984e-06, "loss": 1.5974578857421875, "step": 16296 }, { "epoch": 2.966596887230363, "grad_norm": 11.5, "learning_rate": 1.0012554636563317e-06, "loss": 1.7325783967971802, "step": 16298 }, { "epoch": 2.9669609538545556, "grad_norm": 13.3125, "learning_rate": 1.0012284679910257e-06, "loss": 1.1121164560317993, "step": 16300 }, { "epoch": 2.9673250204787474, "grad_norm": 11.9375, "learning_rate": 1.0012017656373417e-06, "loss": 1.4275696277618408, "step": 16302 }, { "epoch": 2.96768908710294, "grad_norm": 14.8125, "learning_rate": 1.0011753565991983e-06, "loss": 1.3188142776489258, "step": 16304 }, { "epoch": 2.968053153727132, "grad_norm": 13.625, "learning_rate": 1.0011492408804704e-06, "loss": 1.5391064882278442, "step": 16306 }, { "epoch": 2.9684172203513244, "grad_norm": 8.125, "learning_rate": 1.0011234184849912e-06, "loss": 1.4445359706878662, "step": 16308 }, { "epoch": 2.9687812869755166, "grad_norm": 8.0625, "learning_rate": 1.0010978894165493e-06, "loss": 1.1950385570526123, "step": 16310 }, { "epoch": 2.9691453535997088, "grad_norm": 16.625, "learning_rate": 1.0010726536788912e-06, "loss": 1.749969244003296, "step": 16312 }, { "epoch": 2.969509420223901, "grad_norm": 11.9375, "learning_rate": 1.0010477112757206e-06, "loss": 1.4159072637557983, "step": 16314 }, { "epoch": 2.969873486848093, "grad_norm": 17.5, "learning_rate": 1.0010230622106972e-06, "loss": 1.3043038845062256, "step": 16316 }, { "epoch": 2.9702375534722854, "grad_norm": 7.96875, "learning_rate": 1.0009987064874382e-06, "loss": 1.3582814931869507, "step": 16318 }, { "epoch": 2.9706016200964775, "grad_norm": 24.625, "learning_rate": 1.000974644109518e-06, "loss": 1.5314178466796875, "step": 16320 }, { "epoch": 2.9709656867206697, "grad_norm": 29.375, "learning_rate": 1.000950875080467e-06, "loss": 1.9022207260131836, "step": 16322 }, { "epoch": 2.971329753344862, "grad_norm": 15.875, "learning_rate": 1.0009273994037738e-06, "loss": 1.324299931526184, "step": 16324 }, { "epoch": 2.9716938199690546, "grad_norm": 12.625, "learning_rate": 1.0009042170828834e-06, "loss": 1.4956713914871216, "step": 16326 }, { "epoch": 2.9720578865932463, "grad_norm": 11.5625, "learning_rate": 1.0008813281211973e-06, "loss": 1.8363910913467407, "step": 16328 }, { "epoch": 2.972421953217439, "grad_norm": 29.625, "learning_rate": 1.0008587325220747e-06, "loss": 1.766657829284668, "step": 16330 }, { "epoch": 2.972786019841631, "grad_norm": 16.25, "learning_rate": 1.0008364302888315e-06, "loss": 1.0280463695526123, "step": 16332 }, { "epoch": 2.9731500864658233, "grad_norm": 178.0, "learning_rate": 1.0008144214247401e-06, "loss": 1.1512819528579712, "step": 16334 }, { "epoch": 2.9735141530900155, "grad_norm": 8.1875, "learning_rate": 1.00079270593303e-06, "loss": 1.5159026384353638, "step": 16336 }, { "epoch": 2.9738782197142077, "grad_norm": 15.1875, "learning_rate": 1.0007712838168887e-06, "loss": 1.400529146194458, "step": 16338 }, { "epoch": 2.9742422863384, "grad_norm": 9.875, "learning_rate": 1.000750155079459e-06, "loss": 1.391846776008606, "step": 16340 }, { "epoch": 2.974606352962592, "grad_norm": 4.4375, "learning_rate": 1.000729319723842e-06, "loss": 1.2190568447113037, "step": 16342 }, { "epoch": 2.9749704195867843, "grad_norm": 5.25, "learning_rate": 1.0007087777530949e-06, "loss": 1.0561890602111816, "step": 16344 }, { "epoch": 2.9753344862109765, "grad_norm": 5.03125, "learning_rate": 1.0006885291702325e-06, "loss": 1.2405662536621094, "step": 16346 }, { "epoch": 2.9756985528351687, "grad_norm": 16.5, "learning_rate": 1.0006685739782257e-06, "loss": 1.23015296459198, "step": 16348 }, { "epoch": 2.976062619459361, "grad_norm": 10.375, "learning_rate": 1.000648912180003e-06, "loss": 1.0744677782058716, "step": 16350 }, { "epoch": 2.9764266860835535, "grad_norm": 20.5, "learning_rate": 1.0006295437784499e-06, "loss": 1.4442644119262695, "step": 16352 }, { "epoch": 2.9767907527077453, "grad_norm": 10.125, "learning_rate": 1.0006104687764085e-06, "loss": 1.543410301208496, "step": 16354 }, { "epoch": 2.977154819331938, "grad_norm": 15.1875, "learning_rate": 1.000591687176678e-06, "loss": 1.5970842838287354, "step": 16356 }, { "epoch": 2.97751888595613, "grad_norm": 6.84375, "learning_rate": 1.0005731989820144e-06, "loss": 1.445725917816162, "step": 16358 }, { "epoch": 2.9778829525803223, "grad_norm": 11.4375, "learning_rate": 1.0005550041951312e-06, "loss": 1.137832522392273, "step": 16360 }, { "epoch": 2.9782470192045145, "grad_norm": 48.5, "learning_rate": 1.0005371028186977e-06, "loss": 2.0945792198181152, "step": 16362 }, { "epoch": 2.9786110858287067, "grad_norm": 12.75, "learning_rate": 1.0005194948553415e-06, "loss": 1.913609504699707, "step": 16364 }, { "epoch": 2.978975152452899, "grad_norm": 20.75, "learning_rate": 1.0005021803076462e-06, "loss": 1.2479089498519897, "step": 16366 }, { "epoch": 2.979339219077091, "grad_norm": 27.375, "learning_rate": 1.0004851591781527e-06, "loss": 0.9958842992782593, "step": 16368 }, { "epoch": 2.9797032857012833, "grad_norm": 16.375, "learning_rate": 1.0004684314693587e-06, "loss": 1.4470421075820923, "step": 16370 }, { "epoch": 2.9800673523254755, "grad_norm": 9.4375, "learning_rate": 1.000451997183719e-06, "loss": 1.43610680103302, "step": 16372 }, { "epoch": 2.9804314189496677, "grad_norm": 14.4375, "learning_rate": 1.0004358563236452e-06, "loss": 1.3032466173171997, "step": 16374 }, { "epoch": 2.98079548557386, "grad_norm": 17.625, "learning_rate": 1.0004200088915061e-06, "loss": 0.6024792194366455, "step": 16376 }, { "epoch": 2.9811595521980525, "grad_norm": 18.25, "learning_rate": 1.000404454889627e-06, "loss": 0.8797003626823425, "step": 16378 }, { "epoch": 2.9815236188222443, "grad_norm": 7.6875, "learning_rate": 1.0003891943202906e-06, "loss": 1.5118615627288818, "step": 16380 }, { "epoch": 2.981887685446437, "grad_norm": 3.359375, "learning_rate": 1.0003742271857359e-06, "loss": 1.0019346475601196, "step": 16382 }, { "epoch": 2.9822517520706286, "grad_norm": 11.5625, "learning_rate": 1.00035955348816e-06, "loss": 1.1364027261734009, "step": 16384 }, { "epoch": 2.9826158186948213, "grad_norm": 19.625, "learning_rate": 1.0003451732297156e-06, "loss": 1.4811102151870728, "step": 16386 }, { "epoch": 2.9829798853190135, "grad_norm": 9.25, "learning_rate": 1.0003310864125132e-06, "loss": 1.590872049331665, "step": 16388 }, { "epoch": 2.9833439519432057, "grad_norm": 5.8125, "learning_rate": 1.0003172930386198e-06, "loss": 1.0282552242279053, "step": 16390 }, { "epoch": 2.983708018567398, "grad_norm": 5.34375, "learning_rate": 1.0003037931100597e-06, "loss": 1.512000322341919, "step": 16392 }, { "epoch": 2.98407208519159, "grad_norm": 8.3125, "learning_rate": 1.000290586628814e-06, "loss": 1.5235819816589355, "step": 16394 }, { "epoch": 2.9844361518157823, "grad_norm": 17.375, "learning_rate": 1.0002776735968207e-06, "loss": 1.734879732131958, "step": 16396 }, { "epoch": 2.9848002184399745, "grad_norm": 8.375, "learning_rate": 1.0002650540159742e-06, "loss": 1.3319530487060547, "step": 16398 }, { "epoch": 2.9851642850641666, "grad_norm": 9.1875, "learning_rate": 1.000252727888127e-06, "loss": 1.1793327331542969, "step": 16400 }, { "epoch": 2.985528351688359, "grad_norm": 26.5, "learning_rate": 1.0002406952150878e-06, "loss": 1.4188673496246338, "step": 16402 }, { "epoch": 2.985892418312551, "grad_norm": 8.9375, "learning_rate": 1.0002289559986223e-06, "loss": 1.2849725484848022, "step": 16404 }, { "epoch": 2.9862564849367432, "grad_norm": 18.5, "learning_rate": 1.0002175102404531e-06, "loss": 0.6944088935852051, "step": 16406 }, { "epoch": 2.986620551560936, "grad_norm": 15.75, "learning_rate": 1.00020635794226e-06, "loss": 0.4224565923213959, "step": 16408 }, { "epoch": 2.9869846181851276, "grad_norm": 11.875, "learning_rate": 1.000195499105679e-06, "loss": 1.7608332633972168, "step": 16410 }, { "epoch": 2.9873486848093203, "grad_norm": 8.75, "learning_rate": 1.0001849337323045e-06, "loss": 1.1287002563476562, "step": 16412 }, { "epoch": 2.9877127514335124, "grad_norm": 11.6875, "learning_rate": 1.0001746618236862e-06, "loss": 1.41616690158844, "step": 16414 }, { "epoch": 2.9880768180577046, "grad_norm": 6.875, "learning_rate": 1.0001646833813316e-06, "loss": 1.374161958694458, "step": 16416 }, { "epoch": 2.988440884681897, "grad_norm": 37.25, "learning_rate": 1.0001549984067052e-06, "loss": 1.2015546560287476, "step": 16418 }, { "epoch": 2.988804951306089, "grad_norm": 12.0625, "learning_rate": 1.0001456069012282e-06, "loss": 1.58562171459198, "step": 16420 }, { "epoch": 2.9891690179302812, "grad_norm": 9.6875, "learning_rate": 1.0001365088662784e-06, "loss": 1.0949738025665283, "step": 16422 }, { "epoch": 2.9895330845544734, "grad_norm": 10.9375, "learning_rate": 1.0001277043031915e-06, "loss": 1.4776755571365356, "step": 16424 }, { "epoch": 2.9898971511786656, "grad_norm": 9.5, "learning_rate": 1.000119193213259e-06, "loss": 1.2836664915084839, "step": 16426 }, { "epoch": 2.990261217802858, "grad_norm": 15.9375, "learning_rate": 1.0001109755977303e-06, "loss": 1.2599329948425293, "step": 16428 }, { "epoch": 2.99062528442705, "grad_norm": 11.625, "learning_rate": 1.000103051457811e-06, "loss": 0.8928711414337158, "step": 16430 }, { "epoch": 2.990989351051242, "grad_norm": 10.125, "learning_rate": 1.000095420794664e-06, "loss": 1.4531464576721191, "step": 16432 }, { "epoch": 2.991353417675435, "grad_norm": 6.9375, "learning_rate": 1.0000880836094091e-06, "loss": 1.4205862283706665, "step": 16434 }, { "epoch": 2.9917174842996266, "grad_norm": 8.0625, "learning_rate": 1.000081039903123e-06, "loss": 1.368853211402893, "step": 16436 }, { "epoch": 2.992081550923819, "grad_norm": 7.875, "learning_rate": 1.0000742896768392e-06, "loss": 1.3822475671768188, "step": 16438 }, { "epoch": 2.9924456175480114, "grad_norm": 10.5, "learning_rate": 1.0000678329315486e-06, "loss": 1.5048623085021973, "step": 16440 }, { "epoch": 2.9928096841722036, "grad_norm": 13.6875, "learning_rate": 1.0000616696681984e-06, "loss": 1.3586997985839844, "step": 16442 }, { "epoch": 2.993173750796396, "grad_norm": 12.6875, "learning_rate": 1.0000557998876933e-06, "loss": 1.4310898780822754, "step": 16444 }, { "epoch": 2.993537817420588, "grad_norm": 21.75, "learning_rate": 1.0000502235908943e-06, "loss": 1.4072825908660889, "step": 16446 }, { "epoch": 2.99390188404478, "grad_norm": 23.125, "learning_rate": 1.00004494077862e-06, "loss": 1.9890415668487549, "step": 16448 }, { "epoch": 2.9942659506689724, "grad_norm": 18.5, "learning_rate": 1.0000399514516453e-06, "loss": 1.4942454099655151, "step": 16450 }, { "epoch": 2.9946300172931646, "grad_norm": 13.875, "learning_rate": 1.0000352556107028e-06, "loss": 1.324810266494751, "step": 16452 }, { "epoch": 2.9949940839173568, "grad_norm": 6.9375, "learning_rate": 1.0000308532564813e-06, "loss": 1.202855110168457, "step": 16454 }, { "epoch": 2.995358150541549, "grad_norm": 8.0, "learning_rate": 1.000026744389627e-06, "loss": 1.2264525890350342, "step": 16456 }, { "epoch": 2.995722217165741, "grad_norm": 12.625, "learning_rate": 1.000022929010743e-06, "loss": 1.4550981521606445, "step": 16458 }, { "epoch": 2.996086283789934, "grad_norm": 10.75, "learning_rate": 1.0000194071203887e-06, "loss": 1.3527114391326904, "step": 16460 }, { "epoch": 2.9964503504141256, "grad_norm": 11.5625, "learning_rate": 1.0000161787190812e-06, "loss": 0.8071882724761963, "step": 16462 }, { "epoch": 2.996814417038318, "grad_norm": 6.59375, "learning_rate": 1.0000132438072942e-06, "loss": 1.4483782052993774, "step": 16464 }, { "epoch": 2.9971784836625104, "grad_norm": 4.40625, "learning_rate": 1.0000106023854585e-06, "loss": 1.272663950920105, "step": 16466 }, { "epoch": 2.9975425502867026, "grad_norm": 9.4375, "learning_rate": 1.0000082544539618e-06, "loss": 1.2252750396728516, "step": 16468 }, { "epoch": 2.9979066169108948, "grad_norm": 9.375, "learning_rate": 1.0000062000131483e-06, "loss": 1.4054439067840576, "step": 16470 }, { "epoch": 2.998270683535087, "grad_norm": 11.25, "learning_rate": 1.0000044390633198e-06, "loss": 1.309810757637024, "step": 16472 }, { "epoch": 2.998634750159279, "grad_norm": 8.4375, "learning_rate": 1.0000029716047346e-06, "loss": 1.3979363441467285, "step": 16474 }, { "epoch": 2.9989988167834714, "grad_norm": 7.53125, "learning_rate": 1.000001797637608e-06, "loss": 1.1941115856170654, "step": 16476 }, { "epoch": 2.9993628834076635, "grad_norm": 11.5625, "learning_rate": 1.0000009171621122e-06, "loss": 1.9067292213439941, "step": 16478 }, { "epoch": 2.9997269500318557, "grad_norm": 15.375, "learning_rate": 1.0000003301783765e-06, "loss": 1.603000283241272, "step": 16480 }, { "epoch": 3.0, "grad_norm": 25.75, "learning_rate": 1.0000000366864873e-06, "loss": 1.4978524446487427, "step": 16482 }, { "epoch": 3.0, "step": 16482, "total_flos": 3.229073396012679e+18, "train_loss": 1.3915999769032572, "train_runtime": 20987.8784, "train_samples_per_second": 1.57, "train_steps_per_second": 0.785 } ], "logging_steps": 2, "max_steps": 16482, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 9999999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.229073396012679e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }