{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1962, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0030581039755351682, "grad_norm": 0.6435028314590454, "learning_rate": 1.0101010101010103e-07, "loss": 1.8936554193496704, "step": 2 }, { "epoch": 0.0061162079510703364, "grad_norm": 0.5548882484436035, "learning_rate": 3.0303030303030305e-07, "loss": 1.8550586700439453, "step": 4 }, { "epoch": 0.009174311926605505, "grad_norm": 0.27108362317085266, "learning_rate": 5.05050505050505e-07, "loss": 1.890197992324829, "step": 6 }, { "epoch": 0.012232415902140673, "grad_norm": 0.24754057824611664, "learning_rate": 7.070707070707071e-07, "loss": 1.8445472717285156, "step": 8 }, { "epoch": 0.01529051987767584, "grad_norm": 0.39890649914741516, "learning_rate": 9.090909090909091e-07, "loss": 2.010572910308838, "step": 10 }, { "epoch": 0.01834862385321101, "grad_norm": 0.23249551653862, "learning_rate": 1.111111111111111e-06, "loss": 1.8801705837249756, "step": 12 }, { "epoch": 0.021406727828746176, "grad_norm": 0.4299562871456146, "learning_rate": 1.3131313131313134e-06, "loss": 1.8805203437805176, "step": 14 }, { "epoch": 0.024464831804281346, "grad_norm": 0.5231528282165527, "learning_rate": 1.5151515151515152e-06, "loss": 1.9465537071228027, "step": 16 }, { "epoch": 0.027522935779816515, "grad_norm": 0.3482355773448944, "learning_rate": 1.7171717171717173e-06, "loss": 1.8298053741455078, "step": 18 }, { "epoch": 0.03058103975535168, "grad_norm": 0.3003389239311218, "learning_rate": 1.9191919191919192e-06, "loss": 1.853845238685608, "step": 20 }, { "epoch": 0.03363914373088685, "grad_norm": 0.5087025165557861, "learning_rate": 2.1212121212121216e-06, "loss": 1.9923889636993408, "step": 22 }, { "epoch": 0.03669724770642202, "grad_norm": 2.0046560764312744, "learning_rate": 2.3232323232323234e-06, "loss": 2.008021354675293, "step": 24 }, { "epoch": 0.039755351681957186, "grad_norm": 0.2651369571685791, "learning_rate": 2.5252525252525258e-06, "loss": 1.7058303356170654, "step": 26 }, { "epoch": 0.04281345565749235, "grad_norm": 0.5547925233840942, "learning_rate": 2.7272727272727272e-06, "loss": 1.8821287155151367, "step": 28 }, { "epoch": 0.045871559633027525, "grad_norm": 0.5607280731201172, "learning_rate": 2.9292929292929295e-06, "loss": 2.1788079738616943, "step": 30 }, { "epoch": 0.04892966360856269, "grad_norm": 0.36416563391685486, "learning_rate": 3.131313131313132e-06, "loss": 1.8534326553344727, "step": 32 }, { "epoch": 0.05198776758409786, "grad_norm": 0.4965146481990814, "learning_rate": 3.3333333333333333e-06, "loss": 1.9557833671569824, "step": 34 }, { "epoch": 0.05504587155963303, "grad_norm": 0.3163432776927948, "learning_rate": 3.5353535353535356e-06, "loss": 1.7984235286712646, "step": 36 }, { "epoch": 0.0581039755351682, "grad_norm": 0.3063645362854004, "learning_rate": 3.737373737373738e-06, "loss": 1.8264985084533691, "step": 38 }, { "epoch": 0.06116207951070336, "grad_norm": 0.30639225244522095, "learning_rate": 3.93939393939394e-06, "loss": 1.8241571187973022, "step": 40 }, { "epoch": 0.06422018348623854, "grad_norm": 0.3971042335033417, "learning_rate": 4.141414141414142e-06, "loss": 1.874243974685669, "step": 42 }, { "epoch": 0.0672782874617737, "grad_norm": 0.6156560182571411, "learning_rate": 4.343434343434344e-06, "loss": 1.965466022491455, "step": 44 }, { "epoch": 0.07033639143730887, "grad_norm": 0.5533192753791809, "learning_rate": 4.5454545454545455e-06, "loss": 2.0693740844726562, "step": 46 }, { "epoch": 0.07339449541284404, "grad_norm": 1.9126055240631104, "learning_rate": 4.747474747474748e-06, "loss": 2.060253143310547, "step": 48 }, { "epoch": 0.0764525993883792, "grad_norm": 0.3860923647880554, "learning_rate": 4.94949494949495e-06, "loss": 1.8577625751495361, "step": 50 }, { "epoch": 0.07951070336391437, "grad_norm": 0.4684409499168396, "learning_rate": 5.151515151515152e-06, "loss": 1.8510971069335938, "step": 52 }, { "epoch": 0.08256880733944955, "grad_norm": 0.4307204484939575, "learning_rate": 5.353535353535354e-06, "loss": 1.9931628704071045, "step": 54 }, { "epoch": 0.0856269113149847, "grad_norm": 0.3140373229980469, "learning_rate": 5.555555555555557e-06, "loss": 1.925836443901062, "step": 56 }, { "epoch": 0.08868501529051988, "grad_norm": 0.36317509412765503, "learning_rate": 5.7575757575757586e-06, "loss": 1.9616905450820923, "step": 58 }, { "epoch": 0.09174311926605505, "grad_norm": 0.21478985249996185, "learning_rate": 5.95959595959596e-06, "loss": 1.895378589630127, "step": 60 }, { "epoch": 0.09480122324159021, "grad_norm": 0.2936638593673706, "learning_rate": 6.1616161616161615e-06, "loss": 1.8279492855072021, "step": 62 }, { "epoch": 0.09785932721712538, "grad_norm": 0.3114721179008484, "learning_rate": 6.363636363636364e-06, "loss": 1.715104103088379, "step": 64 }, { "epoch": 0.10091743119266056, "grad_norm": 0.32813334465026855, "learning_rate": 6.565656565656566e-06, "loss": 1.852712631225586, "step": 66 }, { "epoch": 0.10397553516819572, "grad_norm": 0.37994885444641113, "learning_rate": 6.767676767676769e-06, "loss": 1.9753448963165283, "step": 68 }, { "epoch": 0.10703363914373089, "grad_norm": 0.5206537246704102, "learning_rate": 6.969696969696971e-06, "loss": 1.8388103246688843, "step": 70 }, { "epoch": 0.11009174311926606, "grad_norm": 0.6430595517158508, "learning_rate": 7.171717171717172e-06, "loss": 2.0399489402770996, "step": 72 }, { "epoch": 0.11314984709480122, "grad_norm": 0.5809399485588074, "learning_rate": 7.373737373737374e-06, "loss": 2.1389784812927246, "step": 74 }, { "epoch": 0.1162079510703364, "grad_norm": 1.2094364166259766, "learning_rate": 7.5757575757575764e-06, "loss": 1.9202568531036377, "step": 76 }, { "epoch": 0.11926605504587157, "grad_norm": 0.7485645413398743, "learning_rate": 7.77777777777778e-06, "loss": 2.2573585510253906, "step": 78 }, { "epoch": 0.12232415902140673, "grad_norm": 0.47476136684417725, "learning_rate": 7.97979797979798e-06, "loss": 1.8947498798370361, "step": 80 }, { "epoch": 0.12538226299694188, "grad_norm": 0.24537041783332825, "learning_rate": 8.181818181818183e-06, "loss": 1.636450171470642, "step": 82 }, { "epoch": 0.12844036697247707, "grad_norm": 0.4732670783996582, "learning_rate": 8.383838383838384e-06, "loss": 1.818341612815857, "step": 84 }, { "epoch": 0.13149847094801223, "grad_norm": 0.37070026993751526, "learning_rate": 8.585858585858587e-06, "loss": 1.845613718032837, "step": 86 }, { "epoch": 0.1345565749235474, "grad_norm": 0.3881911635398865, "learning_rate": 8.787878787878788e-06, "loss": 1.7559518814086914, "step": 88 }, { "epoch": 0.13761467889908258, "grad_norm": 0.45207998156547546, "learning_rate": 8.98989898989899e-06, "loss": 1.7992792129516602, "step": 90 }, { "epoch": 0.14067278287461774, "grad_norm": 0.1907433420419693, "learning_rate": 9.191919191919193e-06, "loss": 1.8380980491638184, "step": 92 }, { "epoch": 0.1437308868501529, "grad_norm": 0.2265041321516037, "learning_rate": 9.393939393939396e-06, "loss": 1.9353697299957275, "step": 94 }, { "epoch": 0.14678899082568808, "grad_norm": 0.5571039319038391, "learning_rate": 9.595959595959597e-06, "loss": 1.861445665359497, "step": 96 }, { "epoch": 0.14984709480122324, "grad_norm": 0.318570613861084, "learning_rate": 9.797979797979798e-06, "loss": 1.7963485717773438, "step": 98 }, { "epoch": 0.1529051987767584, "grad_norm": 0.35685858130455017, "learning_rate": 1e-05, "loss": 1.955026626586914, "step": 100 }, { "epoch": 0.1559633027522936, "grad_norm": 0.7966809272766113, "learning_rate": 9.99997440729838e-06, "loss": 1.8856327533721924, "step": 102 }, { "epoch": 0.15902140672782875, "grad_norm": 0.2650541663169861, "learning_rate": 9.999897629484621e-06, "loss": 1.814586877822876, "step": 104 }, { "epoch": 0.1620795107033639, "grad_norm": 0.36088353395462036, "learning_rate": 9.999769667432037e-06, "loss": 1.8607715368270874, "step": 106 }, { "epoch": 0.1651376146788991, "grad_norm": 0.6270299553871155, "learning_rate": 9.999590522596136e-06, "loss": 1.9078267812728882, "step": 108 }, { "epoch": 0.16819571865443425, "grad_norm": 0.27504709362983704, "learning_rate": 9.999360197014607e-06, "loss": 1.9029535055160522, "step": 110 }, { "epoch": 0.1712538226299694, "grad_norm": 0.5007109642028809, "learning_rate": 9.999078693307296e-06, "loss": 1.7704020738601685, "step": 112 }, { "epoch": 0.1743119266055046, "grad_norm": 0.5426493883132935, "learning_rate": 9.99874601467618e-06, "loss": 1.8907287120819092, "step": 114 }, { "epoch": 0.17737003058103976, "grad_norm": 0.26077231764793396, "learning_rate": 9.998362164905318e-06, "loss": 1.760542869567871, "step": 116 }, { "epoch": 0.18042813455657492, "grad_norm": 0.37686067819595337, "learning_rate": 9.997927148360824e-06, "loss": 1.995668649673462, "step": 118 }, { "epoch": 0.1834862385321101, "grad_norm": 0.4259154498577118, "learning_rate": 9.99744096999081e-06, "loss": 1.8606561422348022, "step": 120 }, { "epoch": 0.18654434250764526, "grad_norm": 0.3365345299243927, "learning_rate": 9.996903635325326e-06, "loss": 1.909229040145874, "step": 122 }, { "epoch": 0.18960244648318042, "grad_norm": 0.25919589400291443, "learning_rate": 9.996315150476308e-06, "loss": 1.9200305938720703, "step": 124 }, { "epoch": 0.1926605504587156, "grad_norm": 0.2932458221912384, "learning_rate": 9.995675522137492e-06, "loss": 1.8696832656860352, "step": 126 }, { "epoch": 0.19571865443425077, "grad_norm": 0.38474535942077637, "learning_rate": 9.994984757584353e-06, "loss": 1.828667402267456, "step": 128 }, { "epoch": 0.19877675840978593, "grad_norm": 0.3214952349662781, "learning_rate": 9.994242864674021e-06, "loss": 1.8718284368515015, "step": 130 }, { "epoch": 0.2018348623853211, "grad_norm": 0.33034268021583557, "learning_rate": 9.993449851845176e-06, "loss": 1.8226697444915771, "step": 132 }, { "epoch": 0.20489296636085627, "grad_norm": 0.8973183631896973, "learning_rate": 9.992605728117972e-06, "loss": 1.9453703165054321, "step": 134 }, { "epoch": 0.20795107033639143, "grad_norm": 0.6750196218490601, "learning_rate": 9.991710503093923e-06, "loss": 1.820605993270874, "step": 136 }, { "epoch": 0.21100917431192662, "grad_norm": 0.2680327594280243, "learning_rate": 9.990764186955797e-06, "loss": 1.711888074874878, "step": 138 }, { "epoch": 0.21406727828746178, "grad_norm": 0.3089163899421692, "learning_rate": 9.989766790467498e-06, "loss": 1.668878197669983, "step": 140 }, { "epoch": 0.21712538226299694, "grad_norm": 0.5638787746429443, "learning_rate": 9.988718324973947e-06, "loss": 1.7612136602401733, "step": 142 }, { "epoch": 0.22018348623853212, "grad_norm": 0.24349473416805267, "learning_rate": 9.98761880240095e-06, "loss": 1.6873559951782227, "step": 144 }, { "epoch": 0.22324159021406728, "grad_norm": 0.3549518585205078, "learning_rate": 9.986468235255065e-06, "loss": 1.743373990058899, "step": 146 }, { "epoch": 0.22629969418960244, "grad_norm": 0.44438421726226807, "learning_rate": 9.985266636623457e-06, "loss": 1.6509066820144653, "step": 148 }, { "epoch": 0.22935779816513763, "grad_norm": 0.46152663230895996, "learning_rate": 9.984014020173748e-06, "loss": 1.8014967441558838, "step": 150 }, { "epoch": 0.2324159021406728, "grad_norm": 0.278169184923172, "learning_rate": 9.98271040015387e-06, "loss": 1.8622685670852661, "step": 152 }, { "epoch": 0.23547400611620795, "grad_norm": 0.3168479800224304, "learning_rate": 9.981355791391891e-06, "loss": 1.8940097093582153, "step": 154 }, { "epoch": 0.23853211009174313, "grad_norm": 0.3639688491821289, "learning_rate": 9.979950209295855e-06, "loss": 1.7917258739471436, "step": 156 }, { "epoch": 0.2415902140672783, "grad_norm": 0.40860888361930847, "learning_rate": 9.978493669853606e-06, "loss": 1.8766049146652222, "step": 158 }, { "epoch": 0.24464831804281345, "grad_norm": 0.315494179725647, "learning_rate": 9.976986189632597e-06, "loss": 1.7932193279266357, "step": 160 }, { "epoch": 0.24770642201834864, "grad_norm": 0.3525390923023224, "learning_rate": 9.975427785779717e-06, "loss": 1.9470767974853516, "step": 162 }, { "epoch": 0.25076452599388377, "grad_norm": 0.33575552701950073, "learning_rate": 9.97381847602108e-06, "loss": 1.7163609266281128, "step": 164 }, { "epoch": 0.25382262996941896, "grad_norm": 1.193529725074768, "learning_rate": 9.972158278661838e-06, "loss": 1.877960205078125, "step": 166 }, { "epoch": 0.25688073394495414, "grad_norm": 0.348765105009079, "learning_rate": 9.970447212585961e-06, "loss": 1.6149842739105225, "step": 168 }, { "epoch": 0.2599388379204893, "grad_norm": 0.5527969598770142, "learning_rate": 9.968685297256027e-06, "loss": 1.8597733974456787, "step": 170 }, { "epoch": 0.26299694189602446, "grad_norm": 0.656193196773529, "learning_rate": 9.966872552713006e-06, "loss": 1.5253994464874268, "step": 172 }, { "epoch": 0.26605504587155965, "grad_norm": 0.7701634764671326, "learning_rate": 9.965008999576018e-06, "loss": 1.5178442001342773, "step": 174 }, { "epoch": 0.2691131498470948, "grad_norm": 0.3889455795288086, "learning_rate": 9.963094659042113e-06, "loss": 1.7432003021240234, "step": 176 }, { "epoch": 0.27217125382262997, "grad_norm": 0.7660208344459534, "learning_rate": 9.961129552886024e-06, "loss": 1.655880331993103, "step": 178 }, { "epoch": 0.27522935779816515, "grad_norm": 0.7760636210441589, "learning_rate": 9.959113703459917e-06, "loss": 1.9860963821411133, "step": 180 }, { "epoch": 0.2782874617737003, "grad_norm": 1.5110101699829102, "learning_rate": 9.957047133693141e-06, "loss": 1.9139325618743896, "step": 182 }, { "epoch": 0.28134556574923547, "grad_norm": 1.1153804063796997, "learning_rate": 9.954929867091961e-06, "loss": 1.7500460147857666, "step": 184 }, { "epoch": 0.28440366972477066, "grad_norm": 0.3268054723739624, "learning_rate": 9.952761927739303e-06, "loss": 1.5284479856491089, "step": 186 }, { "epoch": 0.2874617737003058, "grad_norm": 0.2701658308506012, "learning_rate": 9.95054334029446e-06, "loss": 1.5575287342071533, "step": 188 }, { "epoch": 0.290519877675841, "grad_norm": 0.5897979140281677, "learning_rate": 9.948274129992838e-06, "loss": 1.5360642671585083, "step": 190 }, { "epoch": 0.29357798165137616, "grad_norm": 3.0125443935394287, "learning_rate": 9.945954322645643e-06, "loss": 1.7250124216079712, "step": 192 }, { "epoch": 0.2966360856269113, "grad_norm": 0.22849687933921814, "learning_rate": 9.9435839446396e-06, "loss": 1.7317864894866943, "step": 194 }, { "epoch": 0.2996941896024465, "grad_norm": 0.41497474908828735, "learning_rate": 9.941163022936659e-06, "loss": 1.7118513584136963, "step": 196 }, { "epoch": 0.30275229357798167, "grad_norm": 0.43153518438339233, "learning_rate": 9.938691585073677e-06, "loss": 1.4813673496246338, "step": 198 }, { "epoch": 0.3058103975535168, "grad_norm": 0.2877158522605896, "learning_rate": 9.936169659162105e-06, "loss": 1.5152385234832764, "step": 200 }, { "epoch": 0.308868501529052, "grad_norm": 0.319741427898407, "learning_rate": 9.933597273887676e-06, "loss": 1.657623291015625, "step": 202 }, { "epoch": 0.3119266055045872, "grad_norm": 0.4885481894016266, "learning_rate": 9.930974458510074e-06, "loss": 1.8340609073638916, "step": 204 }, { "epoch": 0.3149847094801223, "grad_norm": 0.3470771312713623, "learning_rate": 9.9283012428626e-06, "loss": 1.8779006004333496, "step": 206 }, { "epoch": 0.3180428134556575, "grad_norm": 0.21095849573612213, "learning_rate": 9.92557765735184e-06, "loss": 1.946405053138733, "step": 208 }, { "epoch": 0.3211009174311927, "grad_norm": 0.4015672504901886, "learning_rate": 9.922803732957309e-06, "loss": 1.5457347631454468, "step": 210 }, { "epoch": 0.3241590214067278, "grad_norm": 0.2712498903274536, "learning_rate": 9.919979501231102e-06, "loss": 1.6519064903259277, "step": 212 }, { "epoch": 0.327217125382263, "grad_norm": 0.24934278428554535, "learning_rate": 9.917104994297543e-06, "loss": 1.4617292881011963, "step": 214 }, { "epoch": 0.3302752293577982, "grad_norm": 0.22483140230178833, "learning_rate": 9.914180244852804e-06, "loss": 1.3875129222869873, "step": 216 }, { "epoch": 0.3333333333333333, "grad_norm": 0.6217460632324219, "learning_rate": 9.911205286164553e-06, "loss": 1.8669204711914062, "step": 218 }, { "epoch": 0.3363914373088685, "grad_norm": 0.4357741177082062, "learning_rate": 9.908180152071553e-06, "loss": 1.666574239730835, "step": 220 }, { "epoch": 0.3394495412844037, "grad_norm": 0.29025763273239136, "learning_rate": 9.9051048769833e-06, "loss": 1.810868263244629, "step": 222 }, { "epoch": 0.3425076452599388, "grad_norm": 0.7838276624679565, "learning_rate": 9.901979495879612e-06, "loss": 1.3125014305114746, "step": 224 }, { "epoch": 0.345565749235474, "grad_norm": 0.2543538212776184, "learning_rate": 9.898804044310245e-06, "loss": 1.6106175184249878, "step": 226 }, { "epoch": 0.3486238532110092, "grad_norm": 0.4557286500930786, "learning_rate": 9.89557855839448e-06, "loss": 1.886078953742981, "step": 228 }, { "epoch": 0.3516819571865443, "grad_norm": 0.2689090073108673, "learning_rate": 9.892303074820712e-06, "loss": 1.631593108177185, "step": 230 }, { "epoch": 0.3547400611620795, "grad_norm": 0.25291207432746887, "learning_rate": 9.888977630846048e-06, "loss": 1.7156798839569092, "step": 232 }, { "epoch": 0.3577981651376147, "grad_norm": 0.3357708752155304, "learning_rate": 9.88560226429586e-06, "loss": 1.6416988372802734, "step": 234 }, { "epoch": 0.36085626911314983, "grad_norm": 0.3246925473213196, "learning_rate": 9.88217701356337e-06, "loss": 1.5658977031707764, "step": 236 }, { "epoch": 0.363914373088685, "grad_norm": 0.2840614318847656, "learning_rate": 9.878701917609208e-06, "loss": 1.6534138917922974, "step": 238 }, { "epoch": 0.3669724770642202, "grad_norm": 0.5397573709487915, "learning_rate": 9.875177015960973e-06, "loss": 1.7614964246749878, "step": 240 }, { "epoch": 0.37003058103975534, "grad_norm": 0.28763291239738464, "learning_rate": 9.871602348712777e-06, "loss": 1.5937902927398682, "step": 242 }, { "epoch": 0.3730886850152905, "grad_norm": 0.21111302077770233, "learning_rate": 9.867977956524798e-06, "loss": 1.6914631128311157, "step": 244 }, { "epoch": 0.3761467889908257, "grad_norm": 0.5114771723747253, "learning_rate": 9.864303880622806e-06, "loss": 1.8919175863265991, "step": 246 }, { "epoch": 0.37920489296636084, "grad_norm": 0.4698966145515442, "learning_rate": 9.8605801627977e-06, "loss": 2.395404815673828, "step": 248 }, { "epoch": 0.382262996941896, "grad_norm": 0.604468047618866, "learning_rate": 9.85680684540504e-06, "loss": 1.523594617843628, "step": 250 }, { "epoch": 0.3853211009174312, "grad_norm": 0.295039564371109, "learning_rate": 9.852983971364549e-06, "loss": 1.520268440246582, "step": 252 }, { "epoch": 0.38837920489296635, "grad_norm": 0.2590586245059967, "learning_rate": 9.84911158415964e-06, "loss": 1.5712318420410156, "step": 254 }, { "epoch": 0.39143730886850153, "grad_norm": 0.9178432822227478, "learning_rate": 9.845189727836914e-06, "loss": 1.7512378692626953, "step": 256 }, { "epoch": 0.3944954128440367, "grad_norm": 0.512359619140625, "learning_rate": 9.841218447005657e-06, "loss": 1.677209496498108, "step": 258 }, { "epoch": 0.39755351681957185, "grad_norm": 0.8242136240005493, "learning_rate": 9.837197786837341e-06, "loss": 1.52079439163208, "step": 260 }, { "epoch": 0.40061162079510704, "grad_norm": 0.5057528614997864, "learning_rate": 9.833127793065098e-06, "loss": 1.3776154518127441, "step": 262 }, { "epoch": 0.4036697247706422, "grad_norm": 0.287590891122818, "learning_rate": 9.829008511983214e-06, "loss": 1.313464879989624, "step": 264 }, { "epoch": 0.40672782874617736, "grad_norm": 0.22291725873947144, "learning_rate": 9.82483999044659e-06, "loss": 1.4770923852920532, "step": 266 }, { "epoch": 0.40978593272171254, "grad_norm": 0.4278978109359741, "learning_rate": 9.820622275870219e-06, "loss": 1.713256597518921, "step": 268 }, { "epoch": 0.41284403669724773, "grad_norm": 0.7735996246337891, "learning_rate": 9.816355416228636e-06, "loss": 1.7301435470581055, "step": 270 }, { "epoch": 0.41590214067278286, "grad_norm": 0.36943763494491577, "learning_rate": 9.812039460055383e-06, "loss": 1.746875286102295, "step": 272 }, { "epoch": 0.41896024464831805, "grad_norm": 0.30427658557891846, "learning_rate": 9.807674456442448e-06, "loss": 1.7644126415252686, "step": 274 }, { "epoch": 0.42201834862385323, "grad_norm": 0.2680354416370392, "learning_rate": 9.80326045503972e-06, "loss": 1.6075056791305542, "step": 276 }, { "epoch": 0.42507645259938837, "grad_norm": 0.5165081024169922, "learning_rate": 9.798797506054398e-06, "loss": 1.7466685771942139, "step": 278 }, { "epoch": 0.42813455657492355, "grad_norm": 0.46960580348968506, "learning_rate": 9.794285660250457e-06, "loss": 1.6852364540100098, "step": 280 }, { "epoch": 0.43119266055045874, "grad_norm": 0.3378291130065918, "learning_rate": 9.789724968948034e-06, "loss": 1.5493333339691162, "step": 282 }, { "epoch": 0.43425076452599387, "grad_norm": 0.2972247004508972, "learning_rate": 9.78511548402287e-06, "loss": 1.5161151885986328, "step": 284 }, { "epoch": 0.43730886850152906, "grad_norm": 0.3610173165798187, "learning_rate": 9.780457257905708e-06, "loss": 1.698796272277832, "step": 286 }, { "epoch": 0.44036697247706424, "grad_norm": 0.4165475070476532, "learning_rate": 9.775750343581702e-06, "loss": 1.4344041347503662, "step": 288 }, { "epoch": 0.4434250764525994, "grad_norm": 0.565291702747345, "learning_rate": 9.770994794589804e-06, "loss": 1.6736053228378296, "step": 290 }, { "epoch": 0.44648318042813456, "grad_norm": 0.22272102534770966, "learning_rate": 9.766190665022173e-06, "loss": 1.515446424484253, "step": 292 }, { "epoch": 0.44954128440366975, "grad_norm": 0.292961061000824, "learning_rate": 9.761338009523542e-06, "loss": 1.5677558183670044, "step": 294 }, { "epoch": 0.4525993883792049, "grad_norm": 0.22576913237571716, "learning_rate": 9.756436883290608e-06, "loss": 1.6895636320114136, "step": 296 }, { "epoch": 0.45565749235474007, "grad_norm": 0.514447808265686, "learning_rate": 9.751487342071394e-06, "loss": 1.6961359977722168, "step": 298 }, { "epoch": 0.45871559633027525, "grad_norm": 0.4707038402557373, "learning_rate": 9.74648944216463e-06, "loss": 1.5364969968795776, "step": 300 }, { "epoch": 0.4617737003058104, "grad_norm": 0.3324492871761322, "learning_rate": 9.741443240419096e-06, "loss": 1.4445494413375854, "step": 302 }, { "epoch": 0.4648318042813456, "grad_norm": 0.40139055252075195, "learning_rate": 9.736348794232986e-06, "loss": 1.631695032119751, "step": 304 }, { "epoch": 0.46788990825688076, "grad_norm": 0.32826143503189087, "learning_rate": 9.731206161553253e-06, "loss": 1.5630545616149902, "step": 306 }, { "epoch": 0.4709480122324159, "grad_norm": 0.7137564420700073, "learning_rate": 9.726015400874945e-06, "loss": 1.7077264785766602, "step": 308 }, { "epoch": 0.4740061162079511, "grad_norm": 0.5834897756576538, "learning_rate": 9.72077657124055e-06, "loss": 1.541429877281189, "step": 310 }, { "epoch": 0.47706422018348627, "grad_norm": 0.30517715215682983, "learning_rate": 9.715489732239309e-06, "loss": 1.486952781677246, "step": 312 }, { "epoch": 0.4801223241590214, "grad_norm": 0.39915895462036133, "learning_rate": 9.710154944006558e-06, "loss": 1.4761033058166504, "step": 314 }, { "epoch": 0.4831804281345566, "grad_norm": 0.24902665615081787, "learning_rate": 9.70477226722302e-06, "loss": 1.555905818939209, "step": 316 }, { "epoch": 0.48623853211009177, "grad_norm": 0.27528202533721924, "learning_rate": 9.699341763114142e-06, "loss": 1.5418330430984497, "step": 318 }, { "epoch": 0.4892966360856269, "grad_norm": 0.37373027205467224, "learning_rate": 9.693863493449376e-06, "loss": 1.5460388660430908, "step": 320 }, { "epoch": 0.4923547400611621, "grad_norm": 0.3926723301410675, "learning_rate": 9.688337520541487e-06, "loss": 1.7003178596496582, "step": 322 }, { "epoch": 0.4954128440366973, "grad_norm": 0.2708083987236023, "learning_rate": 9.68276390724584e-06, "loss": 1.8639323711395264, "step": 324 }, { "epoch": 0.4984709480122324, "grad_norm": 0.3522673547267914, "learning_rate": 9.67714271695969e-06, "loss": 1.7603111267089844, "step": 326 }, { "epoch": 0.5015290519877675, "grad_norm": 0.2736775279045105, "learning_rate": 9.671474013621461e-06, "loss": 1.7426960468292236, "step": 328 }, { "epoch": 0.5045871559633027, "grad_norm": 0.34006989002227783, "learning_rate": 9.665757861710008e-06, "loss": 1.6802008152008057, "step": 330 }, { "epoch": 0.5076452599388379, "grad_norm": 0.7181631922721863, "learning_rate": 9.659994326243897e-06, "loss": 1.3610038757324219, "step": 332 }, { "epoch": 0.5107033639143731, "grad_norm": 0.3209435045719147, "learning_rate": 9.654183472780655e-06, "loss": 1.3310749530792236, "step": 334 }, { "epoch": 0.5137614678899083, "grad_norm": 0.3394523561000824, "learning_rate": 9.64832536741604e-06, "loss": 1.7552449703216553, "step": 336 }, { "epoch": 0.5168195718654435, "grad_norm": 0.26636433601379395, "learning_rate": 9.642420076783266e-06, "loss": 1.7648036479949951, "step": 338 }, { "epoch": 0.5198776758409785, "grad_norm": 0.4860476553440094, "learning_rate": 9.636467668052263e-06, "loss": 1.8371148109436035, "step": 340 }, { "epoch": 0.5229357798165137, "grad_norm": 0.3957999050617218, "learning_rate": 9.630468208928906e-06, "loss": 1.7691468000411987, "step": 342 }, { "epoch": 0.5259938837920489, "grad_norm": 0.29553869366645813, "learning_rate": 9.624421767654247e-06, "loss": 1.8050150871276855, "step": 344 }, { "epoch": 0.5290519877675841, "grad_norm": 0.8523488640785217, "learning_rate": 9.618328413003742e-06, "loss": 1.7548258304595947, "step": 346 }, { "epoch": 0.5321100917431193, "grad_norm": 0.30288758873939514, "learning_rate": 9.612188214286457e-06, "loss": 1.652245044708252, "step": 348 }, { "epoch": 0.5351681957186545, "grad_norm": 0.44331154227256775, "learning_rate": 9.606001241344293e-06, "loss": 1.5749201774597168, "step": 350 }, { "epoch": 0.5382262996941896, "grad_norm": 0.3775594234466553, "learning_rate": 9.599767564551185e-06, "loss": 1.8136138916015625, "step": 352 }, { "epoch": 0.5412844036697247, "grad_norm": 0.6260164976119995, "learning_rate": 9.593487254812298e-06, "loss": 1.753260850906372, "step": 354 }, { "epoch": 0.5443425076452599, "grad_norm": 0.21940867602825165, "learning_rate": 9.587160383563235e-06, "loss": 1.2595834732055664, "step": 356 }, { "epoch": 0.5474006116207951, "grad_norm": 0.45921286940574646, "learning_rate": 9.580787022769205e-06, "loss": 1.8687834739685059, "step": 358 }, { "epoch": 0.5504587155963303, "grad_norm": 0.25323811173439026, "learning_rate": 9.574367244924216e-06, "loss": 1.87260901927948, "step": 360 }, { "epoch": 0.5535168195718655, "grad_norm": 0.3825606405735016, "learning_rate": 9.567901123050255e-06, "loss": 1.9380344152450562, "step": 362 }, { "epoch": 0.5565749235474006, "grad_norm": 0.8433843851089478, "learning_rate": 9.56138873069644e-06, "loss": 1.854411005973816, "step": 364 }, { "epoch": 0.5596330275229358, "grad_norm": 0.5623306035995483, "learning_rate": 9.554830141938201e-06, "loss": 1.8307363986968994, "step": 366 }, { "epoch": 0.5626911314984709, "grad_norm": 0.5833460688591003, "learning_rate": 9.54822543137643e-06, "loss": 1.691839575767517, "step": 368 }, { "epoch": 0.5657492354740061, "grad_norm": 0.7582941651344299, "learning_rate": 9.541574674136634e-06, "loss": 1.5816738605499268, "step": 370 }, { "epoch": 0.5688073394495413, "grad_norm": 0.5991274118423462, "learning_rate": 9.534877945868075e-06, "loss": 1.141850471496582, "step": 372 }, { "epoch": 0.5718654434250765, "grad_norm": 0.27493157982826233, "learning_rate": 9.528135322742916e-06, "loss": 1.1190171241760254, "step": 374 }, { "epoch": 0.5749235474006116, "grad_norm": 0.20014670491218567, "learning_rate": 9.521346881455356e-06, "loss": 1.4172542095184326, "step": 376 }, { "epoch": 0.5779816513761468, "grad_norm": 0.45737189054489136, "learning_rate": 9.514512699220751e-06, "loss": 1.3267741203308105, "step": 378 }, { "epoch": 0.581039755351682, "grad_norm": 0.342574805021286, "learning_rate": 9.507632853774738e-06, "loss": 1.2848198413848877, "step": 380 }, { "epoch": 0.5840978593272171, "grad_norm": 0.2764483690261841, "learning_rate": 9.500707423372354e-06, "loss": 1.2696105241775513, "step": 382 }, { "epoch": 0.5871559633027523, "grad_norm": 0.5538342595100403, "learning_rate": 9.493736486787145e-06, "loss": 1.5733320713043213, "step": 384 }, { "epoch": 0.5902140672782875, "grad_norm": 0.5002435445785522, "learning_rate": 9.486720123310264e-06, "loss": 1.4811735153198242, "step": 386 }, { "epoch": 0.5932721712538226, "grad_norm": 0.2729179561138153, "learning_rate": 9.479658412749575e-06, "loss": 1.2759473323822021, "step": 388 }, { "epoch": 0.5963302752293578, "grad_norm": 0.422869473695755, "learning_rate": 9.472551435428751e-06, "loss": 1.6186537742614746, "step": 390 }, { "epoch": 0.599388379204893, "grad_norm": 0.18889868259429932, "learning_rate": 9.465399272186341e-06, "loss": 1.5904256105422974, "step": 392 }, { "epoch": 0.6024464831804281, "grad_norm": 0.4715130925178528, "learning_rate": 9.458202004374875e-06, "loss": 1.3664047718048096, "step": 394 }, { "epoch": 0.6055045871559633, "grad_norm": 0.3192538321018219, "learning_rate": 9.450959713859918e-06, "loss": 1.5540097951889038, "step": 396 }, { "epoch": 0.6085626911314985, "grad_norm": 0.48479557037353516, "learning_rate": 9.443672483019146e-06, "loss": 1.7298085689544678, "step": 398 }, { "epoch": 0.6116207951070336, "grad_norm": 0.40212106704711914, "learning_rate": 9.436340394741424e-06, "loss": 1.2515219449996948, "step": 400 }, { "epoch": 0.6146788990825688, "grad_norm": 0.31416311860084534, "learning_rate": 9.428963532425832e-06, "loss": 1.5272061824798584, "step": 402 }, { "epoch": 0.617737003058104, "grad_norm": 0.39595550298690796, "learning_rate": 9.421541979980743e-06, "loss": 1.584099531173706, "step": 404 }, { "epoch": 0.6207951070336392, "grad_norm": 0.3684428632259369, "learning_rate": 9.414075821822862e-06, "loss": 1.5516374111175537, "step": 406 }, { "epoch": 0.6238532110091743, "grad_norm": 0.2936325669288635, "learning_rate": 9.406565142876252e-06, "loss": 1.3937046527862549, "step": 408 }, { "epoch": 0.6269113149847095, "grad_norm": 0.8210769295692444, "learning_rate": 9.399010028571394e-06, "loss": 1.0384480953216553, "step": 410 }, { "epoch": 0.6299694189602446, "grad_norm": 0.31836938858032227, "learning_rate": 9.391410564844189e-06, "loss": 1.6605589389801025, "step": 412 }, { "epoch": 0.6330275229357798, "grad_norm": 0.4151877164840698, "learning_rate": 9.383766838134997e-06, "loss": 1.5902981758117676, "step": 414 }, { "epoch": 0.636085626911315, "grad_norm": 0.29467517137527466, "learning_rate": 9.376078935387647e-06, "loss": 1.511544942855835, "step": 416 }, { "epoch": 0.6391437308868502, "grad_norm": 0.4552344083786011, "learning_rate": 9.36834694404845e-06, "loss": 1.6092697381973267, "step": 418 }, { "epoch": 0.6422018348623854, "grad_norm": 0.3086092174053192, "learning_rate": 9.360570952065205e-06, "loss": 1.5458872318267822, "step": 420 }, { "epoch": 0.6452599388379205, "grad_norm": 0.29464077949523926, "learning_rate": 9.3527510478862e-06, "loss": 1.5201151371002197, "step": 422 }, { "epoch": 0.6483180428134556, "grad_norm": 0.35874319076538086, "learning_rate": 9.3448873204592e-06, "loss": 1.7184113264083862, "step": 424 }, { "epoch": 0.6513761467889908, "grad_norm": 0.6177545189857483, "learning_rate": 9.336979859230438e-06, "loss": 1.425230860710144, "step": 426 }, { "epoch": 0.654434250764526, "grad_norm": 0.4207315742969513, "learning_rate": 9.329028754143606e-06, "loss": 1.1580491065979004, "step": 428 }, { "epoch": 0.6574923547400612, "grad_norm": 0.40215086936950684, "learning_rate": 9.321034095638816e-06, "loss": 1.776092767715454, "step": 430 }, { "epoch": 0.6605504587155964, "grad_norm": 0.48207205533981323, "learning_rate": 9.312995974651581e-06, "loss": 1.5432982444763184, "step": 432 }, { "epoch": 0.6636085626911316, "grad_norm": 0.9188543558120728, "learning_rate": 9.304914482611788e-06, "loss": 1.6913204193115234, "step": 434 }, { "epoch": 0.6666666666666666, "grad_norm": 2.0712273120880127, "learning_rate": 9.296789711442641e-06, "loss": 1.5286757946014404, "step": 436 }, { "epoch": 0.6697247706422018, "grad_norm": 0.4487042725086212, "learning_rate": 9.288621753559624e-06, "loss": 1.7271997928619385, "step": 438 }, { "epoch": 0.672782874617737, "grad_norm": 0.4550405442714691, "learning_rate": 9.280410701869456e-06, "loss": 1.5852614641189575, "step": 440 }, { "epoch": 0.6758409785932722, "grad_norm": 0.8099808692932129, "learning_rate": 9.27215664976902e-06, "loss": 1.6332128047943115, "step": 442 }, { "epoch": 0.6788990825688074, "grad_norm": 0.5566719174385071, "learning_rate": 9.263859691144315e-06, "loss": 1.5285072326660156, "step": 444 }, { "epoch": 0.6819571865443425, "grad_norm": 0.3996361196041107, "learning_rate": 9.25551992036938e-06, "loss": 1.181262731552124, "step": 446 }, { "epoch": 0.6850152905198776, "grad_norm": 0.7320879697799683, "learning_rate": 9.247137432305221e-06, "loss": 1.6381134986877441, "step": 448 }, { "epoch": 0.6880733944954128, "grad_norm": 0.5473281741142273, "learning_rate": 9.238712322298733e-06, "loss": 1.623387098312378, "step": 450 }, { "epoch": 0.691131498470948, "grad_norm": 0.2673215866088867, "learning_rate": 9.230244686181616e-06, "loss": 1.6147091388702393, "step": 452 }, { "epoch": 0.6941896024464832, "grad_norm": 0.41044941544532776, "learning_rate": 9.22173462026929e-06, "loss": 1.6174466609954834, "step": 454 }, { "epoch": 0.6972477064220184, "grad_norm": 0.3210803270339966, "learning_rate": 9.213182221359785e-06, "loss": 1.4634352922439575, "step": 456 }, { "epoch": 0.7003058103975535, "grad_norm": 0.4366549551486969, "learning_rate": 9.204587586732653e-06, "loss": 1.6598728895187378, "step": 458 }, { "epoch": 0.7033639143730887, "grad_norm": 0.6817240118980408, "learning_rate": 9.195950814147862e-06, "loss": 1.7457971572875977, "step": 460 }, { "epoch": 0.7064220183486238, "grad_norm": 1.429196834564209, "learning_rate": 9.187272001844673e-06, "loss": 1.4895765781402588, "step": 462 }, { "epoch": 0.709480122324159, "grad_norm": 0.33415424823760986, "learning_rate": 9.178551248540534e-06, "loss": 1.7249622344970703, "step": 464 }, { "epoch": 0.7125382262996942, "grad_norm": 0.5185303092002869, "learning_rate": 9.169788653429949e-06, "loss": 1.5071038007736206, "step": 466 }, { "epoch": 0.7155963302752294, "grad_norm": 0.703040599822998, "learning_rate": 9.160984316183354e-06, "loss": 1.6332056522369385, "step": 468 }, { "epoch": 0.7186544342507645, "grad_norm": 0.2760729491710663, "learning_rate": 9.152138336945985e-06, "loss": 1.5567004680633545, "step": 470 }, { "epoch": 0.7217125382262997, "grad_norm": 0.26987555623054504, "learning_rate": 9.143250816336733e-06, "loss": 1.6896016597747803, "step": 472 }, { "epoch": 0.7247706422018348, "grad_norm": 0.4577353894710541, "learning_rate": 9.134321855447004e-06, "loss": 1.780794620513916, "step": 474 }, { "epoch": 0.72782874617737, "grad_norm": 0.3506152629852295, "learning_rate": 9.125351555839568e-06, "loss": 1.676330327987671, "step": 476 }, { "epoch": 0.7308868501529052, "grad_norm": 0.3420753479003906, "learning_rate": 9.116340019547403e-06, "loss": 1.53602933883667, "step": 478 }, { "epoch": 0.7339449541284404, "grad_norm": 0.615734875202179, "learning_rate": 9.107287349072535e-06, "loss": 1.6315178871154785, "step": 480 }, { "epoch": 0.7370030581039755, "grad_norm": 0.3383826017379761, "learning_rate": 9.098193647384872e-06, "loss": 1.646344542503357, "step": 482 }, { "epoch": 0.7400611620795107, "grad_norm": 0.40700384974479675, "learning_rate": 9.089059017921034e-06, "loss": 1.6499868631362915, "step": 484 }, { "epoch": 0.7431192660550459, "grad_norm": 0.4302765727043152, "learning_rate": 9.079883564583176e-06, "loss": 1.6223028898239136, "step": 486 }, { "epoch": 0.746177370030581, "grad_norm": 0.2995837330818176, "learning_rate": 9.070667391737804e-06, "loss": 1.639768123626709, "step": 488 }, { "epoch": 0.7492354740061162, "grad_norm": 0.3183751702308655, "learning_rate": 9.061410604214588e-06, "loss": 1.4172444343566895, "step": 490 }, { "epoch": 0.7522935779816514, "grad_norm": 0.41883519291877747, "learning_rate": 9.052113307305178e-06, "loss": 1.5172092914581299, "step": 492 }, { "epoch": 0.7553516819571865, "grad_norm": 0.4170067310333252, "learning_rate": 9.04277560676199e-06, "loss": 1.4581788778305054, "step": 494 }, { "epoch": 0.7584097859327217, "grad_norm": 0.4589844346046448, "learning_rate": 9.033397608797015e-06, "loss": 1.5675625801086426, "step": 496 }, { "epoch": 0.7614678899082569, "grad_norm": 0.4775915741920471, "learning_rate": 9.023979420080614e-06, "loss": 1.5760972499847412, "step": 498 }, { "epoch": 0.764525993883792, "grad_norm": 0.4255703389644623, "learning_rate": 9.014521147740295e-06, "loss": 1.4211878776550293, "step": 500 }, { "epoch": 0.7675840978593272, "grad_norm": 0.2350740283727646, "learning_rate": 9.005022899359498e-06, "loss": 1.0600173473358154, "step": 502 }, { "epoch": 0.7706422018348624, "grad_norm": 0.25523892045021057, "learning_rate": 8.995484782976372e-06, "loss": 1.3498680591583252, "step": 504 }, { "epoch": 0.7737003058103975, "grad_norm": 0.25793585181236267, "learning_rate": 8.985906907082548e-06, "loss": 1.4128957986831665, "step": 506 }, { "epoch": 0.7767584097859327, "grad_norm": 0.2672351002693176, "learning_rate": 8.9762893806219e-06, "loss": 1.4579813480377197, "step": 508 }, { "epoch": 0.7798165137614679, "grad_norm": 0.3467871844768524, "learning_rate": 8.96663231298931e-06, "loss": 1.469613790512085, "step": 510 }, { "epoch": 0.7828746177370031, "grad_norm": 0.2631012797355652, "learning_rate": 8.956935814029426e-06, "loss": 1.5352952480316162, "step": 512 }, { "epoch": 0.7859327217125383, "grad_norm": 0.42967817187309265, "learning_rate": 8.947199994035402e-06, "loss": 1.448859691619873, "step": 514 }, { "epoch": 0.7889908256880734, "grad_norm": 0.18720397353172302, "learning_rate": 8.937424963747656e-06, "loss": 1.4682276248931885, "step": 516 }, { "epoch": 0.7920489296636085, "grad_norm": 0.2571136951446533, "learning_rate": 8.9276108343526e-06, "loss": 1.430220365524292, "step": 518 }, { "epoch": 0.7951070336391437, "grad_norm": 0.49666231870651245, "learning_rate": 8.917757717481388e-06, "loss": 1.4388704299926758, "step": 520 }, { "epoch": 0.7981651376146789, "grad_norm": 0.18454308807849884, "learning_rate": 8.90786572520863e-06, "loss": 1.3887765407562256, "step": 522 }, { "epoch": 0.8012232415902141, "grad_norm": 0.19775497913360596, "learning_rate": 8.897934970051128e-06, "loss": 1.4397857189178467, "step": 524 }, { "epoch": 0.8042813455657493, "grad_norm": 0.24946311116218567, "learning_rate": 8.8879655649666e-06, "loss": 1.3772547245025635, "step": 526 }, { "epoch": 0.8073394495412844, "grad_norm": 0.1347188949584961, "learning_rate": 8.877957623352376e-06, "loss": 1.2148081064224243, "step": 528 }, { "epoch": 0.8103975535168195, "grad_norm": 0.17375752329826355, "learning_rate": 8.867911259044134e-06, "loss": 1.2351716756820679, "step": 530 }, { "epoch": 0.8134556574923547, "grad_norm": 0.12528319656848907, "learning_rate": 8.857826586314586e-06, "loss": 1.0168347358703613, "step": 532 }, { "epoch": 0.8165137614678899, "grad_norm": 0.22279202938079834, "learning_rate": 8.847703719872184e-06, "loss": 1.3256959915161133, "step": 534 }, { "epoch": 0.8195718654434251, "grad_norm": 0.22974777221679688, "learning_rate": 8.837542774859819e-06, "loss": 1.3868855237960815, "step": 536 }, { "epoch": 0.8226299694189603, "grad_norm": 0.2833384871482849, "learning_rate": 8.827343866853505e-06, "loss": 1.4037737846374512, "step": 538 }, { "epoch": 0.8256880733944955, "grad_norm": 0.20462170243263245, "learning_rate": 8.817107111861068e-06, "loss": 1.3688358068466187, "step": 540 }, { "epoch": 0.8287461773700305, "grad_norm": 0.21328498423099518, "learning_rate": 8.806832626320828e-06, "loss": 1.3812446594238281, "step": 542 }, { "epoch": 0.8318042813455657, "grad_norm": 0.2749079465866089, "learning_rate": 8.796520527100268e-06, "loss": 1.3695695400238037, "step": 544 }, { "epoch": 0.8348623853211009, "grad_norm": 0.17869983613491058, "learning_rate": 8.786170931494714e-06, "loss": 1.3381950855255127, "step": 546 }, { "epoch": 0.8379204892966361, "grad_norm": 0.23981167376041412, "learning_rate": 8.775783957225991e-06, "loss": 1.409177541732788, "step": 548 }, { "epoch": 0.8409785932721713, "grad_norm": 0.4634632170200348, "learning_rate": 8.765359722441096e-06, "loss": 1.3826044797897339, "step": 550 }, { "epoch": 0.8440366972477065, "grad_norm": 0.19470739364624023, "learning_rate": 8.754898345710839e-06, "loss": 1.3529078960418701, "step": 552 }, { "epoch": 0.8470948012232415, "grad_norm": 0.21753935515880585, "learning_rate": 8.744399946028506e-06, "loss": 1.3324353694915771, "step": 554 }, { "epoch": 0.8501529051987767, "grad_norm": 0.24797090888023376, "learning_rate": 8.733864642808505e-06, "loss": 1.3469841480255127, "step": 556 }, { "epoch": 0.8532110091743119, "grad_norm": 0.2123066782951355, "learning_rate": 8.723292555884997e-06, "loss": 1.343614101409912, "step": 558 }, { "epoch": 0.8562691131498471, "grad_norm": 0.25072529911994934, "learning_rate": 8.712683805510547e-06, "loss": 1.305376648902893, "step": 560 }, { "epoch": 0.8593272171253823, "grad_norm": 0.3219304382801056, "learning_rate": 8.702038512354746e-06, "loss": 1.3584821224212646, "step": 562 }, { "epoch": 0.8623853211009175, "grad_norm": 0.3253892660140991, "learning_rate": 8.691356797502846e-06, "loss": 1.3929443359375, "step": 564 }, { "epoch": 0.8654434250764526, "grad_norm": 0.22387385368347168, "learning_rate": 8.680638782454373e-06, "loss": 1.3898614645004272, "step": 566 }, { "epoch": 0.8685015290519877, "grad_norm": 0.2767902612686157, "learning_rate": 8.669884589121756e-06, "loss": 1.3842121362686157, "step": 568 }, { "epoch": 0.8715596330275229, "grad_norm": 0.2403760552406311, "learning_rate": 8.659094339828934e-06, "loss": 1.3873755931854248, "step": 570 }, { "epoch": 0.8746177370030581, "grad_norm": 0.30079615116119385, "learning_rate": 8.648268157309964e-06, "loss": 1.3781442642211914, "step": 572 }, { "epoch": 0.8776758409785933, "grad_norm": 0.24510778486728668, "learning_rate": 8.637406164707628e-06, "loss": 1.4003241062164307, "step": 574 }, { "epoch": 0.8807339449541285, "grad_norm": 0.19053591787815094, "learning_rate": 8.62650848557203e-06, "loss": 1.318782091140747, "step": 576 }, { "epoch": 0.8837920489296636, "grad_norm": 0.5118341445922852, "learning_rate": 8.615575243859194e-06, "loss": 1.3740344047546387, "step": 578 }, { "epoch": 0.8868501529051988, "grad_norm": 0.2653733193874359, "learning_rate": 8.604606563929649e-06, "loss": 1.3240249156951904, "step": 580 }, { "epoch": 0.8899082568807339, "grad_norm": 0.2646930515766144, "learning_rate": 8.59360257054702e-06, "loss": 1.3533198833465576, "step": 582 }, { "epoch": 0.8929663608562691, "grad_norm": 0.21842285990715027, "learning_rate": 8.582563388876602e-06, "loss": 1.3596748113632202, "step": 584 }, { "epoch": 0.8960244648318043, "grad_norm": 0.2090519517660141, "learning_rate": 8.571489144483945e-06, "loss": 1.3835537433624268, "step": 586 }, { "epoch": 0.8990825688073395, "grad_norm": 0.2362383008003235, "learning_rate": 8.560379963333416e-06, "loss": 1.368111252784729, "step": 588 }, { "epoch": 0.9021406727828746, "grad_norm": 0.4883694350719452, "learning_rate": 8.549235971786777e-06, "loss": 1.3067984580993652, "step": 590 }, { "epoch": 0.9051987767584098, "grad_norm": 0.3407292366027832, "learning_rate": 8.538057296601739e-06, "loss": 1.3290581703186035, "step": 592 }, { "epoch": 0.908256880733945, "grad_norm": 0.21036434173583984, "learning_rate": 8.526844064930523e-06, "loss": 1.3695251941680908, "step": 594 }, { "epoch": 0.9113149847094801, "grad_norm": 0.22752052545547485, "learning_rate": 8.515596404318415e-06, "loss": 1.3922007083892822, "step": 596 }, { "epoch": 0.9143730886850153, "grad_norm": 0.23141705989837646, "learning_rate": 8.504314442702315e-06, "loss": 1.371009111404419, "step": 598 }, { "epoch": 0.9174311926605505, "grad_norm": 0.18458011746406555, "learning_rate": 8.492998308409275e-06, "loss": 1.3468807935714722, "step": 600 }, { "epoch": 0.9204892966360856, "grad_norm": 0.2277638018131256, "learning_rate": 8.481648130155054e-06, "loss": 1.3067777156829834, "step": 602 }, { "epoch": 0.9235474006116208, "grad_norm": 0.2761037051677704, "learning_rate": 8.470264037042639e-06, "loss": 1.3436920642852783, "step": 604 }, { "epoch": 0.926605504587156, "grad_norm": 0.2718355059623718, "learning_rate": 8.458846158560787e-06, "loss": 1.368149995803833, "step": 606 }, { "epoch": 0.9296636085626911, "grad_norm": 0.471161812543869, "learning_rate": 8.447394624582544e-06, "loss": 1.3190257549285889, "step": 608 }, { "epoch": 0.9327217125382263, "grad_norm": 0.24170783162117004, "learning_rate": 8.435909565363772e-06, "loss": 1.3419578075408936, "step": 610 }, { "epoch": 0.9357798165137615, "grad_norm": 0.26485109329223633, "learning_rate": 8.424391111541673e-06, "loss": 1.338409662246704, "step": 612 }, { "epoch": 0.9388379204892966, "grad_norm": 0.23220610618591309, "learning_rate": 8.412839394133285e-06, "loss": 1.3877780437469482, "step": 614 }, { "epoch": 0.9418960244648318, "grad_norm": 0.24310626089572906, "learning_rate": 8.401254544534018e-06, "loss": 1.4051454067230225, "step": 616 }, { "epoch": 0.944954128440367, "grad_norm": 0.299958735704422, "learning_rate": 8.389636694516134e-06, "loss": 1.3702571392059326, "step": 618 }, { "epoch": 0.9480122324159022, "grad_norm": 0.449929803609848, "learning_rate": 8.377985976227265e-06, "loss": 1.379606008529663, "step": 620 }, { "epoch": 0.9510703363914373, "grad_norm": 0.24171197414398193, "learning_rate": 8.366302522188902e-06, "loss": 1.350182294845581, "step": 622 }, { "epoch": 0.9541284403669725, "grad_norm": 0.2935427129268646, "learning_rate": 8.354586465294894e-06, "loss": 1.2931137084960938, "step": 624 }, { "epoch": 0.9571865443425076, "grad_norm": 0.23755374550819397, "learning_rate": 8.342837938809925e-06, "loss": 1.3183162212371826, "step": 626 }, { "epoch": 0.9602446483180428, "grad_norm": 0.3486945331096649, "learning_rate": 8.331057076368012e-06, "loss": 1.3358354568481445, "step": 628 }, { "epoch": 0.963302752293578, "grad_norm": 0.3866771459579468, "learning_rate": 8.319244011970975e-06, "loss": 1.3079657554626465, "step": 630 }, { "epoch": 0.9663608562691132, "grad_norm": 0.23048752546310425, "learning_rate": 8.307398879986917e-06, "loss": 1.323075294494629, "step": 632 }, { "epoch": 0.9694189602446484, "grad_norm": 0.2808099687099457, "learning_rate": 8.295521815148697e-06, "loss": 1.376133918762207, "step": 634 }, { "epoch": 0.9724770642201835, "grad_norm": 0.3424737751483917, "learning_rate": 8.283612952552393e-06, "loss": 1.363619327545166, "step": 636 }, { "epoch": 0.9755351681957186, "grad_norm": 0.23272113502025604, "learning_rate": 8.271672427655765e-06, "loss": 1.3780806064605713, "step": 638 }, { "epoch": 0.9785932721712538, "grad_norm": 0.33965811133384705, "learning_rate": 8.259700376276724e-06, "loss": 1.3397910594940186, "step": 640 }, { "epoch": 0.981651376146789, "grad_norm": 0.25269240140914917, "learning_rate": 8.247696934591774e-06, "loss": 1.3255189657211304, "step": 642 }, { "epoch": 0.9847094801223242, "grad_norm": 1.2317392826080322, "learning_rate": 8.235662239134473e-06, "loss": 1.347729206085205, "step": 644 }, { "epoch": 0.9877675840978594, "grad_norm": 0.37982505559921265, "learning_rate": 8.22359642679387e-06, "loss": 1.3894901275634766, "step": 646 }, { "epoch": 0.9908256880733946, "grad_norm": 0.2849336564540863, "learning_rate": 8.211499634812966e-06, "loss": 1.429058313369751, "step": 648 }, { "epoch": 0.9938837920489296, "grad_norm": 0.6233349442481995, "learning_rate": 8.199372000787126e-06, "loss": 2.095426082611084, "step": 650 }, { "epoch": 0.9969418960244648, "grad_norm": 0.6541375517845154, "learning_rate": 8.187213662662539e-06, "loss": 2.1073060035705566, "step": 652 }, { "epoch": 1.0, "grad_norm": 11.037178039550781, "learning_rate": 8.175024758734636e-06, "loss": 2.095914840698242, "step": 654 }, { "epoch": 1.003058103975535, "grad_norm": 0.3948424160480499, "learning_rate": 8.16280542764652e-06, "loss": 1.4957305192947388, "step": 656 }, { "epoch": 1.0061162079510704, "grad_norm": 0.310005784034729, "learning_rate": 8.150555808387389e-06, "loss": 1.455479383468628, "step": 658 }, { "epoch": 1.0091743119266054, "grad_norm": 0.26789844036102295, "learning_rate": 8.138276040290952e-06, "loss": 1.4779293537139893, "step": 660 }, { "epoch": 1.0122324159021407, "grad_norm": 0.19781345129013062, "learning_rate": 8.125966263033852e-06, "loss": 1.4063279628753662, "step": 662 }, { "epoch": 1.0152905198776758, "grad_norm": 0.21764519810676575, "learning_rate": 8.11362661663407e-06, "loss": 1.5875146389007568, "step": 664 }, { "epoch": 1.018348623853211, "grad_norm": 0.25749847292900085, "learning_rate": 8.101257241449332e-06, "loss": 1.480888843536377, "step": 666 }, { "epoch": 1.0214067278287462, "grad_norm": 0.26426374912261963, "learning_rate": 8.08885827817552e-06, "loss": 1.4235765933990479, "step": 668 }, { "epoch": 1.0244648318042813, "grad_norm": 0.25188708305358887, "learning_rate": 8.07642986784506e-06, "loss": 1.5084459781646729, "step": 670 }, { "epoch": 1.0275229357798166, "grad_norm": 0.6583337783813477, "learning_rate": 8.063972151825332e-06, "loss": 1.369026780128479, "step": 672 }, { "epoch": 1.0305810397553516, "grad_norm": 0.21123117208480835, "learning_rate": 8.05148527181705e-06, "loss": 1.4445654153823853, "step": 674 }, { "epoch": 1.033639143730887, "grad_norm": 0.293588787317276, "learning_rate": 8.038969369852654e-06, "loss": 1.555469274520874, "step": 676 }, { "epoch": 1.036697247706422, "grad_norm": 0.27872779965400696, "learning_rate": 8.026424588294701e-06, "loss": 1.4869214296340942, "step": 678 }, { "epoch": 1.039755351681957, "grad_norm": 0.23042356967926025, "learning_rate": 8.013851069834233e-06, "loss": 1.279091238975525, "step": 680 }, { "epoch": 1.0428134556574924, "grad_norm": 0.289106547832489, "learning_rate": 8.001248957489164e-06, "loss": 1.4306490421295166, "step": 682 }, { "epoch": 1.0458715596330275, "grad_norm": 0.5272045135498047, "learning_rate": 7.988618394602653e-06, "loss": 1.6781132221221924, "step": 684 }, { "epoch": 1.0489296636085628, "grad_norm": 0.22576113045215607, "learning_rate": 7.975959524841464e-06, "loss": 1.3457372188568115, "step": 686 }, { "epoch": 1.0519877675840978, "grad_norm": 0.5630601644515991, "learning_rate": 7.963272492194344e-06, "loss": 1.4807915687561035, "step": 688 }, { "epoch": 1.0550458715596331, "grad_norm": 0.34389057755470276, "learning_rate": 7.950557440970377e-06, "loss": 1.368910789489746, "step": 690 }, { "epoch": 1.0581039755351682, "grad_norm": 0.21063481271266937, "learning_rate": 7.937814515797348e-06, "loss": 1.360002040863037, "step": 692 }, { "epoch": 1.0611620795107033, "grad_norm": 0.20320424437522888, "learning_rate": 7.92504386162009e-06, "loss": 1.3675504922866821, "step": 694 }, { "epoch": 1.0642201834862386, "grad_norm": 0.2813395857810974, "learning_rate": 7.912245623698846e-06, "loss": 1.395061731338501, "step": 696 }, { "epoch": 1.0672782874617737, "grad_norm": 0.4647752046585083, "learning_rate": 7.899419947607611e-06, "loss": 1.5662283897399902, "step": 698 }, { "epoch": 1.070336391437309, "grad_norm": 0.3765999972820282, "learning_rate": 7.886566979232471e-06, "loss": 1.5935697555541992, "step": 700 }, { "epoch": 1.073394495412844, "grad_norm": 0.29083383083343506, "learning_rate": 7.873686864769955e-06, "loss": 1.434537649154663, "step": 702 }, { "epoch": 1.0764525993883791, "grad_norm": 0.4763205349445343, "learning_rate": 7.860779750725362e-06, "loss": 1.4121177196502686, "step": 704 }, { "epoch": 1.0795107033639144, "grad_norm": 0.33439531922340393, "learning_rate": 7.8478457839111e-06, "loss": 1.3943579196929932, "step": 706 }, { "epoch": 1.0825688073394495, "grad_norm": 0.342690110206604, "learning_rate": 7.834885111445017e-06, "loss": 1.4776759147644043, "step": 708 }, { "epoch": 1.0856269113149848, "grad_norm": 0.29185494780540466, "learning_rate": 7.82189788074872e-06, "loss": 1.4435069561004639, "step": 710 }, { "epoch": 1.0886850152905199, "grad_norm": 1.3288284540176392, "learning_rate": 7.80888423954591e-06, "loss": 1.4731531143188477, "step": 712 }, { "epoch": 1.091743119266055, "grad_norm": 0.2119162380695343, "learning_rate": 7.795844335860691e-06, "loss": 1.4626476764678955, "step": 714 }, { "epoch": 1.0948012232415902, "grad_norm": 0.20571930706501007, "learning_rate": 7.782778318015892e-06, "loss": 1.342850685119629, "step": 716 }, { "epoch": 1.0978593272171253, "grad_norm": 0.22236645221710205, "learning_rate": 7.769686334631375e-06, "loss": 1.286208152770996, "step": 718 }, { "epoch": 1.1009174311926606, "grad_norm": 0.18384046852588654, "learning_rate": 7.756568534622355e-06, "loss": 1.4446015357971191, "step": 720 }, { "epoch": 1.1039755351681957, "grad_norm": 0.2486264407634735, "learning_rate": 7.743425067197693e-06, "loss": 1.5612818002700806, "step": 722 }, { "epoch": 1.107033639143731, "grad_norm": 0.23211126029491425, "learning_rate": 7.730256081858207e-06, "loss": 1.3999545574188232, "step": 724 }, { "epoch": 1.110091743119266, "grad_norm": 0.41483980417251587, "learning_rate": 7.717061728394968e-06, "loss": 1.591150164604187, "step": 726 }, { "epoch": 1.1131498470948011, "grad_norm": 0.3113287091255188, "learning_rate": 7.7038421568876e-06, "loss": 1.620883584022522, "step": 728 }, { "epoch": 1.1162079510703364, "grad_norm": 0.5611585378646851, "learning_rate": 7.690597517702569e-06, "loss": 1.3835599422454834, "step": 730 }, { "epoch": 1.1192660550458715, "grad_norm": 0.5187618732452393, "learning_rate": 7.677327961491475e-06, "loss": 1.3614990711212158, "step": 732 }, { "epoch": 1.1223241590214068, "grad_norm": 0.34465184807777405, "learning_rate": 7.664033639189336e-06, "loss": 1.467517614364624, "step": 734 }, { "epoch": 1.1253822629969419, "grad_norm": 0.22211050987243652, "learning_rate": 7.650714702012876e-06, "loss": 1.287433385848999, "step": 736 }, { "epoch": 1.1284403669724772, "grad_norm": 0.36259227991104126, "learning_rate": 7.637371301458797e-06, "loss": 1.367175817489624, "step": 738 }, { "epoch": 1.1314984709480123, "grad_norm": 0.44571414589881897, "learning_rate": 7.6240035893020625e-06, "loss": 1.3308281898498535, "step": 740 }, { "epoch": 1.1345565749235473, "grad_norm": 0.26124662160873413, "learning_rate": 7.610611717594173e-06, "loss": 1.3915913105010986, "step": 742 }, { "epoch": 1.1376146788990826, "grad_norm": 0.3137398064136505, "learning_rate": 7.597195838661426e-06, "loss": 1.3188378810882568, "step": 744 }, { "epoch": 1.1406727828746177, "grad_norm": 0.3484938144683838, "learning_rate": 7.583756105103195e-06, "loss": 1.3703608512878418, "step": 746 }, { "epoch": 1.143730886850153, "grad_norm": 0.3699035942554474, "learning_rate": 7.570292669790186e-06, "loss": 1.5115067958831787, "step": 748 }, { "epoch": 1.146788990825688, "grad_norm": 0.24170878529548645, "learning_rate": 7.556805685862703e-06, "loss": 1.3954684734344482, "step": 750 }, { "epoch": 1.1498470948012232, "grad_norm": 0.20038793981075287, "learning_rate": 7.543295306728904e-06, "loss": 1.345947027206421, "step": 752 }, { "epoch": 1.1529051987767585, "grad_norm": 0.38949868083000183, "learning_rate": 7.529761686063056e-06, "loss": 1.5590949058532715, "step": 754 }, { "epoch": 1.1559633027522935, "grad_norm": 0.33645766973495483, "learning_rate": 7.516204977803789e-06, "loss": 1.446972370147705, "step": 756 }, { "epoch": 1.1590214067278288, "grad_norm": 0.18463970720767975, "learning_rate": 7.5026253361523435e-06, "loss": 1.3630192279815674, "step": 758 }, { "epoch": 1.162079510703364, "grad_norm": 0.33572879433631897, "learning_rate": 7.489022915570813e-06, "loss": 1.457106113433838, "step": 760 }, { "epoch": 1.165137614678899, "grad_norm": 0.2753995954990387, "learning_rate": 7.475397870780397e-06, "loss": 1.4502360820770264, "step": 762 }, { "epoch": 1.1681957186544343, "grad_norm": 0.35596194863319397, "learning_rate": 7.4617503567596295e-06, "loss": 1.4977834224700928, "step": 764 }, { "epoch": 1.1712538226299694, "grad_norm": 0.4726940095424652, "learning_rate": 7.448080528742624e-06, "loss": 1.3764468431472778, "step": 766 }, { "epoch": 1.1743119266055047, "grad_norm": 0.26225268840789795, "learning_rate": 7.434388542217303e-06, "loss": 1.4741466045379639, "step": 768 }, { "epoch": 1.1773700305810397, "grad_norm": 0.27619338035583496, "learning_rate": 7.420674552923638e-06, "loss": 1.3593350648880005, "step": 770 }, { "epoch": 1.1804281345565748, "grad_norm": 0.3182947635650635, "learning_rate": 7.4069387168518615e-06, "loss": 1.673621654510498, "step": 772 }, { "epoch": 1.18348623853211, "grad_norm": 0.28721779584884644, "learning_rate": 7.393181190240714e-06, "loss": 1.4450278282165527, "step": 774 }, { "epoch": 1.1865443425076452, "grad_norm": 0.2768658399581909, "learning_rate": 7.379402129575645e-06, "loss": 1.5032843351364136, "step": 776 }, { "epoch": 1.1896024464831805, "grad_norm": 0.3218024969100952, "learning_rate": 7.3656016915870545e-06, "loss": 1.4965013265609741, "step": 778 }, { "epoch": 1.1926605504587156, "grad_norm": 0.4919971227645874, "learning_rate": 7.351780033248491e-06, "loss": 1.4509224891662598, "step": 780 }, { "epoch": 1.1957186544342508, "grad_norm": 0.3981909155845642, "learning_rate": 7.33793731177488e-06, "loss": 1.4464759826660156, "step": 782 }, { "epoch": 1.198776758409786, "grad_norm": 0.3076995611190796, "learning_rate": 7.324073684620726e-06, "loss": 1.4577126502990723, "step": 784 }, { "epoch": 1.2018348623853212, "grad_norm": 0.28227174282073975, "learning_rate": 7.310189309478331e-06, "loss": 1.439997911453247, "step": 786 }, { "epoch": 1.2048929663608563, "grad_norm": 0.26599401235580444, "learning_rate": 7.296284344275991e-06, "loss": 1.531783103942871, "step": 788 }, { "epoch": 1.2079510703363914, "grad_norm": 0.69685959815979, "learning_rate": 7.282358947176207e-06, "loss": 1.4577662944793701, "step": 790 }, { "epoch": 1.2110091743119267, "grad_norm": 0.25103896856307983, "learning_rate": 7.268413276573881e-06, "loss": 1.3561824560165405, "step": 792 }, { "epoch": 1.2140672782874617, "grad_norm": 0.21765579283237457, "learning_rate": 7.25444749109452e-06, "loss": 1.3165652751922607, "step": 794 }, { "epoch": 1.217125382262997, "grad_norm": 0.2564055919647217, "learning_rate": 7.2404617495924254e-06, "loss": 1.383346676826477, "step": 796 }, { "epoch": 1.2201834862385321, "grad_norm": 0.40797773003578186, "learning_rate": 7.226456211148891e-06, "loss": 1.3315465450286865, "step": 798 }, { "epoch": 1.2232415902140672, "grad_norm": 0.31532490253448486, "learning_rate": 7.212431035070391e-06, "loss": 1.3896580934524536, "step": 800 }, { "epoch": 1.2262996941896025, "grad_norm": 0.25705334544181824, "learning_rate": 7.198386380886765e-06, "loss": 1.3460421562194824, "step": 802 }, { "epoch": 1.2293577981651376, "grad_norm": 0.31377753615379333, "learning_rate": 7.1843224083494154e-06, "loss": 1.595191240310669, "step": 804 }, { "epoch": 1.2324159021406729, "grad_norm": 0.2853119969367981, "learning_rate": 7.170239277429474e-06, "loss": 1.6170880794525146, "step": 806 }, { "epoch": 1.235474006116208, "grad_norm": 0.44243165850639343, "learning_rate": 7.156137148315993e-06, "loss": 1.6550755500793457, "step": 808 }, { "epoch": 1.238532110091743, "grad_norm": 0.3517357110977173, "learning_rate": 7.14201618141412e-06, "loss": 1.566192865371704, "step": 810 }, { "epoch": 1.2415902140672783, "grad_norm": 0.2986673414707184, "learning_rate": 7.127876537343277e-06, "loss": 1.63118314743042, "step": 812 }, { "epoch": 1.2446483180428134, "grad_norm": 0.3479074537754059, "learning_rate": 7.1137183769353225e-06, "loss": 1.5168559551239014, "step": 814 }, { "epoch": 1.2477064220183487, "grad_norm": 0.4152420461177826, "learning_rate": 7.099541861232736e-06, "loss": 1.6398264169692993, "step": 816 }, { "epoch": 1.2507645259938838, "grad_norm": 0.384573370218277, "learning_rate": 7.085347151486779e-06, "loss": 1.4128949642181396, "step": 818 }, { "epoch": 1.2538226299694188, "grad_norm": 0.3804616630077362, "learning_rate": 7.071134409155659e-06, "loss": 1.557448148727417, "step": 820 }, { "epoch": 1.2568807339449541, "grad_norm": 0.6236130595207214, "learning_rate": 7.056903795902701e-06, "loss": 1.3184959888458252, "step": 822 }, { "epoch": 1.2599388379204892, "grad_norm": 0.7443933486938477, "learning_rate": 7.042655473594495e-06, "loss": 1.537932276725769, "step": 824 }, { "epoch": 1.2629969418960245, "grad_norm": 0.5472233891487122, "learning_rate": 7.028389604299074e-06, "loss": 1.1561626195907593, "step": 826 }, { "epoch": 1.2660550458715596, "grad_norm": 0.847542941570282, "learning_rate": 7.01410635028405e-06, "loss": 1.1249284744262695, "step": 828 }, { "epoch": 1.2691131498470947, "grad_norm": 0.3495579957962036, "learning_rate": 6.9998058740147835e-06, "loss": 1.3474421501159668, "step": 830 }, { "epoch": 1.27217125382263, "grad_norm": 0.4069005846977234, "learning_rate": 6.985488338152529e-06, "loss": 1.3892837762832642, "step": 832 }, { "epoch": 1.2752293577981653, "grad_norm": 0.6165335178375244, "learning_rate": 6.971153905552587e-06, "loss": 1.524814248085022, "step": 834 }, { "epoch": 1.2782874617737003, "grad_norm": 0.6481596827507019, "learning_rate": 6.956802739262446e-06, "loss": 1.464059829711914, "step": 836 }, { "epoch": 1.2813455657492354, "grad_norm": 0.3051135241985321, "learning_rate": 6.942435002519938e-06, "loss": 1.212691307067871, "step": 838 }, { "epoch": 1.2844036697247707, "grad_norm": 0.31896138191223145, "learning_rate": 6.9280508587513725e-06, "loss": 1.179284691810608, "step": 840 }, { "epoch": 1.2874617737003058, "grad_norm": 0.2261551022529602, "learning_rate": 6.913650471569684e-06, "loss": 1.38997220993042, "step": 842 }, { "epoch": 1.290519877675841, "grad_norm": 0.3368714451789856, "learning_rate": 6.899234004772566e-06, "loss": 1.3169426918029785, "step": 844 }, { "epoch": 1.2935779816513762, "grad_norm": 0.49499788880348206, "learning_rate": 6.884801622340612e-06, "loss": 1.293768048286438, "step": 846 }, { "epoch": 1.2966360856269112, "grad_norm": 0.2904210686683655, "learning_rate": 6.870353488435447e-06, "loss": 1.5008976459503174, "step": 848 }, { "epoch": 1.2996941896024465, "grad_norm": 0.4230108857154846, "learning_rate": 6.855889767397863e-06, "loss": 1.4707106351852417, "step": 850 }, { "epoch": 1.3027522935779816, "grad_norm": 0.2836777865886688, "learning_rate": 6.841410623745944e-06, "loss": 1.182532548904419, "step": 852 }, { "epoch": 1.305810397553517, "grad_norm": 0.3048684895038605, "learning_rate": 6.826916222173205e-06, "loss": 1.373314380645752, "step": 854 }, { "epoch": 1.308868501529052, "grad_norm": 0.38874655961990356, "learning_rate": 6.812406727546713e-06, "loss": 1.5207183361053467, "step": 856 }, { "epoch": 1.311926605504587, "grad_norm": 0.541847288608551, "learning_rate": 6.7978823049052046e-06, "loss": 1.6546745300292969, "step": 858 }, { "epoch": 1.3149847094801224, "grad_norm": 0.3354927897453308, "learning_rate": 6.783343119457221e-06, "loss": 1.6852827072143555, "step": 860 }, { "epoch": 1.3180428134556574, "grad_norm": 0.22799281775951385, "learning_rate": 6.768789336579224e-06, "loss": 1.7998615503311157, "step": 862 }, { "epoch": 1.3211009174311927, "grad_norm": 0.2829393446445465, "learning_rate": 6.754221121813707e-06, "loss": 1.3555914163589478, "step": 864 }, { "epoch": 1.3241590214067278, "grad_norm": 0.2552604377269745, "learning_rate": 6.739638640867332e-06, "loss": 1.44038724899292, "step": 866 }, { "epoch": 1.3272171253822629, "grad_norm": 0.2328341007232666, "learning_rate": 6.72504205960902e-06, "loss": 1.2792387008666992, "step": 868 }, { "epoch": 1.3302752293577982, "grad_norm": 0.19776956737041473, "learning_rate": 6.710431544068085e-06, "loss": 1.2014856338500977, "step": 870 }, { "epoch": 1.3333333333333333, "grad_norm": 0.2862965762615204, "learning_rate": 6.695807260432332e-06, "loss": 1.612195372581482, "step": 872 }, { "epoch": 1.3363914373088686, "grad_norm": 0.2737024426460266, "learning_rate": 6.681169375046173e-06, "loss": 1.4856352806091309, "step": 874 }, { "epoch": 1.3394495412844036, "grad_norm": 0.33617132902145386, "learning_rate": 6.666518054408734e-06, "loss": 1.6690922975540161, "step": 876 }, { "epoch": 1.3425076452599387, "grad_norm": 0.33230748772621155, "learning_rate": 6.65185346517196e-06, "loss": 1.134220838546753, "step": 878 }, { "epoch": 1.345565749235474, "grad_norm": 0.34520813822746277, "learning_rate": 6.637175774138722e-06, "loss": 1.3939542770385742, "step": 880 }, { "epoch": 1.3486238532110093, "grad_norm": 0.3193676471710205, "learning_rate": 6.622485148260916e-06, "loss": 1.6689043045043945, "step": 882 }, { "epoch": 1.3516819571865444, "grad_norm": 0.2586718499660492, "learning_rate": 6.607781754637567e-06, "loss": 1.3927881717681885, "step": 884 }, { "epoch": 1.3547400611620795, "grad_norm": 0.36470475792884827, "learning_rate": 6.593065760512924e-06, "loss": 1.5524687767028809, "step": 886 }, { "epoch": 1.3577981651376148, "grad_norm": 0.5333327054977417, "learning_rate": 6.578337333274566e-06, "loss": 1.4335553646087646, "step": 888 }, { "epoch": 1.3608562691131498, "grad_norm": 0.24828922748565674, "learning_rate": 6.563596640451489e-06, "loss": 1.3478354215621948, "step": 890 }, { "epoch": 1.3639143730886851, "grad_norm": 0.2684786021709442, "learning_rate": 6.548843849712206e-06, "loss": 1.4221248626708984, "step": 892 }, { "epoch": 1.3669724770642202, "grad_norm": 0.2922813594341278, "learning_rate": 6.534079128862835e-06, "loss": 1.4792616367340088, "step": 894 }, { "epoch": 1.3700305810397553, "grad_norm": 0.21960243582725525, "learning_rate": 6.5193026458452006e-06, "loss": 1.3363940715789795, "step": 896 }, { "epoch": 1.3730886850152906, "grad_norm": 0.41456371545791626, "learning_rate": 6.50451456873491e-06, "loss": 1.4480544328689575, "step": 898 }, { "epoch": 1.3761467889908257, "grad_norm": 0.6222192049026489, "learning_rate": 6.489715065739448e-06, "loss": 1.7465565204620361, "step": 900 }, { "epoch": 1.379204892966361, "grad_norm": 0.5998108983039856, "learning_rate": 6.474904305196268e-06, "loss": 2.144679546356201, "step": 902 }, { "epoch": 1.382262996941896, "grad_norm": 0.5612609386444092, "learning_rate": 6.4600824555708695e-06, "loss": 1.378048300743103, "step": 904 }, { "epoch": 1.385321100917431, "grad_norm": 0.32021385431289673, "learning_rate": 6.445249685454885e-06, "loss": 1.361167073249817, "step": 906 }, { "epoch": 1.3883792048929664, "grad_norm": 0.36393630504608154, "learning_rate": 6.4304061635641645e-06, "loss": 1.433903694152832, "step": 908 }, { "epoch": 1.3914373088685015, "grad_norm": 0.7985405325889587, "learning_rate": 6.415552058736854e-06, "loss": 1.5466125011444092, "step": 910 }, { "epoch": 1.3944954128440368, "grad_norm": 0.30912530422210693, "learning_rate": 6.4006875399314705e-06, "loss": 1.463235855102539, "step": 912 }, { "epoch": 1.3975535168195719, "grad_norm": 0.2953026294708252, "learning_rate": 6.3858127762249945e-06, "loss": 1.3276557922363281, "step": 914 }, { "epoch": 1.400611620795107, "grad_norm": 0.19828742742538452, "learning_rate": 6.3709279368109264e-06, "loss": 1.2300511598587036, "step": 916 }, { "epoch": 1.4036697247706422, "grad_norm": 0.21878407895565033, "learning_rate": 6.356033190997386e-06, "loss": 1.1606783866882324, "step": 918 }, { "epoch": 1.4067278287461773, "grad_norm": 0.19046013057231903, "learning_rate": 6.341128708205162e-06, "loss": 1.3056751489639282, "step": 920 }, { "epoch": 1.4097859327217126, "grad_norm": 0.40108954906463623, "learning_rate": 6.326214657965804e-06, "loss": 1.5421757698059082, "step": 922 }, { "epoch": 1.4128440366972477, "grad_norm": 0.46537211537361145, "learning_rate": 6.311291209919682e-06, "loss": 1.5684192180633545, "step": 924 }, { "epoch": 1.4159021406727827, "grad_norm": 0.5733487606048584, "learning_rate": 6.296358533814065e-06, "loss": 1.5650339126586914, "step": 926 }, { "epoch": 1.418960244648318, "grad_norm": 0.4306733310222626, "learning_rate": 6.281416799501188e-06, "loss": 1.5992372035980225, "step": 928 }, { "epoch": 1.4220183486238533, "grad_norm": 0.407654732465744, "learning_rate": 6.266466176936313e-06, "loss": 1.4283607006072998, "step": 930 }, { "epoch": 1.4250764525993884, "grad_norm": 4.419346332550049, "learning_rate": 6.251506836175807e-06, "loss": 1.5659562349319458, "step": 932 }, { "epoch": 1.4281345565749235, "grad_norm": 0.7012003064155579, "learning_rate": 6.236538947375203e-06, "loss": 1.4677741527557373, "step": 934 }, { "epoch": 1.4311926605504588, "grad_norm": 0.22764644026756287, "learning_rate": 6.221562680787258e-06, "loss": 1.374863624572754, "step": 936 }, { "epoch": 1.4342507645259939, "grad_norm": 0.4946407973766327, "learning_rate": 6.20657820676003e-06, "loss": 1.3795430660247803, "step": 938 }, { "epoch": 1.4373088685015292, "grad_norm": 1.4666649103164673, "learning_rate": 6.191585695734925e-06, "loss": 1.584106683731079, "step": 940 }, { "epoch": 1.4403669724770642, "grad_norm": 0.9116813540458679, "learning_rate": 6.176585318244775e-06, "loss": 1.3207650184631348, "step": 942 }, { "epoch": 1.4434250764525993, "grad_norm": 0.4549460709095001, "learning_rate": 6.161577244911883e-06, "loss": 1.5188086032867432, "step": 944 }, { "epoch": 1.4464831804281346, "grad_norm": 0.6293279528617859, "learning_rate": 6.146561646446088e-06, "loss": 1.40483558177948, "step": 946 }, { "epoch": 1.4495412844036697, "grad_norm": 0.5348030924797058, "learning_rate": 6.131538693642828e-06, "loss": 1.4180057048797607, "step": 948 }, { "epoch": 1.452599388379205, "grad_norm": 0.7010774612426758, "learning_rate": 6.116508557381191e-06, "loss": 1.5555238723754883, "step": 950 }, { "epoch": 1.45565749235474, "grad_norm": 0.3996182382106781, "learning_rate": 6.1014714086219725e-06, "loss": 1.5635944604873657, "step": 952 }, { "epoch": 1.4587155963302751, "grad_norm": 0.3819827139377594, "learning_rate": 6.086427418405735e-06, "loss": 1.3868696689605713, "step": 954 }, { "epoch": 1.4617737003058104, "grad_norm": 0.24838334321975708, "learning_rate": 6.071376757850858e-06, "loss": 1.3217381238937378, "step": 956 }, { "epoch": 1.4648318042813455, "grad_norm": 0.5527139902114868, "learning_rate": 6.0563195981515885e-06, "loss": 1.456415057182312, "step": 958 }, { "epoch": 1.4678899082568808, "grad_norm": 0.2822090983390808, "learning_rate": 6.0412561105761055e-06, "loss": 1.3990404605865479, "step": 960 }, { "epoch": 1.470948012232416, "grad_norm": 0.370832234621048, "learning_rate": 6.026186466464562e-06, "loss": 1.5524400472640991, "step": 962 }, { "epoch": 1.474006116207951, "grad_norm": 0.30970191955566406, "learning_rate": 6.011110837227138e-06, "loss": 1.4143943786621094, "step": 964 }, { "epoch": 1.4770642201834863, "grad_norm": 0.3659932613372803, "learning_rate": 5.996029394342089e-06, "loss": 1.3726913928985596, "step": 966 }, { "epoch": 1.4801223241590213, "grad_norm": 0.40378639101982117, "learning_rate": 5.980942309353803e-06, "loss": 1.3403112888336182, "step": 968 }, { "epoch": 1.4831804281345566, "grad_norm": 0.2668818235397339, "learning_rate": 5.965849753870841e-06, "loss": 1.4581551551818848, "step": 970 }, { "epoch": 1.4862385321100917, "grad_norm": 0.39147576689720154, "learning_rate": 5.950751899563989e-06, "loss": 1.4426075220108032, "step": 972 }, { "epoch": 1.4892966360856268, "grad_norm": 0.4053312838077545, "learning_rate": 5.935648918164308e-06, "loss": 1.429807424545288, "step": 974 }, { "epoch": 1.492354740061162, "grad_norm": 0.2912329435348511, "learning_rate": 5.9205409814611694e-06, "loss": 1.6015820503234863, "step": 976 }, { "epoch": 1.4954128440366974, "grad_norm": 0.39581140875816345, "learning_rate": 5.9054282613003165e-06, "loss": 1.7901129722595215, "step": 978 }, { "epoch": 1.4984709480122325, "grad_norm": 5.4772210121154785, "learning_rate": 5.890310929581899e-06, "loss": 1.665008544921875, "step": 980 }, { "epoch": 1.5015290519877675, "grad_norm": 0.32753488421440125, "learning_rate": 5.875189158258521e-06, "loss": 1.658569574356079, "step": 982 }, { "epoch": 1.5045871559633026, "grad_norm": 0.3322629928588867, "learning_rate": 5.860063119333287e-06, "loss": 1.568853735923767, "step": 984 }, { "epoch": 1.507645259938838, "grad_norm": 0.3625146746635437, "learning_rate": 5.844932984857841e-06, "loss": 1.2555010318756104, "step": 986 }, { "epoch": 1.5107033639143732, "grad_norm": 0.3967174291610718, "learning_rate": 5.829798926930411e-06, "loss": 1.2352030277252197, "step": 988 }, { "epoch": 1.5137614678899083, "grad_norm": 0.92249995470047, "learning_rate": 5.814661117693856e-06, "loss": 1.6529834270477295, "step": 990 }, { "epoch": 1.5168195718654434, "grad_norm": 0.43264713883399963, "learning_rate": 5.799519729333702e-06, "loss": 1.6510822772979736, "step": 992 }, { "epoch": 1.5198776758409784, "grad_norm": 0.48226049542427063, "learning_rate": 5.784374934076188e-06, "loss": 1.7469120025634766, "step": 994 }, { "epoch": 1.5229357798165137, "grad_norm": 0.6006577014923096, "learning_rate": 5.769226904186301e-06, "loss": 1.6751326322555542, "step": 996 }, { "epoch": 1.525993883792049, "grad_norm": 0.417524129152298, "learning_rate": 5.754075811965826e-06, "loss": 1.7241541147232056, "step": 998 }, { "epoch": 1.529051987767584, "grad_norm": 0.4846678674221039, "learning_rate": 5.738921829751374e-06, "loss": 1.5894498825073242, "step": 1000 }, { "epoch": 1.5321100917431192, "grad_norm": 0.37620386481285095, "learning_rate": 5.723765129912433e-06, "loss": 1.5567536354064941, "step": 1002 }, { "epoch": 1.5351681957186545, "grad_norm": 0.9559251070022583, "learning_rate": 5.708605884849402e-06, "loss": 1.444126844406128, "step": 1004 }, { "epoch": 1.5382262996941896, "grad_norm": 0.4608314335346222, "learning_rate": 5.6934442669916315e-06, "loss": 1.7045128345489502, "step": 1006 }, { "epoch": 1.5412844036697249, "grad_norm": 0.5580506920814514, "learning_rate": 5.678280448795457e-06, "loss": 1.576319932937622, "step": 1008 }, { "epoch": 1.54434250764526, "grad_norm": 0.414983332157135, "learning_rate": 5.663114602742247e-06, "loss": 1.1866123676300049, "step": 1010 }, { "epoch": 1.547400611620795, "grad_norm": 0.5494526624679565, "learning_rate": 5.647946901336433e-06, "loss": 1.7420477867126465, "step": 1012 }, { "epoch": 1.5504587155963303, "grad_norm": 0.6842697262763977, "learning_rate": 5.632777517103552e-06, "loss": 1.7904109954833984, "step": 1014 }, { "epoch": 1.5535168195718656, "grad_norm": 0.43980666995048523, "learning_rate": 5.617606622588282e-06, "loss": 1.862006425857544, "step": 1016 }, { "epoch": 1.5565749235474007, "grad_norm": 0.3990402817726135, "learning_rate": 5.602434390352476e-06, "loss": 1.7830100059509277, "step": 1018 }, { "epoch": 1.5596330275229358, "grad_norm": 0.4031524360179901, "learning_rate": 5.58726099297321e-06, "loss": 1.7594141960144043, "step": 1020 }, { "epoch": 1.5626911314984708, "grad_norm": 0.6580591797828674, "learning_rate": 5.572086603040809e-06, "loss": 1.6219829320907593, "step": 1022 }, { "epoch": 1.5657492354740061, "grad_norm": 0.36656439304351807, "learning_rate": 5.556911393156885e-06, "loss": 1.4893901348114014, "step": 1024 }, { "epoch": 1.5688073394495414, "grad_norm": 0.6261524558067322, "learning_rate": 5.541735535932383e-06, "loss": 1.058058261871338, "step": 1026 }, { "epoch": 1.5718654434250765, "grad_norm": 0.3441345691680908, "learning_rate": 5.526559203985605e-06, "loss": 1.0509142875671387, "step": 1028 }, { "epoch": 1.5749235474006116, "grad_norm": 0.2408900260925293, "learning_rate": 5.511382569940258e-06, "loss": 1.2871123552322388, "step": 1030 }, { "epoch": 1.5779816513761467, "grad_norm": 0.45723816752433777, "learning_rate": 5.496205806423481e-06, "loss": 1.2235673666000366, "step": 1032 }, { "epoch": 1.581039755351682, "grad_norm": 0.3109905421733856, "learning_rate": 5.481029086063887e-06, "loss": 1.177577018737793, "step": 1034 }, { "epoch": 1.5840978593272173, "grad_norm": 0.20282985270023346, "learning_rate": 5.4658525814896014e-06, "loss": 1.2040612697601318, "step": 1036 }, { "epoch": 1.5871559633027523, "grad_norm": 0.43076759576797485, "learning_rate": 5.45067646532629e-06, "loss": 1.4584531784057617, "step": 1038 }, { "epoch": 1.5902140672782874, "grad_norm": 0.472885400056839, "learning_rate": 5.435500910195203e-06, "loss": 1.387641429901123, "step": 1040 }, { "epoch": 1.5932721712538225, "grad_norm": 3.1532437801361084, "learning_rate": 5.420326088711209e-06, "loss": 1.221092700958252, "step": 1042 }, { "epoch": 1.5963302752293578, "grad_norm": 0.6743189692497253, "learning_rate": 5.405152173480833e-06, "loss": 1.4836219549179077, "step": 1044 }, { "epoch": 1.599388379204893, "grad_norm": 0.20277228951454163, "learning_rate": 5.389979337100289e-06, "loss": 1.5031371116638184, "step": 1046 }, { "epoch": 1.6024464831804281, "grad_norm": 0.5120447874069214, "learning_rate": 5.374807752153522e-06, "loss": 1.282975673675537, "step": 1048 }, { "epoch": 1.6055045871559632, "grad_norm": 0.35753709077835083, "learning_rate": 5.359637591210242e-06, "loss": 1.4665361642837524, "step": 1050 }, { "epoch": 1.6085626911314985, "grad_norm": 0.7353309988975525, "learning_rate": 5.344469026823959e-06, "loss": 1.6730611324310303, "step": 1052 }, { "epoch": 1.6116207951070336, "grad_norm": 0.4338257610797882, "learning_rate": 5.329302231530029e-06, "loss": 1.186348795890808, "step": 1054 }, { "epoch": 1.614678899082569, "grad_norm": 0.42416566610336304, "learning_rate": 5.31413737784368e-06, "loss": 1.4430310726165771, "step": 1056 }, { "epoch": 1.617737003058104, "grad_norm": 0.2432592213153839, "learning_rate": 5.298974638258055e-06, "loss": 1.518967866897583, "step": 1058 }, { "epoch": 1.620795107033639, "grad_norm": 0.408245712518692, "learning_rate": 5.283814185242252e-06, "loss": 1.426690697669983, "step": 1060 }, { "epoch": 1.6238532110091743, "grad_norm": 0.2117079198360443, "learning_rate": 5.2686561912393606e-06, "loss": 1.2693121433258057, "step": 1062 }, { "epoch": 1.6269113149847096, "grad_norm": 4.30716609954834, "learning_rate": 5.253500828664501e-06, "loss": 0.9013931155204773, "step": 1064 }, { "epoch": 1.6299694189602447, "grad_norm": 0.38770049810409546, "learning_rate": 5.23834826990286e-06, "loss": 1.5694489479064941, "step": 1066 }, { "epoch": 1.6330275229357798, "grad_norm": 0.6700468063354492, "learning_rate": 5.223198687307733e-06, "loss": 1.503030776977539, "step": 1068 }, { "epoch": 1.6360856269113149, "grad_norm": 0.2767106294631958, "learning_rate": 5.208052253198564e-06, "loss": 1.3917062282562256, "step": 1070 }, { "epoch": 1.6391437308868502, "grad_norm": 0.3463125228881836, "learning_rate": 5.192909139858981e-06, "loss": 1.5068938732147217, "step": 1072 }, { "epoch": 1.6422018348623855, "grad_norm": 0.3212260603904724, "learning_rate": 5.177769519534846e-06, "loss": 1.4421181678771973, "step": 1074 }, { "epoch": 1.6452599388379205, "grad_norm": 0.4484805762767792, "learning_rate": 5.162633564432285e-06, "loss": 1.408212661743164, "step": 1076 }, { "epoch": 1.6483180428134556, "grad_norm": 0.4805358350276947, "learning_rate": 5.1475014467157325e-06, "loss": 1.6133791208267212, "step": 1078 }, { "epoch": 1.6513761467889907, "grad_norm": 0.5775420665740967, "learning_rate": 5.132373338505978e-06, "loss": 1.2856450080871582, "step": 1080 }, { "epoch": 1.654434250764526, "grad_norm": 0.32906994223594666, "learning_rate": 5.117249411878204e-06, "loss": 1.04205322265625, "step": 1082 }, { "epoch": 1.6574923547400613, "grad_norm": 0.5074779987335205, "learning_rate": 5.10212983886003e-06, "loss": 1.6698901653289795, "step": 1084 }, { "epoch": 1.6605504587155964, "grad_norm": 0.36449626088142395, "learning_rate": 5.087014791429552e-06, "loss": 1.449878215789795, "step": 1086 }, { "epoch": 1.6636085626911314, "grad_norm": 1.0477646589279175, "learning_rate": 5.071904441513393e-06, "loss": 1.5865240097045898, "step": 1088 }, { "epoch": 1.6666666666666665, "grad_norm": 0.3797400891780853, "learning_rate": 5.056798960984741e-06, "loss": 1.4271771907806396, "step": 1090 }, { "epoch": 1.6697247706422018, "grad_norm": 0.3018883466720581, "learning_rate": 5.041698521661401e-06, "loss": 1.6418373584747314, "step": 1092 }, { "epoch": 1.6727828746177371, "grad_norm": 0.5908496379852295, "learning_rate": 5.026603295303833e-06, "loss": 1.5063586235046387, "step": 1094 }, { "epoch": 1.6758409785932722, "grad_norm": 0.5799764394760132, "learning_rate": 5.011513453613205e-06, "loss": 1.5312390327453613, "step": 1096 }, { "epoch": 1.6788990825688073, "grad_norm": 0.4648537337779999, "learning_rate": 4.996429168229432e-06, "loss": 1.4155495166778564, "step": 1098 }, { "epoch": 1.6819571865443423, "grad_norm": 0.3357274830341339, "learning_rate": 4.981350610729234e-06, "loss": 1.07462477684021, "step": 1100 }, { "epoch": 1.6850152905198776, "grad_norm": 0.8209952712059021, "learning_rate": 4.966277952624179e-06, "loss": 1.532288670539856, "step": 1102 }, { "epoch": 1.688073394495413, "grad_norm": 0.6916195750236511, "learning_rate": 4.951211365358723e-06, "loss": 1.5015881061553955, "step": 1104 }, { "epoch": 1.691131498470948, "grad_norm": 0.6677690148353577, "learning_rate": 4.936151020308282e-06, "loss": 1.5166327953338623, "step": 1106 }, { "epoch": 1.694189602446483, "grad_norm": 0.7889437675476074, "learning_rate": 4.921097088777261e-06, "loss": 1.5232961177825928, "step": 1108 }, { "epoch": 1.6972477064220184, "grad_norm": 0.5421835780143738, "learning_rate": 4.906049741997119e-06, "loss": 1.3370258808135986, "step": 1110 }, { "epoch": 1.7003058103975535, "grad_norm": 0.28672778606414795, "learning_rate": 4.8910091511244115e-06, "loss": 1.5552886724472046, "step": 1112 }, { "epoch": 1.7033639143730888, "grad_norm": 0.8609727025032043, "learning_rate": 4.875975487238853e-06, "loss": 1.6477062702178955, "step": 1114 }, { "epoch": 1.7064220183486238, "grad_norm": 0.46577727794647217, "learning_rate": 4.860948921341366e-06, "loss": 1.3554713726043701, "step": 1116 }, { "epoch": 1.709480122324159, "grad_norm": 0.4357546865940094, "learning_rate": 4.845929624352136e-06, "loss": 1.616469383239746, "step": 1118 }, { "epoch": 1.7125382262996942, "grad_norm": 0.8016573786735535, "learning_rate": 4.830917767108666e-06, "loss": 1.4049677848815918, "step": 1120 }, { "epoch": 1.7155963302752295, "grad_norm": 0.34570103883743286, "learning_rate": 4.8159135203638394e-06, "loss": 1.5350430011749268, "step": 1122 }, { "epoch": 1.7186544342507646, "grad_norm": 0.6164813041687012, "learning_rate": 4.800917054783971e-06, "loss": 1.4737257957458496, "step": 1124 }, { "epoch": 1.7217125382262997, "grad_norm": 0.30021098256111145, "learning_rate": 4.785928540946869e-06, "loss": 1.59697425365448, "step": 1126 }, { "epoch": 1.7247706422018347, "grad_norm": 0.3294142782688141, "learning_rate": 4.770948149339897e-06, "loss": 1.6918811798095703, "step": 1128 }, { "epoch": 1.72782874617737, "grad_norm": 0.33221927285194397, "learning_rate": 4.755976050358026e-06, "loss": 1.581977128982544, "step": 1130 }, { "epoch": 1.7308868501529053, "grad_norm": 0.27995747327804565, "learning_rate": 4.741012414301907e-06, "loss": 1.42479407787323, "step": 1132 }, { "epoch": 1.7339449541284404, "grad_norm": 0.4526294767856598, "learning_rate": 4.726057411375927e-06, "loss": 1.5270183086395264, "step": 1134 }, { "epoch": 1.7370030581039755, "grad_norm": 0.6458525657653809, "learning_rate": 4.711111211686279e-06, "loss": 1.5350821018218994, "step": 1136 }, { "epoch": 1.7400611620795106, "grad_norm": 0.40516841411590576, "learning_rate": 4.6961739852390175e-06, "loss": 1.5310497283935547, "step": 1138 }, { "epoch": 1.7431192660550459, "grad_norm": 1.3104746341705322, "learning_rate": 4.681245901938134e-06, "loss": 1.5385562181472778, "step": 1140 }, { "epoch": 1.7461773700305812, "grad_norm": 0.40381914377212524, "learning_rate": 4.666327131583621e-06, "loss": 1.5392662286758423, "step": 1142 }, { "epoch": 1.7492354740061162, "grad_norm": 0.8844152688980103, "learning_rate": 4.65141784386954e-06, "loss": 1.333682894706726, "step": 1144 }, { "epoch": 1.7522935779816513, "grad_norm": 0.423922061920166, "learning_rate": 4.636518208382091e-06, "loss": 1.4100391864776611, "step": 1146 }, { "epoch": 1.7553516819571864, "grad_norm": 0.3589678406715393, "learning_rate": 4.621628394597687e-06, "loss": 1.341862440109253, "step": 1148 }, { "epoch": 1.7584097859327217, "grad_norm": 0.6498292088508606, "learning_rate": 4.606748571881018e-06, "loss": 1.4297010898590088, "step": 1150 }, { "epoch": 1.761467889908257, "grad_norm": 0.5506405234336853, "learning_rate": 4.59187890948314e-06, "loss": 1.4309487342834473, "step": 1152 }, { "epoch": 1.764525993883792, "grad_norm": 0.45955854654312134, "learning_rate": 4.577019576539527e-06, "loss": 1.2851155996322632, "step": 1154 }, { "epoch": 1.7675840978593271, "grad_norm": 0.28625011444091797, "learning_rate": 4.562170742068175e-06, "loss": 0.9397743940353394, "step": 1156 }, { "epoch": 1.7706422018348624, "grad_norm": 0.22773736715316772, "learning_rate": 4.547332574967653e-06, "loss": 1.237460732460022, "step": 1158 }, { "epoch": 1.7737003058103975, "grad_norm": 0.25427719950675964, "learning_rate": 4.5325052440151985e-06, "loss": 1.3028910160064697, "step": 1160 }, { "epoch": 1.7767584097859328, "grad_norm": 0.2875189781188965, "learning_rate": 4.517688917864794e-06, "loss": 1.3547457456588745, "step": 1162 }, { "epoch": 1.7798165137614679, "grad_norm": 0.21899199485778809, "learning_rate": 4.502883765045244e-06, "loss": 1.36411714553833, "step": 1164 }, { "epoch": 1.782874617737003, "grad_norm": 0.21183030307292938, "learning_rate": 4.488089953958264e-06, "loss": 1.4323028326034546, "step": 1166 }, { "epoch": 1.7859327217125383, "grad_norm": 0.22526955604553223, "learning_rate": 4.473307652876563e-06, "loss": 1.3429040908813477, "step": 1168 }, { "epoch": 1.7889908256880735, "grad_norm": 0.266107439994812, "learning_rate": 4.458537029941926e-06, "loss": 1.3663442134857178, "step": 1170 }, { "epoch": 1.7920489296636086, "grad_norm": 0.490496963262558, "learning_rate": 4.4437782531633074e-06, "loss": 1.3354597091674805, "step": 1172 }, { "epoch": 1.7951070336391437, "grad_norm": 0.1854841560125351, "learning_rate": 4.429031490414919e-06, "loss": 1.3446393013000488, "step": 1174 }, { "epoch": 1.7981651376146788, "grad_norm": 0.1960364729166031, "learning_rate": 4.414296909434311e-06, "loss": 1.3029416799545288, "step": 1176 }, { "epoch": 1.801223241590214, "grad_norm": 0.35048866271972656, "learning_rate": 4.399574677820481e-06, "loss": 1.348449945449829, "step": 1178 }, { "epoch": 1.8042813455657494, "grad_norm": 0.3793323040008545, "learning_rate": 4.384864963031952e-06, "loss": 1.297593593597412, "step": 1180 }, { "epoch": 1.8073394495412844, "grad_norm": 0.14626124501228333, "learning_rate": 4.370167932384873e-06, "loss": 1.1695170402526855, "step": 1182 }, { "epoch": 1.8103975535168195, "grad_norm": 0.16865181922912598, "learning_rate": 4.355483753051125e-06, "loss": 1.2123092412948608, "step": 1184 }, { "epoch": 1.8134556574923546, "grad_norm": 0.1931789070367813, "learning_rate": 4.340812592056401e-06, "loss": 0.9932126998901367, "step": 1186 }, { "epoch": 1.81651376146789, "grad_norm": 0.2547837793827057, "learning_rate": 4.326154616278326e-06, "loss": 1.2431546449661255, "step": 1188 }, { "epoch": 1.8195718654434252, "grad_norm": 0.23825769126415253, "learning_rate": 4.311509992444539e-06, "loss": 1.286515712738037, "step": 1190 }, { "epoch": 1.8226299694189603, "grad_norm": 0.25244706869125366, "learning_rate": 4.296878887130819e-06, "loss": 1.3000450134277344, "step": 1192 }, { "epoch": 1.8256880733944953, "grad_norm": 0.23451480269432068, "learning_rate": 4.282261466759165e-06, "loss": 1.2664532661437988, "step": 1194 }, { "epoch": 1.8287461773700304, "grad_norm": 0.2735919952392578, "learning_rate": 4.267657897595929e-06, "loss": 1.288360834121704, "step": 1196 }, { "epoch": 1.8318042813455657, "grad_norm": 0.18107269704341888, "learning_rate": 4.253068345749903e-06, "loss": 1.2625651359558105, "step": 1198 }, { "epoch": 1.834862385321101, "grad_norm": 0.2293253242969513, "learning_rate": 4.238492977170439e-06, "loss": 1.234043836593628, "step": 1200 }, { "epoch": 1.837920489296636, "grad_norm": 0.27160146832466125, "learning_rate": 4.223931957645566e-06, "loss": 1.300539493560791, "step": 1202 }, { "epoch": 1.8409785932721712, "grad_norm": 0.25112462043762207, "learning_rate": 4.2093854528000955e-06, "loss": 1.2719401121139526, "step": 1204 }, { "epoch": 1.8440366972477065, "grad_norm": 0.33997592329978943, "learning_rate": 4.194853628093742e-06, "loss": 1.2453508377075195, "step": 1206 }, { "epoch": 1.8470948012232415, "grad_norm": 0.6576793789863586, "learning_rate": 4.180336648819242e-06, "loss": 1.233917236328125, "step": 1208 }, { "epoch": 1.8501529051987768, "grad_norm": 0.26551222801208496, "learning_rate": 4.165834680100469e-06, "loss": 1.2595276832580566, "step": 1210 }, { "epoch": 1.853211009174312, "grad_norm": 0.2170596420764923, "learning_rate": 4.151347886890562e-06, "loss": 1.2505378723144531, "step": 1212 }, { "epoch": 1.856269113149847, "grad_norm": 0.2974804937839508, "learning_rate": 4.1368764339700404e-06, "loss": 1.2092756032943726, "step": 1214 }, { "epoch": 1.8593272171253823, "grad_norm": 0.2567199468612671, "learning_rate": 4.1224204859449425e-06, "loss": 1.2698951959609985, "step": 1216 }, { "epoch": 1.8623853211009176, "grad_norm": 0.23152267932891846, "learning_rate": 4.107980207244937e-06, "loss": 1.3027379512786865, "step": 1218 }, { "epoch": 1.8654434250764527, "grad_norm": 0.26830926537513733, "learning_rate": 4.093555762121469e-06, "loss": 1.308929443359375, "step": 1220 }, { "epoch": 1.8685015290519877, "grad_norm": 0.2566030025482178, "learning_rate": 4.07914731464588e-06, "loss": 1.2964577674865723, "step": 1222 }, { "epoch": 1.8715596330275228, "grad_norm": 0.4025701582431793, "learning_rate": 4.064755028707546e-06, "loss": 1.31220543384552, "step": 1224 }, { "epoch": 1.8746177370030581, "grad_norm": 0.25386303663253784, "learning_rate": 4.0503790680120136e-06, "loss": 1.299830436706543, "step": 1226 }, { "epoch": 1.8776758409785934, "grad_norm": 0.39947405457496643, "learning_rate": 4.036019596079136e-06, "loss": 1.3202039003372192, "step": 1228 }, { "epoch": 1.8807339449541285, "grad_norm": 0.23179592192173004, "learning_rate": 4.021676776241218e-06, "loss": 1.2405881881713867, "step": 1230 }, { "epoch": 1.8837920489296636, "grad_norm": 0.48796483874320984, "learning_rate": 4.007350771641151e-06, "loss": 1.288329005241394, "step": 1232 }, { "epoch": 1.8868501529051986, "grad_norm": 0.26645490527153015, "learning_rate": 3.993041745230562e-06, "loss": 1.2443333864212036, "step": 1234 }, { "epoch": 1.889908256880734, "grad_norm": 0.19715459644794464, "learning_rate": 3.978749859767961e-06, "loss": 1.2754254341125488, "step": 1236 }, { "epoch": 1.8929663608562692, "grad_norm": 0.2424282431602478, "learning_rate": 3.9644752778168836e-06, "loss": 1.2853577136993408, "step": 1238 }, { "epoch": 1.8960244648318043, "grad_norm": 0.22451399266719818, "learning_rate": 3.950218161744049e-06, "loss": 1.308832049369812, "step": 1240 }, { "epoch": 1.8990825688073394, "grad_norm": 0.38970160484313965, "learning_rate": 3.935978673717512e-06, "loss": 1.2945680618286133, "step": 1242 }, { "epoch": 1.9021406727828745, "grad_norm": 0.22287186980247498, "learning_rate": 3.921756975704809e-06, "loss": 1.2276027202606201, "step": 1244 }, { "epoch": 1.9051987767584098, "grad_norm": 0.2538350820541382, "learning_rate": 3.9075532294711326e-06, "loss": 1.2546557188034058, "step": 1246 }, { "epoch": 1.908256880733945, "grad_norm": 0.19810384511947632, "learning_rate": 3.893367596577475e-06, "loss": 1.2940235137939453, "step": 1248 }, { "epoch": 1.9113149847094801, "grad_norm": 0.20586298406124115, "learning_rate": 3.8792002383788044e-06, "loss": 1.3136601448059082, "step": 1250 }, { "epoch": 1.9143730886850152, "grad_norm": 0.2770041227340698, "learning_rate": 3.865051316022215e-06, "loss": 1.2952957153320312, "step": 1252 }, { "epoch": 1.9174311926605505, "grad_norm": 0.22728121280670166, "learning_rate": 3.85092099044511e-06, "loss": 1.271630048751831, "step": 1254 }, { "epoch": 1.9204892966360856, "grad_norm": 0.1984010934829712, "learning_rate": 3.836809422373354e-06, "loss": 1.2360022068023682, "step": 1256 }, { "epoch": 1.9235474006116209, "grad_norm": 0.24555295705795288, "learning_rate": 3.822716772319463e-06, "loss": 1.271683692932129, "step": 1258 }, { "epoch": 1.926605504587156, "grad_norm": 0.20771312713623047, "learning_rate": 3.8086432005807616e-06, "loss": 1.2962419986724854, "step": 1260 }, { "epoch": 1.929663608562691, "grad_norm": 0.268265962600708, "learning_rate": 3.794588867237574e-06, "loss": 1.2458467483520508, "step": 1262 }, { "epoch": 1.9327217125382263, "grad_norm": 0.3802253007888794, "learning_rate": 3.780553932151392e-06, "loss": 1.2733559608459473, "step": 1264 }, { "epoch": 1.9357798165137616, "grad_norm": 0.6309070587158203, "learning_rate": 3.766538554963062e-06, "loss": 1.270596981048584, "step": 1266 }, { "epoch": 1.9388379204892967, "grad_norm": 0.3053569793701172, "learning_rate": 3.752542895090969e-06, "loss": 1.3194211721420288, "step": 1268 }, { "epoch": 1.9418960244648318, "grad_norm": 0.21923166513442993, "learning_rate": 3.7385671117292245e-06, "loss": 1.3323618173599243, "step": 1270 }, { "epoch": 1.9449541284403669, "grad_norm": 0.2166883647441864, "learning_rate": 3.72461136384585e-06, "loss": 1.2965784072875977, "step": 1272 }, { "epoch": 1.9480122324159022, "grad_norm": 0.2825508117675781, "learning_rate": 3.710675810180977e-06, "loss": 1.3159446716308594, "step": 1274 }, { "epoch": 1.9510703363914375, "grad_norm": 0.299638956785202, "learning_rate": 3.696760609245035e-06, "loss": 1.2833199501037598, "step": 1276 }, { "epoch": 1.9541284403669725, "grad_norm": 0.2223178744316101, "learning_rate": 3.68286591931695e-06, "loss": 1.22653329372406, "step": 1278 }, { "epoch": 1.9571865443425076, "grad_norm": 0.2592408359050751, "learning_rate": 3.668991898442347e-06, "loss": 1.2542335987091064, "step": 1280 }, { "epoch": 1.9602446483180427, "grad_norm": 0.2755810618400574, "learning_rate": 3.6551387044317464e-06, "loss": 1.2745262384414673, "step": 1282 }, { "epoch": 1.963302752293578, "grad_norm": 0.21057268977165222, "learning_rate": 3.6413064948587773e-06, "loss": 1.2521765232086182, "step": 1284 }, { "epoch": 1.9663608562691133, "grad_norm": 0.34427741169929504, "learning_rate": 3.6274954270583797e-06, "loss": 1.263521432876587, "step": 1286 }, { "epoch": 1.9694189602446484, "grad_norm": 0.2196524441242218, "learning_rate": 3.6137056581250142e-06, "loss": 1.3154864311218262, "step": 1288 }, { "epoch": 1.9724770642201834, "grad_norm": 0.3191309869289398, "learning_rate": 3.599937344910872e-06, "loss": 1.2999801635742188, "step": 1290 }, { "epoch": 1.9755351681957185, "grad_norm": 0.22587168216705322, "learning_rate": 3.5861906440241057e-06, "loss": 1.3176116943359375, "step": 1292 }, { "epoch": 1.9785932721712538, "grad_norm": 0.2769485414028168, "learning_rate": 3.5724657118270344e-06, "loss": 1.273116111755371, "step": 1294 }, { "epoch": 1.981651376146789, "grad_norm": 0.3299882411956787, "learning_rate": 3.558762704434361e-06, "loss": 1.268465280532837, "step": 1296 }, { "epoch": 1.9847094801223242, "grad_norm": 0.26859885454177856, "learning_rate": 3.545081777711412e-06, "loss": 1.2919847965240479, "step": 1298 }, { "epoch": 1.9877675840978593, "grad_norm": 0.9502137899398804, "learning_rate": 3.5314230872723564e-06, "loss": 1.342604160308838, "step": 1300 }, { "epoch": 1.9908256880733946, "grad_norm": 0.2677958011627197, "learning_rate": 3.5177867884784334e-06, "loss": 1.3786706924438477, "step": 1302 }, { "epoch": 1.9938837920489296, "grad_norm": 0.40644171833992004, "learning_rate": 3.504173036436186e-06, "loss": 1.7326993942260742, "step": 1304 }, { "epoch": 1.996941896024465, "grad_norm": 0.45419755578041077, "learning_rate": 3.4905819859957002e-06, "loss": 1.7214076519012451, "step": 1306 }, { "epoch": 2.0, "grad_norm": 0.9430392980575562, "learning_rate": 3.4770137917488454e-06, "loss": 1.8467901945114136, "step": 1308 }, { "epoch": 2.003058103975535, "grad_norm": 0.26824504137039185, "learning_rate": 3.463468608027505e-06, "loss": 1.4361066818237305, "step": 1310 }, { "epoch": 2.00611620795107, "grad_norm": 0.22578075528144836, "learning_rate": 3.4499465889018337e-06, "loss": 1.394030213356018, "step": 1312 }, { "epoch": 2.0091743119266057, "grad_norm": 0.26776137948036194, "learning_rate": 3.4364478881785002e-06, "loss": 1.4127156734466553, "step": 1314 }, { "epoch": 2.0122324159021407, "grad_norm": 0.3707635998725891, "learning_rate": 3.4229726593989353e-06, "loss": 1.340601921081543, "step": 1316 }, { "epoch": 2.015290519877676, "grad_norm": 0.23890726268291473, "learning_rate": 3.409521055837586e-06, "loss": 1.5300512313842773, "step": 1318 }, { "epoch": 2.018348623853211, "grad_norm": 0.21163959801197052, "learning_rate": 3.396093230500176e-06, "loss": 1.4162603616714478, "step": 1320 }, { "epoch": 2.021406727828746, "grad_norm": 0.3320009112358093, "learning_rate": 3.3826893361219614e-06, "loss": 1.3640984296798706, "step": 1322 }, { "epoch": 2.0244648318042815, "grad_norm": 0.2645728886127472, "learning_rate": 3.3693095251659975e-06, "loss": 1.4446080923080444, "step": 1324 }, { "epoch": 2.0275229357798166, "grad_norm": 0.2824868857860565, "learning_rate": 3.3559539498213965e-06, "loss": 1.3105710744857788, "step": 1326 }, { "epoch": 2.0305810397553516, "grad_norm": 0.23126038908958435, "learning_rate": 3.342622762001606e-06, "loss": 1.3857829570770264, "step": 1328 }, { "epoch": 2.0336391437308867, "grad_norm": 0.3670974671840668, "learning_rate": 3.3293161133426777e-06, "loss": 1.496924638748169, "step": 1330 }, { "epoch": 2.036697247706422, "grad_norm": 0.3528394401073456, "learning_rate": 3.3160341552015375e-06, "loss": 1.4135003089904785, "step": 1332 }, { "epoch": 2.0397553516819573, "grad_norm": 0.20478151738643646, "learning_rate": 3.3027770386542706e-06, "loss": 1.2156240940093994, "step": 1334 }, { "epoch": 2.0428134556574924, "grad_norm": 0.46617865562438965, "learning_rate": 3.289544914494403e-06, "loss": 1.3763898611068726, "step": 1336 }, { "epoch": 2.0458715596330275, "grad_norm": 0.3884037733078003, "learning_rate": 3.276337933231179e-06, "loss": 1.622403860092163, "step": 1338 }, { "epoch": 2.0489296636085625, "grad_norm": 0.25180479884147644, "learning_rate": 3.2631562450878597e-06, "loss": 1.2860331535339355, "step": 1340 }, { "epoch": 2.051987767584098, "grad_norm": 0.3756599426269531, "learning_rate": 3.2500000000000015e-06, "loss": 1.4189289808273315, "step": 1342 }, { "epoch": 2.055045871559633, "grad_norm": 0.32630693912506104, "learning_rate": 3.236869347613764e-06, "loss": 1.308931827545166, "step": 1344 }, { "epoch": 2.058103975535168, "grad_norm": 0.28512176871299744, "learning_rate": 3.2237644372842016e-06, "loss": 1.2988288402557373, "step": 1346 }, { "epoch": 2.0611620795107033, "grad_norm": 0.19952069222927094, "learning_rate": 3.2106854180735625e-06, "loss": 1.3092859983444214, "step": 1348 }, { "epoch": 2.0642201834862384, "grad_norm": 0.24031268060207367, "learning_rate": 3.1976324387495948e-06, "loss": 1.3389842510223389, "step": 1350 }, { "epoch": 2.067278287461774, "grad_norm": 0.26569297909736633, "learning_rate": 3.1846056477838572e-06, "loss": 1.5241750478744507, "step": 1352 }, { "epoch": 2.070336391437309, "grad_norm": 0.5251048803329468, "learning_rate": 3.171605193350028e-06, "loss": 1.542860507965088, "step": 1354 }, { "epoch": 2.073394495412844, "grad_norm": 0.34643858671188354, "learning_rate": 3.158631223322216e-06, "loss": 1.3612843751907349, "step": 1356 }, { "epoch": 2.076452599388379, "grad_norm": 0.2934923470020294, "learning_rate": 3.145683885273288e-06, "loss": 1.355604648590088, "step": 1358 }, { "epoch": 2.079510703363914, "grad_norm": 0.743224024772644, "learning_rate": 3.1327633264731806e-06, "loss": 1.341210126876831, "step": 1360 }, { "epoch": 2.0825688073394497, "grad_norm": 0.32269051671028137, "learning_rate": 3.11986969388723e-06, "loss": 1.4118154048919678, "step": 1362 }, { "epoch": 2.085626911314985, "grad_norm": 0.29159843921661377, "learning_rate": 3.1070031341744983e-06, "loss": 1.389265775680542, "step": 1364 }, { "epoch": 2.08868501529052, "grad_norm": 0.24911250174045563, "learning_rate": 3.094163793686108e-06, "loss": 1.422662377357483, "step": 1366 }, { "epoch": 2.091743119266055, "grad_norm": 0.21826767921447754, "learning_rate": 3.0813518184635737e-06, "loss": 1.4053363800048828, "step": 1368 }, { "epoch": 2.09480122324159, "grad_norm": 0.3076784610748291, "learning_rate": 3.0685673542371465e-06, "loss": 1.283433198928833, "step": 1370 }, { "epoch": 2.0978593272171255, "grad_norm": 0.17591321468353271, "learning_rate": 3.0558105464241466e-06, "loss": 1.237450361251831, "step": 1372 }, { "epoch": 2.1009174311926606, "grad_norm": 0.2663421332836151, "learning_rate": 3.0430815401273206e-06, "loss": 1.3944424390792847, "step": 1374 }, { "epoch": 2.1039755351681957, "grad_norm": 0.26904943585395813, "learning_rate": 3.030380480133186e-06, "loss": 1.5187671184539795, "step": 1376 }, { "epoch": 2.1070336391437308, "grad_norm": 0.6649749279022217, "learning_rate": 3.017707510910378e-06, "loss": 1.3504502773284912, "step": 1378 }, { "epoch": 2.1100917431192663, "grad_norm": 0.37516942620277405, "learning_rate": 3.0050627766080188e-06, "loss": 1.5420799255371094, "step": 1380 }, { "epoch": 2.1131498470948014, "grad_norm": 0.342439204454422, "learning_rate": 2.9924464210540717e-06, "loss": 1.5547534227371216, "step": 1382 }, { "epoch": 2.1162079510703364, "grad_norm": 0.48497647047042847, "learning_rate": 2.979858587753698e-06, "loss": 1.3153679370880127, "step": 1384 }, { "epoch": 2.1192660550458715, "grad_norm": 0.39512813091278076, "learning_rate": 2.96729941988764e-06, "loss": 1.2663487195968628, "step": 1386 }, { "epoch": 2.1223241590214066, "grad_norm": 0.3283194899559021, "learning_rate": 2.9547690603105774e-06, "loss": 1.4247238636016846, "step": 1388 }, { "epoch": 2.1253822629969417, "grad_norm": 0.3506661355495453, "learning_rate": 2.942267651549513e-06, "loss": 1.2393386363983154, "step": 1390 }, { "epoch": 2.128440366972477, "grad_norm": 0.3594140112400055, "learning_rate": 2.9297953358021487e-06, "loss": 1.317380666732788, "step": 1392 }, { "epoch": 2.1314984709480123, "grad_norm": 0.5971735715866089, "learning_rate": 2.9173522549352608e-06, "loss": 1.2773442268371582, "step": 1394 }, { "epoch": 2.1345565749235473, "grad_norm": 0.3666265606880188, "learning_rate": 2.9049385504830987e-06, "loss": 1.34925377368927, "step": 1396 }, { "epoch": 2.1376146788990824, "grad_norm": 0.31561410427093506, "learning_rate": 2.892554363645766e-06, "loss": 1.2674505710601807, "step": 1398 }, { "epoch": 2.140672782874618, "grad_norm": 0.2038232684135437, "learning_rate": 2.880199835287618e-06, "loss": 1.3169916868209839, "step": 1400 }, { "epoch": 2.143730886850153, "grad_norm": 0.25303685665130615, "learning_rate": 2.867875105935658e-06, "loss": 1.4587633609771729, "step": 1402 }, { "epoch": 2.146788990825688, "grad_norm": 0.31143543124198914, "learning_rate": 2.8555803157779384e-06, "loss": 1.3396885395050049, "step": 1404 }, { "epoch": 2.149847094801223, "grad_norm": 0.2281101942062378, "learning_rate": 2.8433156046619705e-06, "loss": 1.2936108112335205, "step": 1406 }, { "epoch": 2.1529051987767582, "grad_norm": 0.3648523688316345, "learning_rate": 2.831081112093129e-06, "loss": 1.5100679397583008, "step": 1408 }, { "epoch": 2.1559633027522938, "grad_norm": 0.278677374124527, "learning_rate": 2.8188769772330637e-06, "loss": 1.3869754076004028, "step": 1410 }, { "epoch": 2.159021406727829, "grad_norm": 0.21437983214855194, "learning_rate": 2.806703338898123e-06, "loss": 1.3129749298095703, "step": 1412 }, { "epoch": 2.162079510703364, "grad_norm": 0.24729043245315552, "learning_rate": 2.794560335557771e-06, "loss": 1.4099204540252686, "step": 1414 }, { "epoch": 2.165137614678899, "grad_norm": 0.3120039701461792, "learning_rate": 2.7824481053330154e-06, "loss": 1.3897459506988525, "step": 1416 }, { "epoch": 2.168195718654434, "grad_norm": 0.4525415897369385, "learning_rate": 2.770366785994827e-06, "loss": 1.445647954940796, "step": 1418 }, { "epoch": 2.1712538226299696, "grad_norm": 0.4266716241836548, "learning_rate": 2.758316514962585e-06, "loss": 1.3233726024627686, "step": 1420 }, { "epoch": 2.1743119266055047, "grad_norm": 0.28266647458076477, "learning_rate": 2.7462974293025112e-06, "loss": 1.4238274097442627, "step": 1422 }, { "epoch": 2.1773700305810397, "grad_norm": 0.3248072564601898, "learning_rate": 2.7343096657261e-06, "loss": 1.3104677200317383, "step": 1424 }, { "epoch": 2.180428134556575, "grad_norm": 0.3584449887275696, "learning_rate": 2.7223533605885784e-06, "loss": 1.6277508735656738, "step": 1426 }, { "epoch": 2.18348623853211, "grad_norm": 0.35764527320861816, "learning_rate": 2.710428649887348e-06, "loss": 1.3882687091827393, "step": 1428 }, { "epoch": 2.1865443425076454, "grad_norm": 0.24804551899433136, "learning_rate": 2.6985356692604336e-06, "loss": 1.4513651132583618, "step": 1430 }, { "epoch": 2.1896024464831805, "grad_norm": 0.2202014923095703, "learning_rate": 2.686674553984951e-06, "loss": 1.4342420101165771, "step": 1432 }, { "epoch": 2.1926605504587156, "grad_norm": 0.36250677704811096, "learning_rate": 2.6748454389755576e-06, "loss": 1.394620656967163, "step": 1434 }, { "epoch": 2.1957186544342506, "grad_norm": 0.3232296109199524, "learning_rate": 2.6630484587829265e-06, "loss": 1.3978071212768555, "step": 1436 }, { "epoch": 2.198776758409786, "grad_norm": 0.4420628547668457, "learning_rate": 2.651283747592211e-06, "loss": 1.4031468629837036, "step": 1438 }, { "epoch": 2.2018348623853212, "grad_norm": 0.6229142546653748, "learning_rate": 2.639551439221516e-06, "loss": 1.3914484977722168, "step": 1440 }, { "epoch": 2.2048929663608563, "grad_norm": 0.3233772814273834, "learning_rate": 2.627851667120387e-06, "loss": 1.476043701171875, "step": 1442 }, { "epoch": 2.2079510703363914, "grad_norm": 0.35107681155204773, "learning_rate": 2.6161845643682763e-06, "loss": 1.407777190208435, "step": 1444 }, { "epoch": 2.2110091743119265, "grad_norm": 0.3123028874397278, "learning_rate": 2.6045502636730457e-06, "loss": 1.3102259635925293, "step": 1446 }, { "epoch": 2.214067278287462, "grad_norm": 0.2534146308898926, "learning_rate": 2.5929488973694406e-06, "loss": 1.2788276672363281, "step": 1448 }, { "epoch": 2.217125382262997, "grad_norm": 0.24462664127349854, "learning_rate": 2.581380597417599e-06, "loss": 1.3362743854522705, "step": 1450 }, { "epoch": 2.220183486238532, "grad_norm": 0.2978283166885376, "learning_rate": 2.569845495401542e-06, "loss": 1.2902576923370361, "step": 1452 }, { "epoch": 2.223241590214067, "grad_norm": 0.299277126789093, "learning_rate": 2.5583437225276818e-06, "loss": 1.3449206352233887, "step": 1454 }, { "epoch": 2.2262996941896023, "grad_norm": 0.36601486802101135, "learning_rate": 2.546875409623324e-06, "loss": 1.3038407564163208, "step": 1456 }, { "epoch": 2.229357798165138, "grad_norm": 0.42299339175224304, "learning_rate": 2.5354406871351833e-06, "loss": 1.5554304122924805, "step": 1458 }, { "epoch": 2.232415902140673, "grad_norm": 0.32388123869895935, "learning_rate": 2.5240396851279043e-06, "loss": 1.5746049880981445, "step": 1460 }, { "epoch": 2.235474006116208, "grad_norm": 0.39095836877822876, "learning_rate": 2.5126725332825675e-06, "loss": 1.6094728708267212, "step": 1462 }, { "epoch": 2.238532110091743, "grad_norm": 0.5842258930206299, "learning_rate": 2.501339360895231e-06, "loss": 1.5279463529586792, "step": 1464 }, { "epoch": 2.241590214067278, "grad_norm": 0.3429890275001526, "learning_rate": 2.4900402968754504e-06, "loss": 1.5856099128723145, "step": 1466 }, { "epoch": 2.2446483180428136, "grad_norm": 0.35519224405288696, "learning_rate": 2.4787754697448153e-06, "loss": 1.4757394790649414, "step": 1468 }, { "epoch": 2.2477064220183487, "grad_norm": 0.46203580498695374, "learning_rate": 2.4675450076354822e-06, "loss": 1.584846019744873, "step": 1470 }, { "epoch": 2.2507645259938838, "grad_norm": 0.8099899888038635, "learning_rate": 2.4563490382887267e-06, "loss": 1.367172360420227, "step": 1472 }, { "epoch": 2.253822629969419, "grad_norm": 0.7287035584449768, "learning_rate": 2.4451876890534847e-06, "loss": 1.492293357849121, "step": 1474 }, { "epoch": 2.2568807339449544, "grad_norm": 0.3203519284725189, "learning_rate": 2.4340610868849e-06, "loss": 1.2751667499542236, "step": 1476 }, { "epoch": 2.2599388379204894, "grad_norm": 0.6493098139762878, "learning_rate": 2.4229693583428916e-06, "loss": 1.4823472499847412, "step": 1478 }, { "epoch": 2.2629969418960245, "grad_norm": 0.4101910889148712, "learning_rate": 2.4119126295906997e-06, "loss": 1.09395170211792, "step": 1480 }, { "epoch": 2.2660550458715596, "grad_norm": 0.4682796597480774, "learning_rate": 2.400891026393464e-06, "loss": 1.0601507425308228, "step": 1482 }, { "epoch": 2.2691131498470947, "grad_norm": 0.5146844387054443, "learning_rate": 2.3899046741167868e-06, "loss": 1.2724342346191406, "step": 1484 }, { "epoch": 2.2721712538226297, "grad_norm": 0.8610156178474426, "learning_rate": 2.3789536977253034e-06, "loss": 1.3352521657943726, "step": 1486 }, { "epoch": 2.2752293577981653, "grad_norm": 1.053831696510315, "learning_rate": 2.3680382217812685e-06, "loss": 1.4391016960144043, "step": 1488 }, { "epoch": 2.2782874617737003, "grad_norm": 0.6413374543190002, "learning_rate": 2.3571583704431355e-06, "loss": 1.3907897472381592, "step": 1490 }, { "epoch": 2.2813455657492354, "grad_norm": 0.30044737458229065, "learning_rate": 2.346314267464145e-06, "loss": 1.1618599891662598, "step": 1492 }, { "epoch": 2.2844036697247705, "grad_norm": 0.3427642285823822, "learning_rate": 2.3355060361909134e-06, "loss": 1.134230375289917, "step": 1494 }, { "epoch": 2.287461773700306, "grad_norm": 0.28166523575782776, "learning_rate": 2.3247337995620363e-06, "loss": 1.357274055480957, "step": 1496 }, { "epoch": 2.290519877675841, "grad_norm": 0.7598418593406677, "learning_rate": 2.313997680106686e-06, "loss": 1.2663555145263672, "step": 1498 }, { "epoch": 2.293577981651376, "grad_norm": 1.0048569440841675, "learning_rate": 2.3032977999432205e-06, "loss": 1.2259790897369385, "step": 1500 }, { "epoch": 2.2966360856269112, "grad_norm": 0.3067741096019745, "learning_rate": 2.2926342807777886e-06, "loss": 1.435164213180542, "step": 1502 }, { "epoch": 2.2996941896024463, "grad_norm": 0.5623937249183655, "learning_rate": 2.2820072439029524e-06, "loss": 1.4023568630218506, "step": 1504 }, { "epoch": 2.302752293577982, "grad_norm": 0.3359718918800354, "learning_rate": 2.271416810196308e-06, "loss": 1.1277801990509033, "step": 1506 }, { "epoch": 2.305810397553517, "grad_norm": 0.3305533528327942, "learning_rate": 2.2608631001190994e-06, "loss": 1.3414134979248047, "step": 1508 }, { "epoch": 2.308868501529052, "grad_norm": 0.28481531143188477, "learning_rate": 2.2503462337148642e-06, "loss": 1.4879052639007568, "step": 1510 }, { "epoch": 2.311926605504587, "grad_norm": 0.28595951199531555, "learning_rate": 2.239866330608057e-06, "loss": 1.6209688186645508, "step": 1512 }, { "epoch": 2.314984709480122, "grad_norm": 0.29558923840522766, "learning_rate": 2.2294235100026933e-06, "loss": 1.6481235027313232, "step": 1514 }, { "epoch": 2.3180428134556577, "grad_norm": 0.5758782029151917, "learning_rate": 2.21901789068099e-06, "loss": 1.7679166793823242, "step": 1516 }, { "epoch": 2.3211009174311927, "grad_norm": 0.3111439347267151, "learning_rate": 2.2086495910020192e-06, "loss": 1.3151183128356934, "step": 1518 }, { "epoch": 2.324159021406728, "grad_norm": 0.44918501377105713, "learning_rate": 2.1983187289003587e-06, "loss": 1.3933916091918945, "step": 1520 }, { "epoch": 2.327217125382263, "grad_norm": 0.3173042833805084, "learning_rate": 2.188025421884754e-06, "loss": 1.240437388420105, "step": 1522 }, { "epoch": 2.330275229357798, "grad_norm": 0.2350539118051529, "learning_rate": 2.1777697870367713e-06, "loss": 1.1647779941558838, "step": 1524 }, { "epoch": 2.3333333333333335, "grad_norm": 0.3137843906879425, "learning_rate": 2.1675519410094803e-06, "loss": 1.5445265769958496, "step": 1526 }, { "epoch": 2.3363914373088686, "grad_norm": 0.5268841981887817, "learning_rate": 2.157372000026119e-06, "loss": 1.444595217704773, "step": 1528 }, { "epoch": 2.3394495412844036, "grad_norm": 0.3506692349910736, "learning_rate": 2.1472300798787746e-06, "loss": 1.6354224681854248, "step": 1530 }, { "epoch": 2.3425076452599387, "grad_norm": 0.3233583867549896, "learning_rate": 2.1371262959270594e-06, "loss": 1.1021732091903687, "step": 1532 }, { "epoch": 2.3455657492354742, "grad_norm": 0.29296091198921204, "learning_rate": 2.1270607630968104e-06, "loss": 1.3453254699707031, "step": 1534 }, { "epoch": 2.3486238532110093, "grad_norm": 0.3317727744579315, "learning_rate": 2.1170335958787736e-06, "loss": 1.607575535774231, "step": 1536 }, { "epoch": 2.3516819571865444, "grad_norm": 0.2295382171869278, "learning_rate": 2.1070449083273047e-06, "loss": 1.3497262001037598, "step": 1538 }, { "epoch": 2.3547400611620795, "grad_norm": 0.4568946957588196, "learning_rate": 2.0970948140590672e-06, "loss": 1.509822130203247, "step": 1540 }, { "epoch": 2.3577981651376145, "grad_norm": 0.34416595101356506, "learning_rate": 2.08718342625175e-06, "loss": 1.385573148727417, "step": 1542 }, { "epoch": 2.3608562691131496, "grad_norm": 0.33610644936561584, "learning_rate": 2.077310857642772e-06, "loss": 1.3133833408355713, "step": 1544 }, { "epoch": 2.363914373088685, "grad_norm": 0.332163006067276, "learning_rate": 2.067477220527998e-06, "loss": 1.3794035911560059, "step": 1546 }, { "epoch": 2.36697247706422, "grad_norm": 0.46091410517692566, "learning_rate": 2.05768262676047e-06, "loss": 1.4221172332763672, "step": 1548 }, { "epoch": 2.3700305810397553, "grad_norm": 0.2670794427394867, "learning_rate": 2.0479271877491278e-06, "loss": 1.2908828258514404, "step": 1550 }, { "epoch": 2.3730886850152904, "grad_norm": 0.31927385926246643, "learning_rate": 2.038211014457546e-06, "loss": 1.3988337516784668, "step": 1552 }, { "epoch": 2.376146788990826, "grad_norm": 0.4126211404800415, "learning_rate": 2.028534217402667e-06, "loss": 1.7016716003417969, "step": 1554 }, { "epoch": 2.379204892966361, "grad_norm": 0.6094360947608948, "learning_rate": 2.0188969066535484e-06, "loss": 2.0326876640319824, "step": 1556 }, { "epoch": 2.382262996941896, "grad_norm": 0.40967652201652527, "learning_rate": 2.0092991918301106e-06, "loss": 1.3301377296447754, "step": 1558 }, { "epoch": 2.385321100917431, "grad_norm": 0.6155174970626831, "learning_rate": 1.9997411821018885e-06, "loss": 1.319265604019165, "step": 1560 }, { "epoch": 2.388379204892966, "grad_norm": 0.4441206455230713, "learning_rate": 1.990222986186786e-06, "loss": 1.3922169208526611, "step": 1562 }, { "epoch": 2.3914373088685017, "grad_norm": 0.5924298167228699, "learning_rate": 1.980744712349849e-06, "loss": 1.4741730690002441, "step": 1564 }, { "epoch": 2.3944954128440368, "grad_norm": 0.42252296209335327, "learning_rate": 1.9713064684020262e-06, "loss": 1.4076108932495117, "step": 1566 }, { "epoch": 2.397553516819572, "grad_norm": 0.36031708121299744, "learning_rate": 1.9619083616989457e-06, "loss": 1.278861403465271, "step": 1568 }, { "epoch": 2.400611620795107, "grad_norm": 0.24064381420612335, "learning_rate": 1.952550499139689e-06, "loss": 1.19804048538208, "step": 1570 }, { "epoch": 2.4036697247706424, "grad_norm": 0.18197159469127655, "learning_rate": 1.9432329871655837e-06, "loss": 1.12447988986969, "step": 1572 }, { "epoch": 2.4067278287461775, "grad_norm": 0.30438297986984253, "learning_rate": 1.933955931758988e-06, "loss": 1.2643486261367798, "step": 1574 }, { "epoch": 2.4097859327217126, "grad_norm": 0.5426669120788574, "learning_rate": 1.9247194384420855e-06, "loss": 1.504340410232544, "step": 1576 }, { "epoch": 2.4128440366972477, "grad_norm": 0.6118716597557068, "learning_rate": 1.915523612275681e-06, "loss": 1.5359920263290405, "step": 1578 }, { "epoch": 2.4159021406727827, "grad_norm": 0.5290548801422119, "learning_rate": 1.9063685578580137e-06, "loss": 1.5219250917434692, "step": 1580 }, { "epoch": 2.418960244648318, "grad_norm": 0.348886638879776, "learning_rate": 1.8972543793235626e-06, "loss": 1.5620722770690918, "step": 1582 }, { "epoch": 2.4220183486238533, "grad_norm": 0.4480542838573456, "learning_rate": 1.8881811803418624e-06, "loss": 1.3870704174041748, "step": 1584 }, { "epoch": 2.4250764525993884, "grad_norm": 0.6594481468200684, "learning_rate": 1.8791490641163218e-06, "loss": 1.5246330499649048, "step": 1586 }, { "epoch": 2.4281345565749235, "grad_norm": 0.48964548110961914, "learning_rate": 1.870158133383055e-06, "loss": 1.4073295593261719, "step": 1588 }, { "epoch": 2.4311926605504586, "grad_norm": 0.40440455079078674, "learning_rate": 1.8612084904097117e-06, "loss": 1.329315423965454, "step": 1590 }, { "epoch": 2.434250764525994, "grad_norm": 0.3714819550514221, "learning_rate": 1.852300236994308e-06, "loss": 1.3444490432739258, "step": 1592 }, { "epoch": 2.437308868501529, "grad_norm": 0.5145377516746521, "learning_rate": 1.8434334744640763e-06, "loss": 1.5467479228973389, "step": 1594 }, { "epoch": 2.4403669724770642, "grad_norm": 0.46002912521362305, "learning_rate": 1.8346083036743104e-06, "loss": 1.289878249168396, "step": 1596 }, { "epoch": 2.4434250764525993, "grad_norm": 0.793483555316925, "learning_rate": 1.8258248250072158e-06, "loss": 1.4660496711730957, "step": 1598 }, { "epoch": 2.4464831804281344, "grad_norm": 0.44911351799964905, "learning_rate": 1.8170831383707683e-06, "loss": 1.3652875423431396, "step": 1600 }, { "epoch": 2.44954128440367, "grad_norm": 0.38207677006721497, "learning_rate": 1.8083833431975805e-06, "loss": 1.3762791156768799, "step": 1602 }, { "epoch": 2.452599388379205, "grad_norm": 0.4357513189315796, "learning_rate": 1.7997255384437695e-06, "loss": 1.5232503414154053, "step": 1604 }, { "epoch": 2.45565749235474, "grad_norm": 0.3423779308795929, "learning_rate": 1.7911098225878309e-06, "loss": 1.5271486043930054, "step": 1606 }, { "epoch": 2.458715596330275, "grad_norm": 5.960415363311768, "learning_rate": 1.7825362936295171e-06, "loss": 1.3485842943191528, "step": 1608 }, { "epoch": 2.46177370030581, "grad_norm": 0.36111417412757874, "learning_rate": 1.774005049088725e-06, "loss": 1.2900433540344238, "step": 1610 }, { "epoch": 2.4648318042813457, "grad_norm": 0.33147767186164856, "learning_rate": 1.7655161860043873e-06, "loss": 1.4210761785507202, "step": 1612 }, { "epoch": 2.467889908256881, "grad_norm": 0.3786766231060028, "learning_rate": 1.7570698009333664e-06, "loss": 1.370017409324646, "step": 1614 }, { "epoch": 2.470948012232416, "grad_norm": 1.8267617225646973, "learning_rate": 1.7486659899493537e-06, "loss": 1.5153461694717407, "step": 1616 }, { "epoch": 2.474006116207951, "grad_norm": 0.3199278712272644, "learning_rate": 1.740304848641787e-06, "loss": 1.3838684558868408, "step": 1618 }, { "epoch": 2.477064220183486, "grad_norm": 0.3670620322227478, "learning_rate": 1.731986472114751e-06, "loss": 1.33723783493042, "step": 1620 }, { "epoch": 2.4801223241590216, "grad_norm": 0.36861374974250793, "learning_rate": 1.7237109549859043e-06, "loss": 1.2932226657867432, "step": 1622 }, { "epoch": 2.4831804281345566, "grad_norm": 0.34438320994377136, "learning_rate": 1.7154783913853968e-06, "loss": 1.42689049243927, "step": 1624 }, { "epoch": 2.4862385321100917, "grad_norm": 0.23838122189044952, "learning_rate": 1.7072888749548033e-06, "loss": 1.4100431203842163, "step": 1626 }, { "epoch": 2.489296636085627, "grad_norm": 0.46484264731407166, "learning_rate": 1.6991424988460592e-06, "loss": 1.3829045295715332, "step": 1628 }, { "epoch": 2.4923547400611623, "grad_norm": 0.3008574843406677, "learning_rate": 1.6910393557203964e-06, "loss": 1.5693084001541138, "step": 1630 }, { "epoch": 2.4954128440366974, "grad_norm": 0.37115153670310974, "learning_rate": 1.6829795377472908e-06, "loss": 1.7590757608413696, "step": 1632 }, { "epoch": 2.4984709480122325, "grad_norm": 0.616698682308197, "learning_rate": 1.674963136603417e-06, "loss": 1.6397650241851807, "step": 1634 }, { "epoch": 2.5015290519877675, "grad_norm": 0.384959876537323, "learning_rate": 1.6669902434716046e-06, "loss": 1.6299896240234375, "step": 1636 }, { "epoch": 2.5045871559633026, "grad_norm": 0.8294275403022766, "learning_rate": 1.6590609490397958e-06, "loss": 1.5394856929779053, "step": 1638 }, { "epoch": 2.5076452599388377, "grad_norm": 0.40894415974617004, "learning_rate": 1.6511753435000205e-06, "loss": 1.2182371616363525, "step": 1640 }, { "epoch": 2.510703363914373, "grad_norm": 0.45905759930610657, "learning_rate": 1.6433335165473686e-06, "loss": 1.2023439407348633, "step": 1642 }, { "epoch": 2.5137614678899083, "grad_norm": 0.38532376289367676, "learning_rate": 1.635535557378968e-06, "loss": 1.6095008850097656, "step": 1644 }, { "epoch": 2.5168195718654434, "grad_norm": 1.44415283203125, "learning_rate": 1.6277815546929688e-06, "loss": 1.6082322597503662, "step": 1646 }, { "epoch": 2.5198776758409784, "grad_norm": 0.5093996524810791, "learning_rate": 1.6200715966875394e-06, "loss": 1.7141090631484985, "step": 1648 }, { "epoch": 2.522935779816514, "grad_norm": 0.5241023898124695, "learning_rate": 1.6124057710598603e-06, "loss": 1.6450610160827637, "step": 1650 }, { "epoch": 2.525993883792049, "grad_norm": 0.49204516410827637, "learning_rate": 1.6047841650051272e-06, "loss": 1.6974513530731201, "step": 1652 }, { "epoch": 2.529051987767584, "grad_norm": 0.8506813049316406, "learning_rate": 1.5972068652155554e-06, "loss": 1.5313912630081177, "step": 1654 }, { "epoch": 2.532110091743119, "grad_norm": 0.33754727244377136, "learning_rate": 1.5896739578794e-06, "loss": 1.5209699869155884, "step": 1656 }, { "epoch": 2.5351681957186543, "grad_norm": 0.7774704694747925, "learning_rate": 1.5821855286799742e-06, "loss": 1.4035563468933105, "step": 1658 }, { "epoch": 2.5382262996941893, "grad_norm": 0.6433319449424744, "learning_rate": 1.5747416627946673e-06, "loss": 1.665273666381836, "step": 1660 }, { "epoch": 2.541284403669725, "grad_norm": 0.6971220970153809, "learning_rate": 1.5673424448939887e-06, "loss": 1.5019344091415405, "step": 1662 }, { "epoch": 2.54434250764526, "grad_norm": 0.40314802527427673, "learning_rate": 1.5599879591405917e-06, "loss": 1.1620054244995117, "step": 1664 }, { "epoch": 2.547400611620795, "grad_norm": 0.48018017411231995, "learning_rate": 1.552678289188326e-06, "loss": 1.6923828125, "step": 1666 }, { "epoch": 2.5504587155963305, "grad_norm": 0.4809359312057495, "learning_rate": 1.545413518181283e-06, "loss": 1.7656713724136353, "step": 1668 }, { "epoch": 2.5535168195718656, "grad_norm": 0.40401753783226013, "learning_rate": 1.5381937287528449e-06, "loss": 1.8313161134719849, "step": 1670 }, { "epoch": 2.5565749235474007, "grad_norm": 0.4581202268600464, "learning_rate": 1.5310190030247546e-06, "loss": 1.7572789192199707, "step": 1672 }, { "epoch": 2.5596330275229358, "grad_norm": 0.9305920600891113, "learning_rate": 1.5238894226061737e-06, "loss": 1.7307026386260986, "step": 1674 }, { "epoch": 2.562691131498471, "grad_norm": 0.47380930185317993, "learning_rate": 1.5168050685927566e-06, "loss": 1.5947740077972412, "step": 1676 }, { "epoch": 2.565749235474006, "grad_norm": 1.2263463735580444, "learning_rate": 1.5097660215657306e-06, "loss": 1.4555588960647583, "step": 1678 }, { "epoch": 2.5688073394495414, "grad_norm": 0.43118909001350403, "learning_rate": 1.5027723615909745e-06, "loss": 1.0147868394851685, "step": 1680 }, { "epoch": 2.5718654434250765, "grad_norm": 0.5391921401023865, "learning_rate": 1.4958241682181137e-06, "loss": 1.0223249197006226, "step": 1682 }, { "epoch": 2.5749235474006116, "grad_norm": 0.2522028386592865, "learning_rate": 1.4889215204796082e-06, "loss": 1.250197172164917, "step": 1684 }, { "epoch": 2.5779816513761467, "grad_norm": 0.29159918427467346, "learning_rate": 1.4820644968898605e-06, "loss": 1.1835776567459106, "step": 1686 }, { "epoch": 2.581039755351682, "grad_norm": 0.2946909964084625, "learning_rate": 1.47525317544432e-06, "loss": 1.1374409198760986, "step": 1688 }, { "epoch": 2.5840978593272173, "grad_norm": 0.19036340713500977, "learning_rate": 1.468487633618594e-06, "loss": 1.1817882061004639, "step": 1690 }, { "epoch": 2.5871559633027523, "grad_norm": 1.4873279333114624, "learning_rate": 1.4617679483675673e-06, "loss": 1.4171775579452515, "step": 1692 }, { "epoch": 2.5902140672782874, "grad_norm": 0.32151684165000916, "learning_rate": 1.4550941961245288e-06, "loss": 1.3625459671020508, "step": 1694 }, { "epoch": 2.5932721712538225, "grad_norm": 0.26637983322143555, "learning_rate": 1.4484664528003026e-06, "loss": 1.2058180570602417, "step": 1696 }, { "epoch": 2.5963302752293576, "grad_norm": 0.5087877511978149, "learning_rate": 1.4418847937823784e-06, "loss": 1.425114631652832, "step": 1698 }, { "epoch": 2.599388379204893, "grad_norm": 0.9368872046470642, "learning_rate": 1.4353492939340618e-06, "loss": 1.4749643802642822, "step": 1700 }, { "epoch": 2.602446483180428, "grad_norm": 0.48912081122398376, "learning_rate": 1.4288600275936184e-06, "loss": 1.245436668395996, "step": 1702 }, { "epoch": 2.6055045871559632, "grad_norm": 0.4674423635005951, "learning_rate": 1.4224170685734303e-06, "loss": 1.4404422044754028, "step": 1704 }, { "epoch": 2.6085626911314987, "grad_norm": 0.7305318117141724, "learning_rate": 1.416020490159152e-06, "loss": 1.6482999324798584, "step": 1706 }, { "epoch": 2.611620795107034, "grad_norm": 0.5728065371513367, "learning_rate": 1.4096703651088848e-06, "loss": 1.1557910442352295, "step": 1708 }, { "epoch": 2.614678899082569, "grad_norm": 0.6479355096817017, "learning_rate": 1.4033667656523405e-06, "loss": 1.4093899726867676, "step": 1710 }, { "epoch": 2.617737003058104, "grad_norm": 1.1274484395980835, "learning_rate": 1.3971097634900262e-06, "loss": 1.4923943281173706, "step": 1712 }, { "epoch": 2.620795107033639, "grad_norm": 0.5374640822410583, "learning_rate": 1.3908994297924275e-06, "loss": 1.3800336122512817, "step": 1714 }, { "epoch": 2.623853211009174, "grad_norm": 0.6038364171981812, "learning_rate": 1.3847358351991945e-06, "loss": 1.2194199562072754, "step": 1716 }, { "epoch": 2.6269113149847096, "grad_norm": 0.7064008712768555, "learning_rate": 1.3786190498183446e-06, "loss": 0.8604775667190552, "step": 1718 }, { "epoch": 2.6299694189602447, "grad_norm": 0.3798482418060303, "learning_rate": 1.3725491432254627e-06, "loss": 1.5459158420562744, "step": 1720 }, { "epoch": 2.63302752293578, "grad_norm": 0.47553232312202454, "learning_rate": 1.3665261844629053e-06, "loss": 1.466538429260254, "step": 1722 }, { "epoch": 2.636085626911315, "grad_norm": 0.3397771716117859, "learning_rate": 1.360550242039024e-06, "loss": 1.3562582731246948, "step": 1724 }, { "epoch": 2.6391437308868504, "grad_norm": 0.282279908657074, "learning_rate": 1.354621383927379e-06, "loss": 1.4752657413482666, "step": 1726 }, { "epoch": 2.6422018348623855, "grad_norm": 0.3183048963546753, "learning_rate": 1.3487396775659691e-06, "loss": 1.4154858589172363, "step": 1728 }, { "epoch": 2.6452599388379205, "grad_norm": 0.4210142493247986, "learning_rate": 1.3429051898564623e-06, "loss": 1.3750901222229004, "step": 1730 }, { "epoch": 2.6483180428134556, "grad_norm": 0.6870266795158386, "learning_rate": 1.337117987163439e-06, "loss": 1.5814931392669678, "step": 1732 }, { "epoch": 2.6513761467889907, "grad_norm": 0.4824894964694977, "learning_rate": 1.3313781353136329e-06, "loss": 1.2281584739685059, "step": 1734 }, { "epoch": 2.6544342507645258, "grad_norm": 0.2543982267379761, "learning_rate": 1.3256856995951852e-06, "loss": 1.0042641162872314, "step": 1736 }, { "epoch": 2.6574923547400613, "grad_norm": 0.39150846004486084, "learning_rate": 1.3200407447568985e-06, "loss": 1.6282243728637695, "step": 1738 }, { "epoch": 2.6605504587155964, "grad_norm": 0.43744921684265137, "learning_rate": 1.3144433350075045e-06, "loss": 1.419670820236206, "step": 1740 }, { "epoch": 2.6636085626911314, "grad_norm": 0.5169599652290344, "learning_rate": 1.3088935340149312e-06, "loss": 1.5492973327636719, "step": 1742 }, { "epoch": 2.6666666666666665, "grad_norm": 0.3686998188495636, "learning_rate": 1.3033914049055776e-06, "loss": 1.390296459197998, "step": 1744 }, { "epoch": 2.669724770642202, "grad_norm": 0.3961811363697052, "learning_rate": 1.2979370102636001e-06, "loss": 1.6185352802276611, "step": 1746 }, { "epoch": 2.672782874617737, "grad_norm": 0.4181622266769409, "learning_rate": 1.2925304121301956e-06, "loss": 1.47446608543396, "step": 1748 }, { "epoch": 2.675840978593272, "grad_norm": 0.5175849199295044, "learning_rate": 1.2871716720029001e-06, "loss": 1.4941065311431885, "step": 1750 }, { "epoch": 2.6788990825688073, "grad_norm": 0.4671924412250519, "learning_rate": 1.2818608508348831e-06, "loss": 1.3738720417022705, "step": 1752 }, { "epoch": 2.6819571865443423, "grad_norm": 0.31229135394096375, "learning_rate": 1.2765980090342638e-06, "loss": 1.0343739986419678, "step": 1754 }, { "epoch": 2.6850152905198774, "grad_norm": 0.5780667662620544, "learning_rate": 1.2713832064634127e-06, "loss": 1.4987692832946777, "step": 1756 }, { "epoch": 2.688073394495413, "grad_norm": 0.29605942964553833, "learning_rate": 1.2662165024382813e-06, "loss": 1.4711230993270874, "step": 1758 }, { "epoch": 2.691131498470948, "grad_norm": 0.4572795629501343, "learning_rate": 1.2610979557277186e-06, "loss": 1.4898228645324707, "step": 1760 }, { "epoch": 2.694189602446483, "grad_norm": 0.5139583945274353, "learning_rate": 1.2560276245528099e-06, "loss": 1.4924449920654297, "step": 1762 }, { "epoch": 2.6972477064220186, "grad_norm": 0.3455151319503784, "learning_rate": 1.251005566586209e-06, "loss": 1.3008229732513428, "step": 1764 }, { "epoch": 2.7003058103975537, "grad_norm": 0.5034812092781067, "learning_rate": 1.2460318389514868e-06, "loss": 1.5259795188903809, "step": 1766 }, { "epoch": 2.7033639143730888, "grad_norm": 0.55739825963974, "learning_rate": 1.241106498222476e-06, "loss": 1.610971212387085, "step": 1768 }, { "epoch": 2.706422018348624, "grad_norm": 0.3922676146030426, "learning_rate": 1.2362296004226327e-06, "loss": 1.3188968896865845, "step": 1770 }, { "epoch": 2.709480122324159, "grad_norm": 0.4953126311302185, "learning_rate": 1.2314012010243973e-06, "loss": 1.5828558206558228, "step": 1772 }, { "epoch": 2.712538226299694, "grad_norm": 0.6791023015975952, "learning_rate": 1.2266213549485638e-06, "loss": 1.3703022003173828, "step": 1774 }, { "epoch": 2.7155963302752295, "grad_norm": 0.37211811542510986, "learning_rate": 1.2218901165636526e-06, "loss": 1.504420280456543, "step": 1776 }, { "epoch": 2.7186544342507646, "grad_norm": 0.2997111678123474, "learning_rate": 1.2172075396852972e-06, "loss": 1.442054271697998, "step": 1778 }, { "epoch": 2.7217125382262997, "grad_norm": 0.3290131390094757, "learning_rate": 1.212573677575627e-06, "loss": 1.5728079080581665, "step": 1780 }, { "epoch": 2.7247706422018347, "grad_norm": 0.3726375102996826, "learning_rate": 1.2079885829426653e-06, "loss": 1.6637623310089111, "step": 1782 }, { "epoch": 2.7278287461773703, "grad_norm": 0.7502315640449524, "learning_rate": 1.2034523079397264e-06, "loss": 1.550297737121582, "step": 1784 }, { "epoch": 2.7308868501529053, "grad_norm": 0.3677420914173126, "learning_rate": 1.1989649041648244e-06, "loss": 1.3913054466247559, "step": 1786 }, { "epoch": 2.7339449541284404, "grad_norm": 0.6194299459457397, "learning_rate": 1.1945264226600878e-06, "loss": 1.49534010887146, "step": 1788 }, { "epoch": 2.7370030581039755, "grad_norm": 0.42255425453186035, "learning_rate": 1.1901369139111737e-06, "loss": 1.5017262697219849, "step": 1790 }, { "epoch": 2.7400611620795106, "grad_norm": 0.39475998282432556, "learning_rate": 1.1857964278467003e-06, "loss": 1.4985376596450806, "step": 1792 }, { "epoch": 2.7431192660550456, "grad_norm": 0.4835125207901001, "learning_rate": 1.1815050138376731e-06, "loss": 1.513980746269226, "step": 1794 }, { "epoch": 2.746177370030581, "grad_norm": 0.27400922775268555, "learning_rate": 1.1772627206969286e-06, "loss": 1.5117716789245605, "step": 1796 }, { "epoch": 2.7492354740061162, "grad_norm": 0.35452115535736084, "learning_rate": 1.1730695966785726e-06, "loss": 1.3024158477783203, "step": 1798 }, { "epoch": 2.7522935779816513, "grad_norm": 0.45254552364349365, "learning_rate": 1.1689256894774384e-06, "loss": 1.3760697841644287, "step": 1800 }, { "epoch": 2.7553516819571864, "grad_norm": 0.6041072010993958, "learning_rate": 1.1648310462285386e-06, "loss": 1.298436164855957, "step": 1802 }, { "epoch": 2.758409785932722, "grad_norm": 0.555728554725647, "learning_rate": 1.1607857135065337e-06, "loss": 1.3885629177093506, "step": 1804 }, { "epoch": 2.761467889908257, "grad_norm": 0.5937597751617432, "learning_rate": 1.1567897373251967e-06, "loss": 1.3754394054412842, "step": 1806 }, { "epoch": 2.764525993883792, "grad_norm": 0.35898932814598083, "learning_rate": 1.1528431631368957e-06, "loss": 1.2469127178192139, "step": 1808 }, { "epoch": 2.767584097859327, "grad_norm": 0.24282048642635345, "learning_rate": 1.1489460358320728e-06, "loss": 0.9015558958053589, "step": 1810 }, { "epoch": 2.770642201834862, "grad_norm": 0.27484798431396484, "learning_rate": 1.1450983997387365e-06, "loss": 1.2076148986816406, "step": 1812 }, { "epoch": 2.7737003058103973, "grad_norm": 0.29970651865005493, "learning_rate": 1.1413002986219528e-06, "loss": 1.2744965553283691, "step": 1814 }, { "epoch": 2.776758409785933, "grad_norm": 0.26047366857528687, "learning_rate": 1.1375517756833534e-06, "loss": 1.3271204233169556, "step": 1816 }, { "epoch": 2.779816513761468, "grad_norm": 0.3544829785823822, "learning_rate": 1.1338528735606391e-06, "loss": 1.3407413959503174, "step": 1818 }, { "epoch": 2.782874617737003, "grad_norm": 0.24868814647197723, "learning_rate": 1.1302036343270996e-06, "loss": 1.4030461311340332, "step": 1820 }, { "epoch": 2.7859327217125385, "grad_norm": 0.30862292647361755, "learning_rate": 1.12660409949113e-06, "loss": 1.3144700527191162, "step": 1822 }, { "epoch": 2.7889908256880735, "grad_norm": 0.9225071668624878, "learning_rate": 1.1230543099957608e-06, "loss": 1.338538646697998, "step": 1824 }, { "epoch": 2.7920489296636086, "grad_norm": 0.32354745268821716, "learning_rate": 1.1195543062181954e-06, "loss": 1.310173749923706, "step": 1826 }, { "epoch": 2.7951070336391437, "grad_norm": 0.24064457416534424, "learning_rate": 1.1161041279693445e-06, "loss": 1.3204376697540283, "step": 1828 }, { "epoch": 2.7981651376146788, "grad_norm": 0.23651309311389923, "learning_rate": 1.1127038144933787e-06, "loss": 1.281717300415039, "step": 1830 }, { "epoch": 2.801223241590214, "grad_norm": 0.21533581614494324, "learning_rate": 1.1093534044672796e-06, "loss": 1.3252437114715576, "step": 1832 }, { "epoch": 2.8042813455657494, "grad_norm": 0.38182252645492554, "learning_rate": 1.1060529360004003e-06, "loss": 1.27931809425354, "step": 1834 }, { "epoch": 2.8073394495412844, "grad_norm": 0.12391169369220734, "learning_rate": 1.1028024466340305e-06, "loss": 1.1552488803863525, "step": 1836 }, { "epoch": 2.8103975535168195, "grad_norm": 0.17293956875801086, "learning_rate": 1.0996019733409732e-06, "loss": 1.2036254405975342, "step": 1838 }, { "epoch": 2.8134556574923546, "grad_norm": 0.21059419214725494, "learning_rate": 1.096451552525121e-06, "loss": 0.9850409030914307, "step": 1840 }, { "epoch": 2.81651376146789, "grad_norm": 0.2714180648326874, "learning_rate": 1.093351220021043e-06, "loss": 1.2215778827667236, "step": 1842 }, { "epoch": 2.819571865443425, "grad_norm": 0.22156941890716553, "learning_rate": 1.090301011093575e-06, "loss": 1.2629544734954834, "step": 1844 }, { "epoch": 2.8226299694189603, "grad_norm": 0.20625340938568115, "learning_rate": 1.0873009604374246e-06, "loss": 1.2778034210205078, "step": 1846 }, { "epoch": 2.8256880733944953, "grad_norm": 0.29442811012268066, "learning_rate": 1.084351102176769e-06, "loss": 1.2413357496261597, "step": 1848 }, { "epoch": 2.8287461773700304, "grad_norm": 0.18544712662696838, "learning_rate": 1.081451469864872e-06, "loss": 1.2637240886688232, "step": 1850 }, { "epoch": 2.8318042813455655, "grad_norm": 0.22874392569065094, "learning_rate": 1.0786020964836991e-06, "loss": 1.2410205602645874, "step": 1852 }, { "epoch": 2.834862385321101, "grad_norm": 0.2457342892885208, "learning_rate": 1.075803014443546e-06, "loss": 1.2094589471817017, "step": 1854 }, { "epoch": 2.837920489296636, "grad_norm": 0.22759026288986206, "learning_rate": 1.0730542555826654e-06, "loss": 1.274350643157959, "step": 1856 }, { "epoch": 2.840978593272171, "grad_norm": 0.206235870718956, "learning_rate": 1.07035585116691e-06, "loss": 1.245356559753418, "step": 1858 }, { "epoch": 2.8440366972477067, "grad_norm": 0.49194467067718506, "learning_rate": 1.0677078318893716e-06, "loss": 1.2151732444763184, "step": 1860 }, { "epoch": 2.8470948012232418, "grad_norm": 0.33920061588287354, "learning_rate": 1.0651102278700364e-06, "loss": 1.2073887586593628, "step": 1862 }, { "epoch": 2.850152905198777, "grad_norm": 0.25718092918395996, "learning_rate": 1.062563068655439e-06, "loss": 1.2325494289398193, "step": 1864 }, { "epoch": 2.853211009174312, "grad_norm": 0.24365228414535522, "learning_rate": 1.0600663832183293e-06, "loss": 1.2226455211639404, "step": 1866 }, { "epoch": 2.856269113149847, "grad_norm": 0.19332216680049896, "learning_rate": 1.0576201999573405e-06, "loss": 1.1831451654434204, "step": 1868 }, { "epoch": 2.859327217125382, "grad_norm": 0.25319862365722656, "learning_rate": 1.0552245466966678e-06, "loss": 1.2440452575683594, "step": 1870 }, { "epoch": 2.8623853211009176, "grad_norm": 0.27022072672843933, "learning_rate": 1.0528794506857508e-06, "loss": 1.2725245952606201, "step": 1872 }, { "epoch": 2.8654434250764527, "grad_norm": 0.3112826943397522, "learning_rate": 1.050584938598963e-06, "loss": 1.282654047012329, "step": 1874 }, { "epoch": 2.8685015290519877, "grad_norm": 0.2421792596578598, "learning_rate": 1.048341036535311e-06, "loss": 1.273242712020874, "step": 1876 }, { "epoch": 2.871559633027523, "grad_norm": 0.23541022837162018, "learning_rate": 1.0461477700181355e-06, "loss": 1.2899906635284424, "step": 1878 }, { "epoch": 2.8746177370030583, "grad_norm": 0.2772025167942047, "learning_rate": 1.044005163994821e-06, "loss": 1.2756202220916748, "step": 1880 }, { "epoch": 2.8776758409785934, "grad_norm": 0.47361937165260315, "learning_rate": 1.0419132428365116e-06, "loss": 1.2930552959442139, "step": 1882 }, { "epoch": 2.8807339449541285, "grad_norm": 0.18241485953330994, "learning_rate": 1.0398720303378374e-06, "loss": 1.223031997680664, "step": 1884 }, { "epoch": 2.8837920489296636, "grad_norm": 0.40437427163124084, "learning_rate": 1.0378815497166385e-06, "loss": 1.2670063972473145, "step": 1886 }, { "epoch": 2.8868501529051986, "grad_norm": 0.22389701008796692, "learning_rate": 1.0359418236137047e-06, "loss": 1.2270456552505493, "step": 1888 }, { "epoch": 2.8899082568807337, "grad_norm": 0.29309970140457153, "learning_rate": 1.0340528740925169e-06, "loss": 1.2563271522521973, "step": 1890 }, { "epoch": 2.8929663608562692, "grad_norm": 0.24637004733085632, "learning_rate": 1.0322147226389952e-06, "loss": 1.2668583393096924, "step": 1892 }, { "epoch": 2.8960244648318043, "grad_norm": 0.5765001177787781, "learning_rate": 1.0304273901612566e-06, "loss": 1.2873437404632568, "step": 1894 }, { "epoch": 2.8990825688073394, "grad_norm": 0.3287610411643982, "learning_rate": 1.028690896989375e-06, "loss": 1.274024248123169, "step": 1896 }, { "epoch": 2.9021406727828745, "grad_norm": 0.2688363492488861, "learning_rate": 1.027005262875151e-06, "loss": 1.20585036277771, "step": 1898 }, { "epoch": 2.90519877675841, "grad_norm": 0.3984238803386688, "learning_rate": 1.0253705069918865e-06, "loss": 1.2360919713974, "step": 1900 }, { "epoch": 2.908256880733945, "grad_norm": 0.27637046575546265, "learning_rate": 1.0237866479341687e-06, "loss": 1.2752952575683594, "step": 1902 }, { "epoch": 2.91131498470948, "grad_norm": 0.5071486234664917, "learning_rate": 1.0222537037176572e-06, "loss": 1.2954089641571045, "step": 1904 }, { "epoch": 2.914373088685015, "grad_norm": 0.22012606263160706, "learning_rate": 1.0207716917788768e-06, "loss": 1.2765629291534424, "step": 1906 }, { "epoch": 2.9174311926605503, "grad_norm": 0.20149464905261993, "learning_rate": 1.019340628975023e-06, "loss": 1.2535219192504883, "step": 1908 }, { "epoch": 2.9204892966360854, "grad_norm": 0.227265864610672, "learning_rate": 1.0179605315837695e-06, "loss": 1.2175259590148926, "step": 1910 }, { "epoch": 2.923547400611621, "grad_norm": 0.2566111087799072, "learning_rate": 1.0166314153030799e-06, "loss": 1.255599856376648, "step": 1912 }, { "epoch": 2.926605504587156, "grad_norm": 0.38341450691223145, "learning_rate": 1.0153532952510328e-06, "loss": 1.2794301509857178, "step": 1914 }, { "epoch": 2.929663608562691, "grad_norm": 0.28000977635383606, "learning_rate": 1.0141261859656484e-06, "loss": 1.2272768020629883, "step": 1916 }, { "epoch": 2.9327217125382266, "grad_norm": 0.2550158202648163, "learning_rate": 1.0129501014047236e-06, "loss": 1.2561171054840088, "step": 1918 }, { "epoch": 2.9357798165137616, "grad_norm": 0.21566316485404968, "learning_rate": 1.0118250549456717e-06, "loss": 1.2545552253723145, "step": 1920 }, { "epoch": 2.9388379204892967, "grad_norm": 0.36798691749572754, "learning_rate": 1.0107510593853716e-06, "loss": 1.3016841411590576, "step": 1922 }, { "epoch": 2.941896024464832, "grad_norm": 0.29115161299705505, "learning_rate": 1.0097281269400234e-06, "loss": 1.3122904300689697, "step": 1924 }, { "epoch": 2.944954128440367, "grad_norm": 0.42286819219589233, "learning_rate": 1.0087562692450062e-06, "loss": 1.2751294374465942, "step": 1926 }, { "epoch": 2.948012232415902, "grad_norm": 0.29917454719543457, "learning_rate": 1.0078354973547484e-06, "loss": 1.2971951961517334, "step": 1928 }, { "epoch": 2.9510703363914375, "grad_norm": 0.28312069177627563, "learning_rate": 1.0069658217426017e-06, "loss": 1.2662827968597412, "step": 1930 }, { "epoch": 2.9541284403669725, "grad_norm": 0.2748239040374756, "learning_rate": 1.0061472523007213e-06, "loss": 1.209917664527893, "step": 1932 }, { "epoch": 2.9571865443425076, "grad_norm": 0.36147835850715637, "learning_rate": 1.0053797983399524e-06, "loss": 1.2387361526489258, "step": 1934 }, { "epoch": 2.9602446483180427, "grad_norm": 0.34865546226501465, "learning_rate": 1.004663468589726e-06, "loss": 1.2596259117126465, "step": 1936 }, { "epoch": 2.963302752293578, "grad_norm": 0.23798368871212006, "learning_rate": 1.0039982711979603e-06, "loss": 1.239612340927124, "step": 1938 }, { "epoch": 2.9663608562691133, "grad_norm": 0.31115320324897766, "learning_rate": 1.0033842137309649e-06, "loss": 1.2498747110366821, "step": 1940 }, { "epoch": 2.9694189602446484, "grad_norm": 0.37815067172050476, "learning_rate": 1.0028213031733578e-06, "loss": 1.3014090061187744, "step": 1942 }, { "epoch": 2.9724770642201834, "grad_norm": 0.26476937532424927, "learning_rate": 1.0023095459279838e-06, "loss": 1.2854735851287842, "step": 1944 }, { "epoch": 2.9755351681957185, "grad_norm": 0.3802984952926636, "learning_rate": 1.0018489478158434e-06, "loss": 1.3032188415527344, "step": 1946 }, { "epoch": 2.9785932721712536, "grad_norm": 0.3544924855232239, "learning_rate": 1.0014395140760255e-06, "loss": 1.2610487937927246, "step": 1948 }, { "epoch": 2.981651376146789, "grad_norm": 0.30221831798553467, "learning_rate": 1.0010812493656488e-06, "loss": 1.2582671642303467, "step": 1950 }, { "epoch": 2.984709480122324, "grad_norm": 0.2731051743030548, "learning_rate": 1.000774157759806e-06, "loss": 1.2794151306152344, "step": 1952 }, { "epoch": 2.9877675840978593, "grad_norm": 0.3089560270309448, "learning_rate": 1.0005182427515222e-06, "loss": 1.334507703781128, "step": 1954 }, { "epoch": 2.9908256880733948, "grad_norm": 0.31155917048454285, "learning_rate": 1.0003135072517108e-06, "loss": 1.3732435703277588, "step": 1956 }, { "epoch": 2.99388379204893, "grad_norm": 0.3963629901409149, "learning_rate": 1.000159953589143e-06, "loss": 1.6014021635055542, "step": 1958 }, { "epoch": 2.996941896024465, "grad_norm": 0.8739917278289795, "learning_rate": 1.00005758351042e-06, "loss": 1.5767264366149902, "step": 1960 }, { "epoch": 3.0, "grad_norm": 1.2575660943984985, "learning_rate": 1.0000063981799541e-06, "loss": 1.7074545621871948, "step": 1962 }, { "epoch": 3.0, "step": 1962, "total_flos": 2.4882019125669396e+18, "train_loss": 1.4736498374943825, "train_runtime": 8380.6004, "train_samples_per_second": 3.746, "train_steps_per_second": 0.234 } ], "logging_steps": 2, "max_steps": 1962, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 9999999, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.4882019125669396e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }