{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1962,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0030581039755351682,
"grad_norm": 0.6435028314590454,
"learning_rate": 1.0101010101010103e-07,
"loss": 1.8936554193496704,
"step": 2
},
{
"epoch": 0.0061162079510703364,
"grad_norm": 0.5548882484436035,
"learning_rate": 3.0303030303030305e-07,
"loss": 1.8550586700439453,
"step": 4
},
{
"epoch": 0.009174311926605505,
"grad_norm": 0.27108362317085266,
"learning_rate": 5.05050505050505e-07,
"loss": 1.890197992324829,
"step": 6
},
{
"epoch": 0.012232415902140673,
"grad_norm": 0.24754057824611664,
"learning_rate": 7.070707070707071e-07,
"loss": 1.8445472717285156,
"step": 8
},
{
"epoch": 0.01529051987767584,
"grad_norm": 0.39890649914741516,
"learning_rate": 9.090909090909091e-07,
"loss": 2.010572910308838,
"step": 10
},
{
"epoch": 0.01834862385321101,
"grad_norm": 0.23249551653862,
"learning_rate": 1.111111111111111e-06,
"loss": 1.8801705837249756,
"step": 12
},
{
"epoch": 0.021406727828746176,
"grad_norm": 0.4299562871456146,
"learning_rate": 1.3131313131313134e-06,
"loss": 1.8805203437805176,
"step": 14
},
{
"epoch": 0.024464831804281346,
"grad_norm": 0.5231528282165527,
"learning_rate": 1.5151515151515152e-06,
"loss": 1.9465537071228027,
"step": 16
},
{
"epoch": 0.027522935779816515,
"grad_norm": 0.3482355773448944,
"learning_rate": 1.7171717171717173e-06,
"loss": 1.8298053741455078,
"step": 18
},
{
"epoch": 0.03058103975535168,
"grad_norm": 0.3003389239311218,
"learning_rate": 1.9191919191919192e-06,
"loss": 1.853845238685608,
"step": 20
},
{
"epoch": 0.03363914373088685,
"grad_norm": 0.5087025165557861,
"learning_rate": 2.1212121212121216e-06,
"loss": 1.9923889636993408,
"step": 22
},
{
"epoch": 0.03669724770642202,
"grad_norm": 2.0046560764312744,
"learning_rate": 2.3232323232323234e-06,
"loss": 2.008021354675293,
"step": 24
},
{
"epoch": 0.039755351681957186,
"grad_norm": 0.2651369571685791,
"learning_rate": 2.5252525252525258e-06,
"loss": 1.7058303356170654,
"step": 26
},
{
"epoch": 0.04281345565749235,
"grad_norm": 0.5547925233840942,
"learning_rate": 2.7272727272727272e-06,
"loss": 1.8821287155151367,
"step": 28
},
{
"epoch": 0.045871559633027525,
"grad_norm": 0.5607280731201172,
"learning_rate": 2.9292929292929295e-06,
"loss": 2.1788079738616943,
"step": 30
},
{
"epoch": 0.04892966360856269,
"grad_norm": 0.36416563391685486,
"learning_rate": 3.131313131313132e-06,
"loss": 1.8534326553344727,
"step": 32
},
{
"epoch": 0.05198776758409786,
"grad_norm": 0.4965146481990814,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.9557833671569824,
"step": 34
},
{
"epoch": 0.05504587155963303,
"grad_norm": 0.3163432776927948,
"learning_rate": 3.5353535353535356e-06,
"loss": 1.7984235286712646,
"step": 36
},
{
"epoch": 0.0581039755351682,
"grad_norm": 0.3063645362854004,
"learning_rate": 3.737373737373738e-06,
"loss": 1.8264985084533691,
"step": 38
},
{
"epoch": 0.06116207951070336,
"grad_norm": 0.30639225244522095,
"learning_rate": 3.93939393939394e-06,
"loss": 1.8241571187973022,
"step": 40
},
{
"epoch": 0.06422018348623854,
"grad_norm": 0.3971042335033417,
"learning_rate": 4.141414141414142e-06,
"loss": 1.874243974685669,
"step": 42
},
{
"epoch": 0.0672782874617737,
"grad_norm": 0.6156560182571411,
"learning_rate": 4.343434343434344e-06,
"loss": 1.965466022491455,
"step": 44
},
{
"epoch": 0.07033639143730887,
"grad_norm": 0.5533192753791809,
"learning_rate": 4.5454545454545455e-06,
"loss": 2.0693740844726562,
"step": 46
},
{
"epoch": 0.07339449541284404,
"grad_norm": 1.9126055240631104,
"learning_rate": 4.747474747474748e-06,
"loss": 2.060253143310547,
"step": 48
},
{
"epoch": 0.0764525993883792,
"grad_norm": 0.3860923647880554,
"learning_rate": 4.94949494949495e-06,
"loss": 1.8577625751495361,
"step": 50
},
{
"epoch": 0.07951070336391437,
"grad_norm": 0.4684409499168396,
"learning_rate": 5.151515151515152e-06,
"loss": 1.8510971069335938,
"step": 52
},
{
"epoch": 0.08256880733944955,
"grad_norm": 0.4307204484939575,
"learning_rate": 5.353535353535354e-06,
"loss": 1.9931628704071045,
"step": 54
},
{
"epoch": 0.0856269113149847,
"grad_norm": 0.3140373229980469,
"learning_rate": 5.555555555555557e-06,
"loss": 1.925836443901062,
"step": 56
},
{
"epoch": 0.08868501529051988,
"grad_norm": 0.36317509412765503,
"learning_rate": 5.7575757575757586e-06,
"loss": 1.9616905450820923,
"step": 58
},
{
"epoch": 0.09174311926605505,
"grad_norm": 0.21478985249996185,
"learning_rate": 5.95959595959596e-06,
"loss": 1.895378589630127,
"step": 60
},
{
"epoch": 0.09480122324159021,
"grad_norm": 0.2936638593673706,
"learning_rate": 6.1616161616161615e-06,
"loss": 1.8279492855072021,
"step": 62
},
{
"epoch": 0.09785932721712538,
"grad_norm": 0.3114721179008484,
"learning_rate": 6.363636363636364e-06,
"loss": 1.715104103088379,
"step": 64
},
{
"epoch": 0.10091743119266056,
"grad_norm": 0.32813334465026855,
"learning_rate": 6.565656565656566e-06,
"loss": 1.852712631225586,
"step": 66
},
{
"epoch": 0.10397553516819572,
"grad_norm": 0.37994885444641113,
"learning_rate": 6.767676767676769e-06,
"loss": 1.9753448963165283,
"step": 68
},
{
"epoch": 0.10703363914373089,
"grad_norm": 0.5206537246704102,
"learning_rate": 6.969696969696971e-06,
"loss": 1.8388103246688843,
"step": 70
},
{
"epoch": 0.11009174311926606,
"grad_norm": 0.6430595517158508,
"learning_rate": 7.171717171717172e-06,
"loss": 2.0399489402770996,
"step": 72
},
{
"epoch": 0.11314984709480122,
"grad_norm": 0.5809399485588074,
"learning_rate": 7.373737373737374e-06,
"loss": 2.1389784812927246,
"step": 74
},
{
"epoch": 0.1162079510703364,
"grad_norm": 1.2094364166259766,
"learning_rate": 7.5757575757575764e-06,
"loss": 1.9202568531036377,
"step": 76
},
{
"epoch": 0.11926605504587157,
"grad_norm": 0.7485645413398743,
"learning_rate": 7.77777777777778e-06,
"loss": 2.2573585510253906,
"step": 78
},
{
"epoch": 0.12232415902140673,
"grad_norm": 0.47476136684417725,
"learning_rate": 7.97979797979798e-06,
"loss": 1.8947498798370361,
"step": 80
},
{
"epoch": 0.12538226299694188,
"grad_norm": 0.24537041783332825,
"learning_rate": 8.181818181818183e-06,
"loss": 1.636450171470642,
"step": 82
},
{
"epoch": 0.12844036697247707,
"grad_norm": 0.4732670783996582,
"learning_rate": 8.383838383838384e-06,
"loss": 1.818341612815857,
"step": 84
},
{
"epoch": 0.13149847094801223,
"grad_norm": 0.37070026993751526,
"learning_rate": 8.585858585858587e-06,
"loss": 1.845613718032837,
"step": 86
},
{
"epoch": 0.1345565749235474,
"grad_norm": 0.3881911635398865,
"learning_rate": 8.787878787878788e-06,
"loss": 1.7559518814086914,
"step": 88
},
{
"epoch": 0.13761467889908258,
"grad_norm": 0.45207998156547546,
"learning_rate": 8.98989898989899e-06,
"loss": 1.7992792129516602,
"step": 90
},
{
"epoch": 0.14067278287461774,
"grad_norm": 0.1907433420419693,
"learning_rate": 9.191919191919193e-06,
"loss": 1.8380980491638184,
"step": 92
},
{
"epoch": 0.1437308868501529,
"grad_norm": 0.2265041321516037,
"learning_rate": 9.393939393939396e-06,
"loss": 1.9353697299957275,
"step": 94
},
{
"epoch": 0.14678899082568808,
"grad_norm": 0.5571039319038391,
"learning_rate": 9.595959595959597e-06,
"loss": 1.861445665359497,
"step": 96
},
{
"epoch": 0.14984709480122324,
"grad_norm": 0.318570613861084,
"learning_rate": 9.797979797979798e-06,
"loss": 1.7963485717773438,
"step": 98
},
{
"epoch": 0.1529051987767584,
"grad_norm": 0.35685858130455017,
"learning_rate": 1e-05,
"loss": 1.955026626586914,
"step": 100
},
{
"epoch": 0.1559633027522936,
"grad_norm": 0.7966809272766113,
"learning_rate": 9.99997440729838e-06,
"loss": 1.8856327533721924,
"step": 102
},
{
"epoch": 0.15902140672782875,
"grad_norm": 0.2650541663169861,
"learning_rate": 9.999897629484621e-06,
"loss": 1.814586877822876,
"step": 104
},
{
"epoch": 0.1620795107033639,
"grad_norm": 0.36088353395462036,
"learning_rate": 9.999769667432037e-06,
"loss": 1.8607715368270874,
"step": 106
},
{
"epoch": 0.1651376146788991,
"grad_norm": 0.6270299553871155,
"learning_rate": 9.999590522596136e-06,
"loss": 1.9078267812728882,
"step": 108
},
{
"epoch": 0.16819571865443425,
"grad_norm": 0.27504709362983704,
"learning_rate": 9.999360197014607e-06,
"loss": 1.9029535055160522,
"step": 110
},
{
"epoch": 0.1712538226299694,
"grad_norm": 0.5007109642028809,
"learning_rate": 9.999078693307296e-06,
"loss": 1.7704020738601685,
"step": 112
},
{
"epoch": 0.1743119266055046,
"grad_norm": 0.5426493883132935,
"learning_rate": 9.99874601467618e-06,
"loss": 1.8907287120819092,
"step": 114
},
{
"epoch": 0.17737003058103976,
"grad_norm": 0.26077231764793396,
"learning_rate": 9.998362164905318e-06,
"loss": 1.760542869567871,
"step": 116
},
{
"epoch": 0.18042813455657492,
"grad_norm": 0.37686067819595337,
"learning_rate": 9.997927148360824e-06,
"loss": 1.995668649673462,
"step": 118
},
{
"epoch": 0.1834862385321101,
"grad_norm": 0.4259154498577118,
"learning_rate": 9.99744096999081e-06,
"loss": 1.8606561422348022,
"step": 120
},
{
"epoch": 0.18654434250764526,
"grad_norm": 0.3365345299243927,
"learning_rate": 9.996903635325326e-06,
"loss": 1.909229040145874,
"step": 122
},
{
"epoch": 0.18960244648318042,
"grad_norm": 0.25919589400291443,
"learning_rate": 9.996315150476308e-06,
"loss": 1.9200305938720703,
"step": 124
},
{
"epoch": 0.1926605504587156,
"grad_norm": 0.2932458221912384,
"learning_rate": 9.995675522137492e-06,
"loss": 1.8696832656860352,
"step": 126
},
{
"epoch": 0.19571865443425077,
"grad_norm": 0.38474535942077637,
"learning_rate": 9.994984757584353e-06,
"loss": 1.828667402267456,
"step": 128
},
{
"epoch": 0.19877675840978593,
"grad_norm": 0.3214952349662781,
"learning_rate": 9.994242864674021e-06,
"loss": 1.8718284368515015,
"step": 130
},
{
"epoch": 0.2018348623853211,
"grad_norm": 0.33034268021583557,
"learning_rate": 9.993449851845176e-06,
"loss": 1.8226697444915771,
"step": 132
},
{
"epoch": 0.20489296636085627,
"grad_norm": 0.8973183631896973,
"learning_rate": 9.992605728117972e-06,
"loss": 1.9453703165054321,
"step": 134
},
{
"epoch": 0.20795107033639143,
"grad_norm": 0.6750196218490601,
"learning_rate": 9.991710503093923e-06,
"loss": 1.820605993270874,
"step": 136
},
{
"epoch": 0.21100917431192662,
"grad_norm": 0.2680327594280243,
"learning_rate": 9.990764186955797e-06,
"loss": 1.711888074874878,
"step": 138
},
{
"epoch": 0.21406727828746178,
"grad_norm": 0.3089163899421692,
"learning_rate": 9.989766790467498e-06,
"loss": 1.668878197669983,
"step": 140
},
{
"epoch": 0.21712538226299694,
"grad_norm": 0.5638787746429443,
"learning_rate": 9.988718324973947e-06,
"loss": 1.7612136602401733,
"step": 142
},
{
"epoch": 0.22018348623853212,
"grad_norm": 0.24349473416805267,
"learning_rate": 9.98761880240095e-06,
"loss": 1.6873559951782227,
"step": 144
},
{
"epoch": 0.22324159021406728,
"grad_norm": 0.3549518585205078,
"learning_rate": 9.986468235255065e-06,
"loss": 1.743373990058899,
"step": 146
},
{
"epoch": 0.22629969418960244,
"grad_norm": 0.44438421726226807,
"learning_rate": 9.985266636623457e-06,
"loss": 1.6509066820144653,
"step": 148
},
{
"epoch": 0.22935779816513763,
"grad_norm": 0.46152663230895996,
"learning_rate": 9.984014020173748e-06,
"loss": 1.8014967441558838,
"step": 150
},
{
"epoch": 0.2324159021406728,
"grad_norm": 0.278169184923172,
"learning_rate": 9.98271040015387e-06,
"loss": 1.8622685670852661,
"step": 152
},
{
"epoch": 0.23547400611620795,
"grad_norm": 0.3168479800224304,
"learning_rate": 9.981355791391891e-06,
"loss": 1.8940097093582153,
"step": 154
},
{
"epoch": 0.23853211009174313,
"grad_norm": 0.3639688491821289,
"learning_rate": 9.979950209295855e-06,
"loss": 1.7917258739471436,
"step": 156
},
{
"epoch": 0.2415902140672783,
"grad_norm": 0.40860888361930847,
"learning_rate": 9.978493669853606e-06,
"loss": 1.8766049146652222,
"step": 158
},
{
"epoch": 0.24464831804281345,
"grad_norm": 0.315494179725647,
"learning_rate": 9.976986189632597e-06,
"loss": 1.7932193279266357,
"step": 160
},
{
"epoch": 0.24770642201834864,
"grad_norm": 0.3525390923023224,
"learning_rate": 9.975427785779717e-06,
"loss": 1.9470767974853516,
"step": 162
},
{
"epoch": 0.25076452599388377,
"grad_norm": 0.33575552701950073,
"learning_rate": 9.97381847602108e-06,
"loss": 1.7163609266281128,
"step": 164
},
{
"epoch": 0.25382262996941896,
"grad_norm": 1.193529725074768,
"learning_rate": 9.972158278661838e-06,
"loss": 1.877960205078125,
"step": 166
},
{
"epoch": 0.25688073394495414,
"grad_norm": 0.348765105009079,
"learning_rate": 9.970447212585961e-06,
"loss": 1.6149842739105225,
"step": 168
},
{
"epoch": 0.2599388379204893,
"grad_norm": 0.5527969598770142,
"learning_rate": 9.968685297256027e-06,
"loss": 1.8597733974456787,
"step": 170
},
{
"epoch": 0.26299694189602446,
"grad_norm": 0.656193196773529,
"learning_rate": 9.966872552713006e-06,
"loss": 1.5253994464874268,
"step": 172
},
{
"epoch": 0.26605504587155965,
"grad_norm": 0.7701634764671326,
"learning_rate": 9.965008999576018e-06,
"loss": 1.5178442001342773,
"step": 174
},
{
"epoch": 0.2691131498470948,
"grad_norm": 0.3889455795288086,
"learning_rate": 9.963094659042113e-06,
"loss": 1.7432003021240234,
"step": 176
},
{
"epoch": 0.27217125382262997,
"grad_norm": 0.7660208344459534,
"learning_rate": 9.961129552886024e-06,
"loss": 1.655880331993103,
"step": 178
},
{
"epoch": 0.27522935779816515,
"grad_norm": 0.7760636210441589,
"learning_rate": 9.959113703459917e-06,
"loss": 1.9860963821411133,
"step": 180
},
{
"epoch": 0.2782874617737003,
"grad_norm": 1.5110101699829102,
"learning_rate": 9.957047133693141e-06,
"loss": 1.9139325618743896,
"step": 182
},
{
"epoch": 0.28134556574923547,
"grad_norm": 1.1153804063796997,
"learning_rate": 9.954929867091961e-06,
"loss": 1.7500460147857666,
"step": 184
},
{
"epoch": 0.28440366972477066,
"grad_norm": 0.3268054723739624,
"learning_rate": 9.952761927739303e-06,
"loss": 1.5284479856491089,
"step": 186
},
{
"epoch": 0.2874617737003058,
"grad_norm": 0.2701658308506012,
"learning_rate": 9.95054334029446e-06,
"loss": 1.5575287342071533,
"step": 188
},
{
"epoch": 0.290519877675841,
"grad_norm": 0.5897979140281677,
"learning_rate": 9.948274129992838e-06,
"loss": 1.5360642671585083,
"step": 190
},
{
"epoch": 0.29357798165137616,
"grad_norm": 3.0125443935394287,
"learning_rate": 9.945954322645643e-06,
"loss": 1.7250124216079712,
"step": 192
},
{
"epoch": 0.2966360856269113,
"grad_norm": 0.22849687933921814,
"learning_rate": 9.9435839446396e-06,
"loss": 1.7317864894866943,
"step": 194
},
{
"epoch": 0.2996941896024465,
"grad_norm": 0.41497474908828735,
"learning_rate": 9.941163022936659e-06,
"loss": 1.7118513584136963,
"step": 196
},
{
"epoch": 0.30275229357798167,
"grad_norm": 0.43153518438339233,
"learning_rate": 9.938691585073677e-06,
"loss": 1.4813673496246338,
"step": 198
},
{
"epoch": 0.3058103975535168,
"grad_norm": 0.2877158522605896,
"learning_rate": 9.936169659162105e-06,
"loss": 1.5152385234832764,
"step": 200
},
{
"epoch": 0.308868501529052,
"grad_norm": 0.319741427898407,
"learning_rate": 9.933597273887676e-06,
"loss": 1.657623291015625,
"step": 202
},
{
"epoch": 0.3119266055045872,
"grad_norm": 0.4885481894016266,
"learning_rate": 9.930974458510074e-06,
"loss": 1.8340609073638916,
"step": 204
},
{
"epoch": 0.3149847094801223,
"grad_norm": 0.3470771312713623,
"learning_rate": 9.9283012428626e-06,
"loss": 1.8779006004333496,
"step": 206
},
{
"epoch": 0.3180428134556575,
"grad_norm": 0.21095849573612213,
"learning_rate": 9.92557765735184e-06,
"loss": 1.946405053138733,
"step": 208
},
{
"epoch": 0.3211009174311927,
"grad_norm": 0.4015672504901886,
"learning_rate": 9.922803732957309e-06,
"loss": 1.5457347631454468,
"step": 210
},
{
"epoch": 0.3241590214067278,
"grad_norm": 0.2712498903274536,
"learning_rate": 9.919979501231102e-06,
"loss": 1.6519064903259277,
"step": 212
},
{
"epoch": 0.327217125382263,
"grad_norm": 0.24934278428554535,
"learning_rate": 9.917104994297543e-06,
"loss": 1.4617292881011963,
"step": 214
},
{
"epoch": 0.3302752293577982,
"grad_norm": 0.22483140230178833,
"learning_rate": 9.914180244852804e-06,
"loss": 1.3875129222869873,
"step": 216
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.6217460632324219,
"learning_rate": 9.911205286164553e-06,
"loss": 1.8669204711914062,
"step": 218
},
{
"epoch": 0.3363914373088685,
"grad_norm": 0.4357741177082062,
"learning_rate": 9.908180152071553e-06,
"loss": 1.666574239730835,
"step": 220
},
{
"epoch": 0.3394495412844037,
"grad_norm": 0.29025763273239136,
"learning_rate": 9.9051048769833e-06,
"loss": 1.810868263244629,
"step": 222
},
{
"epoch": 0.3425076452599388,
"grad_norm": 0.7838276624679565,
"learning_rate": 9.901979495879612e-06,
"loss": 1.3125014305114746,
"step": 224
},
{
"epoch": 0.345565749235474,
"grad_norm": 0.2543538212776184,
"learning_rate": 9.898804044310245e-06,
"loss": 1.6106175184249878,
"step": 226
},
{
"epoch": 0.3486238532110092,
"grad_norm": 0.4557286500930786,
"learning_rate": 9.89557855839448e-06,
"loss": 1.886078953742981,
"step": 228
},
{
"epoch": 0.3516819571865443,
"grad_norm": 0.2689090073108673,
"learning_rate": 9.892303074820712e-06,
"loss": 1.631593108177185,
"step": 230
},
{
"epoch": 0.3547400611620795,
"grad_norm": 0.25291207432746887,
"learning_rate": 9.888977630846048e-06,
"loss": 1.7156798839569092,
"step": 232
},
{
"epoch": 0.3577981651376147,
"grad_norm": 0.3357708752155304,
"learning_rate": 9.88560226429586e-06,
"loss": 1.6416988372802734,
"step": 234
},
{
"epoch": 0.36085626911314983,
"grad_norm": 0.3246925473213196,
"learning_rate": 9.88217701356337e-06,
"loss": 1.5658977031707764,
"step": 236
},
{
"epoch": 0.363914373088685,
"grad_norm": 0.2840614318847656,
"learning_rate": 9.878701917609208e-06,
"loss": 1.6534138917922974,
"step": 238
},
{
"epoch": 0.3669724770642202,
"grad_norm": 0.5397573709487915,
"learning_rate": 9.875177015960973e-06,
"loss": 1.7614964246749878,
"step": 240
},
{
"epoch": 0.37003058103975534,
"grad_norm": 0.28763291239738464,
"learning_rate": 9.871602348712777e-06,
"loss": 1.5937902927398682,
"step": 242
},
{
"epoch": 0.3730886850152905,
"grad_norm": 0.21111302077770233,
"learning_rate": 9.867977956524798e-06,
"loss": 1.6914631128311157,
"step": 244
},
{
"epoch": 0.3761467889908257,
"grad_norm": 0.5114771723747253,
"learning_rate": 9.864303880622806e-06,
"loss": 1.8919175863265991,
"step": 246
},
{
"epoch": 0.37920489296636084,
"grad_norm": 0.4698966145515442,
"learning_rate": 9.8605801627977e-06,
"loss": 2.395404815673828,
"step": 248
},
{
"epoch": 0.382262996941896,
"grad_norm": 0.604468047618866,
"learning_rate": 9.85680684540504e-06,
"loss": 1.523594617843628,
"step": 250
},
{
"epoch": 0.3853211009174312,
"grad_norm": 0.295039564371109,
"learning_rate": 9.852983971364549e-06,
"loss": 1.520268440246582,
"step": 252
},
{
"epoch": 0.38837920489296635,
"grad_norm": 0.2590586245059967,
"learning_rate": 9.84911158415964e-06,
"loss": 1.5712318420410156,
"step": 254
},
{
"epoch": 0.39143730886850153,
"grad_norm": 0.9178432822227478,
"learning_rate": 9.845189727836914e-06,
"loss": 1.7512378692626953,
"step": 256
},
{
"epoch": 0.3944954128440367,
"grad_norm": 0.512359619140625,
"learning_rate": 9.841218447005657e-06,
"loss": 1.677209496498108,
"step": 258
},
{
"epoch": 0.39755351681957185,
"grad_norm": 0.8242136240005493,
"learning_rate": 9.837197786837341e-06,
"loss": 1.52079439163208,
"step": 260
},
{
"epoch": 0.40061162079510704,
"grad_norm": 0.5057528614997864,
"learning_rate": 9.833127793065098e-06,
"loss": 1.3776154518127441,
"step": 262
},
{
"epoch": 0.4036697247706422,
"grad_norm": 0.287590891122818,
"learning_rate": 9.829008511983214e-06,
"loss": 1.313464879989624,
"step": 264
},
{
"epoch": 0.40672782874617736,
"grad_norm": 0.22291725873947144,
"learning_rate": 9.82483999044659e-06,
"loss": 1.4770923852920532,
"step": 266
},
{
"epoch": 0.40978593272171254,
"grad_norm": 0.4278978109359741,
"learning_rate": 9.820622275870219e-06,
"loss": 1.713256597518921,
"step": 268
},
{
"epoch": 0.41284403669724773,
"grad_norm": 0.7735996246337891,
"learning_rate": 9.816355416228636e-06,
"loss": 1.7301435470581055,
"step": 270
},
{
"epoch": 0.41590214067278286,
"grad_norm": 0.36943763494491577,
"learning_rate": 9.812039460055383e-06,
"loss": 1.746875286102295,
"step": 272
},
{
"epoch": 0.41896024464831805,
"grad_norm": 0.30427658557891846,
"learning_rate": 9.807674456442448e-06,
"loss": 1.7644126415252686,
"step": 274
},
{
"epoch": 0.42201834862385323,
"grad_norm": 0.2680354416370392,
"learning_rate": 9.80326045503972e-06,
"loss": 1.6075056791305542,
"step": 276
},
{
"epoch": 0.42507645259938837,
"grad_norm": 0.5165081024169922,
"learning_rate": 9.798797506054398e-06,
"loss": 1.7466685771942139,
"step": 278
},
{
"epoch": 0.42813455657492355,
"grad_norm": 0.46960580348968506,
"learning_rate": 9.794285660250457e-06,
"loss": 1.6852364540100098,
"step": 280
},
{
"epoch": 0.43119266055045874,
"grad_norm": 0.3378291130065918,
"learning_rate": 9.789724968948034e-06,
"loss": 1.5493333339691162,
"step": 282
},
{
"epoch": 0.43425076452599387,
"grad_norm": 0.2972247004508972,
"learning_rate": 9.78511548402287e-06,
"loss": 1.5161151885986328,
"step": 284
},
{
"epoch": 0.43730886850152906,
"grad_norm": 0.3610173165798187,
"learning_rate": 9.780457257905708e-06,
"loss": 1.698796272277832,
"step": 286
},
{
"epoch": 0.44036697247706424,
"grad_norm": 0.4165475070476532,
"learning_rate": 9.775750343581702e-06,
"loss": 1.4344041347503662,
"step": 288
},
{
"epoch": 0.4434250764525994,
"grad_norm": 0.565291702747345,
"learning_rate": 9.770994794589804e-06,
"loss": 1.6736053228378296,
"step": 290
},
{
"epoch": 0.44648318042813456,
"grad_norm": 0.22272102534770966,
"learning_rate": 9.766190665022173e-06,
"loss": 1.515446424484253,
"step": 292
},
{
"epoch": 0.44954128440366975,
"grad_norm": 0.292961061000824,
"learning_rate": 9.761338009523542e-06,
"loss": 1.5677558183670044,
"step": 294
},
{
"epoch": 0.4525993883792049,
"grad_norm": 0.22576913237571716,
"learning_rate": 9.756436883290608e-06,
"loss": 1.6895636320114136,
"step": 296
},
{
"epoch": 0.45565749235474007,
"grad_norm": 0.514447808265686,
"learning_rate": 9.751487342071394e-06,
"loss": 1.6961359977722168,
"step": 298
},
{
"epoch": 0.45871559633027525,
"grad_norm": 0.4707038402557373,
"learning_rate": 9.74648944216463e-06,
"loss": 1.5364969968795776,
"step": 300
},
{
"epoch": 0.4617737003058104,
"grad_norm": 0.3324492871761322,
"learning_rate": 9.741443240419096e-06,
"loss": 1.4445494413375854,
"step": 302
},
{
"epoch": 0.4648318042813456,
"grad_norm": 0.40139055252075195,
"learning_rate": 9.736348794232986e-06,
"loss": 1.631695032119751,
"step": 304
},
{
"epoch": 0.46788990825688076,
"grad_norm": 0.32826143503189087,
"learning_rate": 9.731206161553253e-06,
"loss": 1.5630545616149902,
"step": 306
},
{
"epoch": 0.4709480122324159,
"grad_norm": 0.7137564420700073,
"learning_rate": 9.726015400874945e-06,
"loss": 1.7077264785766602,
"step": 308
},
{
"epoch": 0.4740061162079511,
"grad_norm": 0.5834897756576538,
"learning_rate": 9.72077657124055e-06,
"loss": 1.541429877281189,
"step": 310
},
{
"epoch": 0.47706422018348627,
"grad_norm": 0.30517715215682983,
"learning_rate": 9.715489732239309e-06,
"loss": 1.486952781677246,
"step": 312
},
{
"epoch": 0.4801223241590214,
"grad_norm": 0.39915895462036133,
"learning_rate": 9.710154944006558e-06,
"loss": 1.4761033058166504,
"step": 314
},
{
"epoch": 0.4831804281345566,
"grad_norm": 0.24902665615081787,
"learning_rate": 9.70477226722302e-06,
"loss": 1.555905818939209,
"step": 316
},
{
"epoch": 0.48623853211009177,
"grad_norm": 0.27528202533721924,
"learning_rate": 9.699341763114142e-06,
"loss": 1.5418330430984497,
"step": 318
},
{
"epoch": 0.4892966360856269,
"grad_norm": 0.37373027205467224,
"learning_rate": 9.693863493449376e-06,
"loss": 1.5460388660430908,
"step": 320
},
{
"epoch": 0.4923547400611621,
"grad_norm": 0.3926723301410675,
"learning_rate": 9.688337520541487e-06,
"loss": 1.7003178596496582,
"step": 322
},
{
"epoch": 0.4954128440366973,
"grad_norm": 0.2708083987236023,
"learning_rate": 9.68276390724584e-06,
"loss": 1.8639323711395264,
"step": 324
},
{
"epoch": 0.4984709480122324,
"grad_norm": 0.3522673547267914,
"learning_rate": 9.67714271695969e-06,
"loss": 1.7603111267089844,
"step": 326
},
{
"epoch": 0.5015290519877675,
"grad_norm": 0.2736775279045105,
"learning_rate": 9.671474013621461e-06,
"loss": 1.7426960468292236,
"step": 328
},
{
"epoch": 0.5045871559633027,
"grad_norm": 0.34006989002227783,
"learning_rate": 9.665757861710008e-06,
"loss": 1.6802008152008057,
"step": 330
},
{
"epoch": 0.5076452599388379,
"grad_norm": 0.7181631922721863,
"learning_rate": 9.659994326243897e-06,
"loss": 1.3610038757324219,
"step": 332
},
{
"epoch": 0.5107033639143731,
"grad_norm": 0.3209435045719147,
"learning_rate": 9.654183472780655e-06,
"loss": 1.3310749530792236,
"step": 334
},
{
"epoch": 0.5137614678899083,
"grad_norm": 0.3394523561000824,
"learning_rate": 9.64832536741604e-06,
"loss": 1.7552449703216553,
"step": 336
},
{
"epoch": 0.5168195718654435,
"grad_norm": 0.26636433601379395,
"learning_rate": 9.642420076783266e-06,
"loss": 1.7648036479949951,
"step": 338
},
{
"epoch": 0.5198776758409785,
"grad_norm": 0.4860476553440094,
"learning_rate": 9.636467668052263e-06,
"loss": 1.8371148109436035,
"step": 340
},
{
"epoch": 0.5229357798165137,
"grad_norm": 0.3957999050617218,
"learning_rate": 9.630468208928906e-06,
"loss": 1.7691468000411987,
"step": 342
},
{
"epoch": 0.5259938837920489,
"grad_norm": 0.29553869366645813,
"learning_rate": 9.624421767654247e-06,
"loss": 1.8050150871276855,
"step": 344
},
{
"epoch": 0.5290519877675841,
"grad_norm": 0.8523488640785217,
"learning_rate": 9.618328413003742e-06,
"loss": 1.7548258304595947,
"step": 346
},
{
"epoch": 0.5321100917431193,
"grad_norm": 0.30288758873939514,
"learning_rate": 9.612188214286457e-06,
"loss": 1.652245044708252,
"step": 348
},
{
"epoch": 0.5351681957186545,
"grad_norm": 0.44331154227256775,
"learning_rate": 9.606001241344293e-06,
"loss": 1.5749201774597168,
"step": 350
},
{
"epoch": 0.5382262996941896,
"grad_norm": 0.3775594234466553,
"learning_rate": 9.599767564551185e-06,
"loss": 1.8136138916015625,
"step": 352
},
{
"epoch": 0.5412844036697247,
"grad_norm": 0.6260164976119995,
"learning_rate": 9.593487254812298e-06,
"loss": 1.753260850906372,
"step": 354
},
{
"epoch": 0.5443425076452599,
"grad_norm": 0.21940867602825165,
"learning_rate": 9.587160383563235e-06,
"loss": 1.2595834732055664,
"step": 356
},
{
"epoch": 0.5474006116207951,
"grad_norm": 0.45921286940574646,
"learning_rate": 9.580787022769205e-06,
"loss": 1.8687834739685059,
"step": 358
},
{
"epoch": 0.5504587155963303,
"grad_norm": 0.25323811173439026,
"learning_rate": 9.574367244924216e-06,
"loss": 1.87260901927948,
"step": 360
},
{
"epoch": 0.5535168195718655,
"grad_norm": 0.3825606405735016,
"learning_rate": 9.567901123050255e-06,
"loss": 1.9380344152450562,
"step": 362
},
{
"epoch": 0.5565749235474006,
"grad_norm": 0.8433843851089478,
"learning_rate": 9.56138873069644e-06,
"loss": 1.854411005973816,
"step": 364
},
{
"epoch": 0.5596330275229358,
"grad_norm": 0.5623306035995483,
"learning_rate": 9.554830141938201e-06,
"loss": 1.8307363986968994,
"step": 366
},
{
"epoch": 0.5626911314984709,
"grad_norm": 0.5833460688591003,
"learning_rate": 9.54822543137643e-06,
"loss": 1.691839575767517,
"step": 368
},
{
"epoch": 0.5657492354740061,
"grad_norm": 0.7582941651344299,
"learning_rate": 9.541574674136634e-06,
"loss": 1.5816738605499268,
"step": 370
},
{
"epoch": 0.5688073394495413,
"grad_norm": 0.5991274118423462,
"learning_rate": 9.534877945868075e-06,
"loss": 1.141850471496582,
"step": 372
},
{
"epoch": 0.5718654434250765,
"grad_norm": 0.27493157982826233,
"learning_rate": 9.528135322742916e-06,
"loss": 1.1190171241760254,
"step": 374
},
{
"epoch": 0.5749235474006116,
"grad_norm": 0.20014670491218567,
"learning_rate": 9.521346881455356e-06,
"loss": 1.4172542095184326,
"step": 376
},
{
"epoch": 0.5779816513761468,
"grad_norm": 0.45737189054489136,
"learning_rate": 9.514512699220751e-06,
"loss": 1.3267741203308105,
"step": 378
},
{
"epoch": 0.581039755351682,
"grad_norm": 0.342574805021286,
"learning_rate": 9.507632853774738e-06,
"loss": 1.2848198413848877,
"step": 380
},
{
"epoch": 0.5840978593272171,
"grad_norm": 0.2764483690261841,
"learning_rate": 9.500707423372354e-06,
"loss": 1.2696105241775513,
"step": 382
},
{
"epoch": 0.5871559633027523,
"grad_norm": 0.5538342595100403,
"learning_rate": 9.493736486787145e-06,
"loss": 1.5733320713043213,
"step": 384
},
{
"epoch": 0.5902140672782875,
"grad_norm": 0.5002435445785522,
"learning_rate": 9.486720123310264e-06,
"loss": 1.4811735153198242,
"step": 386
},
{
"epoch": 0.5932721712538226,
"grad_norm": 0.2729179561138153,
"learning_rate": 9.479658412749575e-06,
"loss": 1.2759473323822021,
"step": 388
},
{
"epoch": 0.5963302752293578,
"grad_norm": 0.422869473695755,
"learning_rate": 9.472551435428751e-06,
"loss": 1.6186537742614746,
"step": 390
},
{
"epoch": 0.599388379204893,
"grad_norm": 0.18889868259429932,
"learning_rate": 9.465399272186341e-06,
"loss": 1.5904256105422974,
"step": 392
},
{
"epoch": 0.6024464831804281,
"grad_norm": 0.4715130925178528,
"learning_rate": 9.458202004374875e-06,
"loss": 1.3664047718048096,
"step": 394
},
{
"epoch": 0.6055045871559633,
"grad_norm": 0.3192538321018219,
"learning_rate": 9.450959713859918e-06,
"loss": 1.5540097951889038,
"step": 396
},
{
"epoch": 0.6085626911314985,
"grad_norm": 0.48479557037353516,
"learning_rate": 9.443672483019146e-06,
"loss": 1.7298085689544678,
"step": 398
},
{
"epoch": 0.6116207951070336,
"grad_norm": 0.40212106704711914,
"learning_rate": 9.436340394741424e-06,
"loss": 1.2515219449996948,
"step": 400
},
{
"epoch": 0.6146788990825688,
"grad_norm": 0.31416311860084534,
"learning_rate": 9.428963532425832e-06,
"loss": 1.5272061824798584,
"step": 402
},
{
"epoch": 0.617737003058104,
"grad_norm": 0.39595550298690796,
"learning_rate": 9.421541979980743e-06,
"loss": 1.584099531173706,
"step": 404
},
{
"epoch": 0.6207951070336392,
"grad_norm": 0.3684428632259369,
"learning_rate": 9.414075821822862e-06,
"loss": 1.5516374111175537,
"step": 406
},
{
"epoch": 0.6238532110091743,
"grad_norm": 0.2936325669288635,
"learning_rate": 9.406565142876252e-06,
"loss": 1.3937046527862549,
"step": 408
},
{
"epoch": 0.6269113149847095,
"grad_norm": 0.8210769295692444,
"learning_rate": 9.399010028571394e-06,
"loss": 1.0384480953216553,
"step": 410
},
{
"epoch": 0.6299694189602446,
"grad_norm": 0.31836938858032227,
"learning_rate": 9.391410564844189e-06,
"loss": 1.6605589389801025,
"step": 412
},
{
"epoch": 0.6330275229357798,
"grad_norm": 0.4151877164840698,
"learning_rate": 9.383766838134997e-06,
"loss": 1.5902981758117676,
"step": 414
},
{
"epoch": 0.636085626911315,
"grad_norm": 0.29467517137527466,
"learning_rate": 9.376078935387647e-06,
"loss": 1.511544942855835,
"step": 416
},
{
"epoch": 0.6391437308868502,
"grad_norm": 0.4552344083786011,
"learning_rate": 9.36834694404845e-06,
"loss": 1.6092697381973267,
"step": 418
},
{
"epoch": 0.6422018348623854,
"grad_norm": 0.3086092174053192,
"learning_rate": 9.360570952065205e-06,
"loss": 1.5458872318267822,
"step": 420
},
{
"epoch": 0.6452599388379205,
"grad_norm": 0.29464077949523926,
"learning_rate": 9.3527510478862e-06,
"loss": 1.5201151371002197,
"step": 422
},
{
"epoch": 0.6483180428134556,
"grad_norm": 0.35874319076538086,
"learning_rate": 9.3448873204592e-06,
"loss": 1.7184113264083862,
"step": 424
},
{
"epoch": 0.6513761467889908,
"grad_norm": 0.6177545189857483,
"learning_rate": 9.336979859230438e-06,
"loss": 1.425230860710144,
"step": 426
},
{
"epoch": 0.654434250764526,
"grad_norm": 0.4207315742969513,
"learning_rate": 9.329028754143606e-06,
"loss": 1.1580491065979004,
"step": 428
},
{
"epoch": 0.6574923547400612,
"grad_norm": 0.40215086936950684,
"learning_rate": 9.321034095638816e-06,
"loss": 1.776092767715454,
"step": 430
},
{
"epoch": 0.6605504587155964,
"grad_norm": 0.48207205533981323,
"learning_rate": 9.312995974651581e-06,
"loss": 1.5432982444763184,
"step": 432
},
{
"epoch": 0.6636085626911316,
"grad_norm": 0.9188543558120728,
"learning_rate": 9.304914482611788e-06,
"loss": 1.6913204193115234,
"step": 434
},
{
"epoch": 0.6666666666666666,
"grad_norm": 2.0712273120880127,
"learning_rate": 9.296789711442641e-06,
"loss": 1.5286757946014404,
"step": 436
},
{
"epoch": 0.6697247706422018,
"grad_norm": 0.4487042725086212,
"learning_rate": 9.288621753559624e-06,
"loss": 1.7271997928619385,
"step": 438
},
{
"epoch": 0.672782874617737,
"grad_norm": 0.4550405442714691,
"learning_rate": 9.280410701869456e-06,
"loss": 1.5852614641189575,
"step": 440
},
{
"epoch": 0.6758409785932722,
"grad_norm": 0.8099808692932129,
"learning_rate": 9.27215664976902e-06,
"loss": 1.6332128047943115,
"step": 442
},
{
"epoch": 0.6788990825688074,
"grad_norm": 0.5566719174385071,
"learning_rate": 9.263859691144315e-06,
"loss": 1.5285072326660156,
"step": 444
},
{
"epoch": 0.6819571865443425,
"grad_norm": 0.3996361196041107,
"learning_rate": 9.25551992036938e-06,
"loss": 1.181262731552124,
"step": 446
},
{
"epoch": 0.6850152905198776,
"grad_norm": 0.7320879697799683,
"learning_rate": 9.247137432305221e-06,
"loss": 1.6381134986877441,
"step": 448
},
{
"epoch": 0.6880733944954128,
"grad_norm": 0.5473281741142273,
"learning_rate": 9.238712322298733e-06,
"loss": 1.623387098312378,
"step": 450
},
{
"epoch": 0.691131498470948,
"grad_norm": 0.2673215866088867,
"learning_rate": 9.230244686181616e-06,
"loss": 1.6147091388702393,
"step": 452
},
{
"epoch": 0.6941896024464832,
"grad_norm": 0.41044941544532776,
"learning_rate": 9.22173462026929e-06,
"loss": 1.6174466609954834,
"step": 454
},
{
"epoch": 0.6972477064220184,
"grad_norm": 0.3210803270339966,
"learning_rate": 9.213182221359785e-06,
"loss": 1.4634352922439575,
"step": 456
},
{
"epoch": 0.7003058103975535,
"grad_norm": 0.4366549551486969,
"learning_rate": 9.204587586732653e-06,
"loss": 1.6598728895187378,
"step": 458
},
{
"epoch": 0.7033639143730887,
"grad_norm": 0.6817240118980408,
"learning_rate": 9.195950814147862e-06,
"loss": 1.7457971572875977,
"step": 460
},
{
"epoch": 0.7064220183486238,
"grad_norm": 1.429196834564209,
"learning_rate": 9.187272001844673e-06,
"loss": 1.4895765781402588,
"step": 462
},
{
"epoch": 0.709480122324159,
"grad_norm": 0.33415424823760986,
"learning_rate": 9.178551248540534e-06,
"loss": 1.7249622344970703,
"step": 464
},
{
"epoch": 0.7125382262996942,
"grad_norm": 0.5185303092002869,
"learning_rate": 9.169788653429949e-06,
"loss": 1.5071038007736206,
"step": 466
},
{
"epoch": 0.7155963302752294,
"grad_norm": 0.703040599822998,
"learning_rate": 9.160984316183354e-06,
"loss": 1.6332056522369385,
"step": 468
},
{
"epoch": 0.7186544342507645,
"grad_norm": 0.2760729491710663,
"learning_rate": 9.152138336945985e-06,
"loss": 1.5567004680633545,
"step": 470
},
{
"epoch": 0.7217125382262997,
"grad_norm": 0.26987555623054504,
"learning_rate": 9.143250816336733e-06,
"loss": 1.6896016597747803,
"step": 472
},
{
"epoch": 0.7247706422018348,
"grad_norm": 0.4577353894710541,
"learning_rate": 9.134321855447004e-06,
"loss": 1.780794620513916,
"step": 474
},
{
"epoch": 0.72782874617737,
"grad_norm": 0.3506152629852295,
"learning_rate": 9.125351555839568e-06,
"loss": 1.676330327987671,
"step": 476
},
{
"epoch": 0.7308868501529052,
"grad_norm": 0.3420753479003906,
"learning_rate": 9.116340019547403e-06,
"loss": 1.53602933883667,
"step": 478
},
{
"epoch": 0.7339449541284404,
"grad_norm": 0.615734875202179,
"learning_rate": 9.107287349072535e-06,
"loss": 1.6315178871154785,
"step": 480
},
{
"epoch": 0.7370030581039755,
"grad_norm": 0.3383826017379761,
"learning_rate": 9.098193647384872e-06,
"loss": 1.646344542503357,
"step": 482
},
{
"epoch": 0.7400611620795107,
"grad_norm": 0.40700384974479675,
"learning_rate": 9.089059017921034e-06,
"loss": 1.6499868631362915,
"step": 484
},
{
"epoch": 0.7431192660550459,
"grad_norm": 0.4302765727043152,
"learning_rate": 9.079883564583176e-06,
"loss": 1.6223028898239136,
"step": 486
},
{
"epoch": 0.746177370030581,
"grad_norm": 0.2995837330818176,
"learning_rate": 9.070667391737804e-06,
"loss": 1.639768123626709,
"step": 488
},
{
"epoch": 0.7492354740061162,
"grad_norm": 0.3183751702308655,
"learning_rate": 9.061410604214588e-06,
"loss": 1.4172444343566895,
"step": 490
},
{
"epoch": 0.7522935779816514,
"grad_norm": 0.41883519291877747,
"learning_rate": 9.052113307305178e-06,
"loss": 1.5172092914581299,
"step": 492
},
{
"epoch": 0.7553516819571865,
"grad_norm": 0.4170067310333252,
"learning_rate": 9.04277560676199e-06,
"loss": 1.4581788778305054,
"step": 494
},
{
"epoch": 0.7584097859327217,
"grad_norm": 0.4589844346046448,
"learning_rate": 9.033397608797015e-06,
"loss": 1.5675625801086426,
"step": 496
},
{
"epoch": 0.7614678899082569,
"grad_norm": 0.4775915741920471,
"learning_rate": 9.023979420080614e-06,
"loss": 1.5760972499847412,
"step": 498
},
{
"epoch": 0.764525993883792,
"grad_norm": 0.4255703389644623,
"learning_rate": 9.014521147740295e-06,
"loss": 1.4211878776550293,
"step": 500
},
{
"epoch": 0.7675840978593272,
"grad_norm": 0.2350740283727646,
"learning_rate": 9.005022899359498e-06,
"loss": 1.0600173473358154,
"step": 502
},
{
"epoch": 0.7706422018348624,
"grad_norm": 0.25523892045021057,
"learning_rate": 8.995484782976372e-06,
"loss": 1.3498680591583252,
"step": 504
},
{
"epoch": 0.7737003058103975,
"grad_norm": 0.25793585181236267,
"learning_rate": 8.985906907082548e-06,
"loss": 1.4128957986831665,
"step": 506
},
{
"epoch": 0.7767584097859327,
"grad_norm": 0.2672351002693176,
"learning_rate": 8.9762893806219e-06,
"loss": 1.4579813480377197,
"step": 508
},
{
"epoch": 0.7798165137614679,
"grad_norm": 0.3467871844768524,
"learning_rate": 8.96663231298931e-06,
"loss": 1.469613790512085,
"step": 510
},
{
"epoch": 0.7828746177370031,
"grad_norm": 0.2631012797355652,
"learning_rate": 8.956935814029426e-06,
"loss": 1.5352952480316162,
"step": 512
},
{
"epoch": 0.7859327217125383,
"grad_norm": 0.42967817187309265,
"learning_rate": 8.947199994035402e-06,
"loss": 1.448859691619873,
"step": 514
},
{
"epoch": 0.7889908256880734,
"grad_norm": 0.18720397353172302,
"learning_rate": 8.937424963747656e-06,
"loss": 1.4682276248931885,
"step": 516
},
{
"epoch": 0.7920489296636085,
"grad_norm": 0.2571136951446533,
"learning_rate": 8.9276108343526e-06,
"loss": 1.430220365524292,
"step": 518
},
{
"epoch": 0.7951070336391437,
"grad_norm": 0.49666231870651245,
"learning_rate": 8.917757717481388e-06,
"loss": 1.4388704299926758,
"step": 520
},
{
"epoch": 0.7981651376146789,
"grad_norm": 0.18454308807849884,
"learning_rate": 8.90786572520863e-06,
"loss": 1.3887765407562256,
"step": 522
},
{
"epoch": 0.8012232415902141,
"grad_norm": 0.19775497913360596,
"learning_rate": 8.897934970051128e-06,
"loss": 1.4397857189178467,
"step": 524
},
{
"epoch": 0.8042813455657493,
"grad_norm": 0.24946311116218567,
"learning_rate": 8.8879655649666e-06,
"loss": 1.3772547245025635,
"step": 526
},
{
"epoch": 0.8073394495412844,
"grad_norm": 0.1347188949584961,
"learning_rate": 8.877957623352376e-06,
"loss": 1.2148081064224243,
"step": 528
},
{
"epoch": 0.8103975535168195,
"grad_norm": 0.17375752329826355,
"learning_rate": 8.867911259044134e-06,
"loss": 1.2351716756820679,
"step": 530
},
{
"epoch": 0.8134556574923547,
"grad_norm": 0.12528319656848907,
"learning_rate": 8.857826586314586e-06,
"loss": 1.0168347358703613,
"step": 532
},
{
"epoch": 0.8165137614678899,
"grad_norm": 0.22279202938079834,
"learning_rate": 8.847703719872184e-06,
"loss": 1.3256959915161133,
"step": 534
},
{
"epoch": 0.8195718654434251,
"grad_norm": 0.22974777221679688,
"learning_rate": 8.837542774859819e-06,
"loss": 1.3868855237960815,
"step": 536
},
{
"epoch": 0.8226299694189603,
"grad_norm": 0.2833384871482849,
"learning_rate": 8.827343866853505e-06,
"loss": 1.4037737846374512,
"step": 538
},
{
"epoch": 0.8256880733944955,
"grad_norm": 0.20462170243263245,
"learning_rate": 8.817107111861068e-06,
"loss": 1.3688358068466187,
"step": 540
},
{
"epoch": 0.8287461773700305,
"grad_norm": 0.21328498423099518,
"learning_rate": 8.806832626320828e-06,
"loss": 1.3812446594238281,
"step": 542
},
{
"epoch": 0.8318042813455657,
"grad_norm": 0.2749079465866089,
"learning_rate": 8.796520527100268e-06,
"loss": 1.3695695400238037,
"step": 544
},
{
"epoch": 0.8348623853211009,
"grad_norm": 0.17869983613491058,
"learning_rate": 8.786170931494714e-06,
"loss": 1.3381950855255127,
"step": 546
},
{
"epoch": 0.8379204892966361,
"grad_norm": 0.23981167376041412,
"learning_rate": 8.775783957225991e-06,
"loss": 1.409177541732788,
"step": 548
},
{
"epoch": 0.8409785932721713,
"grad_norm": 0.4634632170200348,
"learning_rate": 8.765359722441096e-06,
"loss": 1.3826044797897339,
"step": 550
},
{
"epoch": 0.8440366972477065,
"grad_norm": 0.19470739364624023,
"learning_rate": 8.754898345710839e-06,
"loss": 1.3529078960418701,
"step": 552
},
{
"epoch": 0.8470948012232415,
"grad_norm": 0.21753935515880585,
"learning_rate": 8.744399946028506e-06,
"loss": 1.3324353694915771,
"step": 554
},
{
"epoch": 0.8501529051987767,
"grad_norm": 0.24797090888023376,
"learning_rate": 8.733864642808505e-06,
"loss": 1.3469841480255127,
"step": 556
},
{
"epoch": 0.8532110091743119,
"grad_norm": 0.2123066782951355,
"learning_rate": 8.723292555884997e-06,
"loss": 1.343614101409912,
"step": 558
},
{
"epoch": 0.8562691131498471,
"grad_norm": 0.25072529911994934,
"learning_rate": 8.712683805510547e-06,
"loss": 1.305376648902893,
"step": 560
},
{
"epoch": 0.8593272171253823,
"grad_norm": 0.3219304382801056,
"learning_rate": 8.702038512354746e-06,
"loss": 1.3584821224212646,
"step": 562
},
{
"epoch": 0.8623853211009175,
"grad_norm": 0.3253892660140991,
"learning_rate": 8.691356797502846e-06,
"loss": 1.3929443359375,
"step": 564
},
{
"epoch": 0.8654434250764526,
"grad_norm": 0.22387385368347168,
"learning_rate": 8.680638782454373e-06,
"loss": 1.3898614645004272,
"step": 566
},
{
"epoch": 0.8685015290519877,
"grad_norm": 0.2767902612686157,
"learning_rate": 8.669884589121756e-06,
"loss": 1.3842121362686157,
"step": 568
},
{
"epoch": 0.8715596330275229,
"grad_norm": 0.2403760552406311,
"learning_rate": 8.659094339828934e-06,
"loss": 1.3873755931854248,
"step": 570
},
{
"epoch": 0.8746177370030581,
"grad_norm": 0.30079615116119385,
"learning_rate": 8.648268157309964e-06,
"loss": 1.3781442642211914,
"step": 572
},
{
"epoch": 0.8776758409785933,
"grad_norm": 0.24510778486728668,
"learning_rate": 8.637406164707628e-06,
"loss": 1.4003241062164307,
"step": 574
},
{
"epoch": 0.8807339449541285,
"grad_norm": 0.19053591787815094,
"learning_rate": 8.62650848557203e-06,
"loss": 1.318782091140747,
"step": 576
},
{
"epoch": 0.8837920489296636,
"grad_norm": 0.5118341445922852,
"learning_rate": 8.615575243859194e-06,
"loss": 1.3740344047546387,
"step": 578
},
{
"epoch": 0.8868501529051988,
"grad_norm": 0.2653733193874359,
"learning_rate": 8.604606563929649e-06,
"loss": 1.3240249156951904,
"step": 580
},
{
"epoch": 0.8899082568807339,
"grad_norm": 0.2646930515766144,
"learning_rate": 8.59360257054702e-06,
"loss": 1.3533198833465576,
"step": 582
},
{
"epoch": 0.8929663608562691,
"grad_norm": 0.21842285990715027,
"learning_rate": 8.582563388876602e-06,
"loss": 1.3596748113632202,
"step": 584
},
{
"epoch": 0.8960244648318043,
"grad_norm": 0.2090519517660141,
"learning_rate": 8.571489144483945e-06,
"loss": 1.3835537433624268,
"step": 586
},
{
"epoch": 0.8990825688073395,
"grad_norm": 0.2362383008003235,
"learning_rate": 8.560379963333416e-06,
"loss": 1.368111252784729,
"step": 588
},
{
"epoch": 0.9021406727828746,
"grad_norm": 0.4883694350719452,
"learning_rate": 8.549235971786777e-06,
"loss": 1.3067984580993652,
"step": 590
},
{
"epoch": 0.9051987767584098,
"grad_norm": 0.3407292366027832,
"learning_rate": 8.538057296601739e-06,
"loss": 1.3290581703186035,
"step": 592
},
{
"epoch": 0.908256880733945,
"grad_norm": 0.21036434173583984,
"learning_rate": 8.526844064930523e-06,
"loss": 1.3695251941680908,
"step": 594
},
{
"epoch": 0.9113149847094801,
"grad_norm": 0.22752052545547485,
"learning_rate": 8.515596404318415e-06,
"loss": 1.3922007083892822,
"step": 596
},
{
"epoch": 0.9143730886850153,
"grad_norm": 0.23141705989837646,
"learning_rate": 8.504314442702315e-06,
"loss": 1.371009111404419,
"step": 598
},
{
"epoch": 0.9174311926605505,
"grad_norm": 0.18458011746406555,
"learning_rate": 8.492998308409275e-06,
"loss": 1.3468807935714722,
"step": 600
},
{
"epoch": 0.9204892966360856,
"grad_norm": 0.2277638018131256,
"learning_rate": 8.481648130155054e-06,
"loss": 1.3067777156829834,
"step": 602
},
{
"epoch": 0.9235474006116208,
"grad_norm": 0.2761037051677704,
"learning_rate": 8.470264037042639e-06,
"loss": 1.3436920642852783,
"step": 604
},
{
"epoch": 0.926605504587156,
"grad_norm": 0.2718355059623718,
"learning_rate": 8.458846158560787e-06,
"loss": 1.368149995803833,
"step": 606
},
{
"epoch": 0.9296636085626911,
"grad_norm": 0.471161812543869,
"learning_rate": 8.447394624582544e-06,
"loss": 1.3190257549285889,
"step": 608
},
{
"epoch": 0.9327217125382263,
"grad_norm": 0.24170783162117004,
"learning_rate": 8.435909565363772e-06,
"loss": 1.3419578075408936,
"step": 610
},
{
"epoch": 0.9357798165137615,
"grad_norm": 0.26485109329223633,
"learning_rate": 8.424391111541673e-06,
"loss": 1.338409662246704,
"step": 612
},
{
"epoch": 0.9388379204892966,
"grad_norm": 0.23220610618591309,
"learning_rate": 8.412839394133285e-06,
"loss": 1.3877780437469482,
"step": 614
},
{
"epoch": 0.9418960244648318,
"grad_norm": 0.24310626089572906,
"learning_rate": 8.401254544534018e-06,
"loss": 1.4051454067230225,
"step": 616
},
{
"epoch": 0.944954128440367,
"grad_norm": 0.299958735704422,
"learning_rate": 8.389636694516134e-06,
"loss": 1.3702571392059326,
"step": 618
},
{
"epoch": 0.9480122324159022,
"grad_norm": 0.449929803609848,
"learning_rate": 8.377985976227265e-06,
"loss": 1.379606008529663,
"step": 620
},
{
"epoch": 0.9510703363914373,
"grad_norm": 0.24171197414398193,
"learning_rate": 8.366302522188902e-06,
"loss": 1.350182294845581,
"step": 622
},
{
"epoch": 0.9541284403669725,
"grad_norm": 0.2935427129268646,
"learning_rate": 8.354586465294894e-06,
"loss": 1.2931137084960938,
"step": 624
},
{
"epoch": 0.9571865443425076,
"grad_norm": 0.23755374550819397,
"learning_rate": 8.342837938809925e-06,
"loss": 1.3183162212371826,
"step": 626
},
{
"epoch": 0.9602446483180428,
"grad_norm": 0.3486945331096649,
"learning_rate": 8.331057076368012e-06,
"loss": 1.3358354568481445,
"step": 628
},
{
"epoch": 0.963302752293578,
"grad_norm": 0.3866771459579468,
"learning_rate": 8.319244011970975e-06,
"loss": 1.3079657554626465,
"step": 630
},
{
"epoch": 0.9663608562691132,
"grad_norm": 0.23048752546310425,
"learning_rate": 8.307398879986917e-06,
"loss": 1.323075294494629,
"step": 632
},
{
"epoch": 0.9694189602446484,
"grad_norm": 0.2808099687099457,
"learning_rate": 8.295521815148697e-06,
"loss": 1.376133918762207,
"step": 634
},
{
"epoch": 0.9724770642201835,
"grad_norm": 0.3424737751483917,
"learning_rate": 8.283612952552393e-06,
"loss": 1.363619327545166,
"step": 636
},
{
"epoch": 0.9755351681957186,
"grad_norm": 0.23272113502025604,
"learning_rate": 8.271672427655765e-06,
"loss": 1.3780806064605713,
"step": 638
},
{
"epoch": 0.9785932721712538,
"grad_norm": 0.33965811133384705,
"learning_rate": 8.259700376276724e-06,
"loss": 1.3397910594940186,
"step": 640
},
{
"epoch": 0.981651376146789,
"grad_norm": 0.25269240140914917,
"learning_rate": 8.247696934591774e-06,
"loss": 1.3255189657211304,
"step": 642
},
{
"epoch": 0.9847094801223242,
"grad_norm": 1.2317392826080322,
"learning_rate": 8.235662239134473e-06,
"loss": 1.347729206085205,
"step": 644
},
{
"epoch": 0.9877675840978594,
"grad_norm": 0.37982505559921265,
"learning_rate": 8.22359642679387e-06,
"loss": 1.3894901275634766,
"step": 646
},
{
"epoch": 0.9908256880733946,
"grad_norm": 0.2849336564540863,
"learning_rate": 8.211499634812966e-06,
"loss": 1.429058313369751,
"step": 648
},
{
"epoch": 0.9938837920489296,
"grad_norm": 0.6233349442481995,
"learning_rate": 8.199372000787126e-06,
"loss": 2.095426082611084,
"step": 650
},
{
"epoch": 0.9969418960244648,
"grad_norm": 0.6541375517845154,
"learning_rate": 8.187213662662539e-06,
"loss": 2.1073060035705566,
"step": 652
},
{
"epoch": 1.0,
"grad_norm": 11.037178039550781,
"learning_rate": 8.175024758734636e-06,
"loss": 2.095914840698242,
"step": 654
},
{
"epoch": 1.003058103975535,
"grad_norm": 0.3948424160480499,
"learning_rate": 8.16280542764652e-06,
"loss": 1.4957305192947388,
"step": 656
},
{
"epoch": 1.0061162079510704,
"grad_norm": 0.310005784034729,
"learning_rate": 8.150555808387389e-06,
"loss": 1.455479383468628,
"step": 658
},
{
"epoch": 1.0091743119266054,
"grad_norm": 0.26789844036102295,
"learning_rate": 8.138276040290952e-06,
"loss": 1.4779293537139893,
"step": 660
},
{
"epoch": 1.0122324159021407,
"grad_norm": 0.19781345129013062,
"learning_rate": 8.125966263033852e-06,
"loss": 1.4063279628753662,
"step": 662
},
{
"epoch": 1.0152905198776758,
"grad_norm": 0.21764519810676575,
"learning_rate": 8.11362661663407e-06,
"loss": 1.5875146389007568,
"step": 664
},
{
"epoch": 1.018348623853211,
"grad_norm": 0.25749847292900085,
"learning_rate": 8.101257241449332e-06,
"loss": 1.480888843536377,
"step": 666
},
{
"epoch": 1.0214067278287462,
"grad_norm": 0.26426374912261963,
"learning_rate": 8.08885827817552e-06,
"loss": 1.4235765933990479,
"step": 668
},
{
"epoch": 1.0244648318042813,
"grad_norm": 0.25188708305358887,
"learning_rate": 8.07642986784506e-06,
"loss": 1.5084459781646729,
"step": 670
},
{
"epoch": 1.0275229357798166,
"grad_norm": 0.6583337783813477,
"learning_rate": 8.063972151825332e-06,
"loss": 1.369026780128479,
"step": 672
},
{
"epoch": 1.0305810397553516,
"grad_norm": 0.21123117208480835,
"learning_rate": 8.05148527181705e-06,
"loss": 1.4445654153823853,
"step": 674
},
{
"epoch": 1.033639143730887,
"grad_norm": 0.293588787317276,
"learning_rate": 8.038969369852654e-06,
"loss": 1.555469274520874,
"step": 676
},
{
"epoch": 1.036697247706422,
"grad_norm": 0.27872779965400696,
"learning_rate": 8.026424588294701e-06,
"loss": 1.4869214296340942,
"step": 678
},
{
"epoch": 1.039755351681957,
"grad_norm": 0.23042356967926025,
"learning_rate": 8.013851069834233e-06,
"loss": 1.279091238975525,
"step": 680
},
{
"epoch": 1.0428134556574924,
"grad_norm": 0.289106547832489,
"learning_rate": 8.001248957489164e-06,
"loss": 1.4306490421295166,
"step": 682
},
{
"epoch": 1.0458715596330275,
"grad_norm": 0.5272045135498047,
"learning_rate": 7.988618394602653e-06,
"loss": 1.6781132221221924,
"step": 684
},
{
"epoch": 1.0489296636085628,
"grad_norm": 0.22576113045215607,
"learning_rate": 7.975959524841464e-06,
"loss": 1.3457372188568115,
"step": 686
},
{
"epoch": 1.0519877675840978,
"grad_norm": 0.5630601644515991,
"learning_rate": 7.963272492194344e-06,
"loss": 1.4807915687561035,
"step": 688
},
{
"epoch": 1.0550458715596331,
"grad_norm": 0.34389057755470276,
"learning_rate": 7.950557440970377e-06,
"loss": 1.368910789489746,
"step": 690
},
{
"epoch": 1.0581039755351682,
"grad_norm": 0.21063481271266937,
"learning_rate": 7.937814515797348e-06,
"loss": 1.360002040863037,
"step": 692
},
{
"epoch": 1.0611620795107033,
"grad_norm": 0.20320424437522888,
"learning_rate": 7.92504386162009e-06,
"loss": 1.3675504922866821,
"step": 694
},
{
"epoch": 1.0642201834862386,
"grad_norm": 0.2813395857810974,
"learning_rate": 7.912245623698846e-06,
"loss": 1.395061731338501,
"step": 696
},
{
"epoch": 1.0672782874617737,
"grad_norm": 0.4647752046585083,
"learning_rate": 7.899419947607611e-06,
"loss": 1.5662283897399902,
"step": 698
},
{
"epoch": 1.070336391437309,
"grad_norm": 0.3765999972820282,
"learning_rate": 7.886566979232471e-06,
"loss": 1.5935697555541992,
"step": 700
},
{
"epoch": 1.073394495412844,
"grad_norm": 0.29083383083343506,
"learning_rate": 7.873686864769955e-06,
"loss": 1.434537649154663,
"step": 702
},
{
"epoch": 1.0764525993883791,
"grad_norm": 0.4763205349445343,
"learning_rate": 7.860779750725362e-06,
"loss": 1.4121177196502686,
"step": 704
},
{
"epoch": 1.0795107033639144,
"grad_norm": 0.33439531922340393,
"learning_rate": 7.8478457839111e-06,
"loss": 1.3943579196929932,
"step": 706
},
{
"epoch": 1.0825688073394495,
"grad_norm": 0.342690110206604,
"learning_rate": 7.834885111445017e-06,
"loss": 1.4776759147644043,
"step": 708
},
{
"epoch": 1.0856269113149848,
"grad_norm": 0.29185494780540466,
"learning_rate": 7.82189788074872e-06,
"loss": 1.4435069561004639,
"step": 710
},
{
"epoch": 1.0886850152905199,
"grad_norm": 1.3288284540176392,
"learning_rate": 7.80888423954591e-06,
"loss": 1.4731531143188477,
"step": 712
},
{
"epoch": 1.091743119266055,
"grad_norm": 0.2119162380695343,
"learning_rate": 7.795844335860691e-06,
"loss": 1.4626476764678955,
"step": 714
},
{
"epoch": 1.0948012232415902,
"grad_norm": 0.20571930706501007,
"learning_rate": 7.782778318015892e-06,
"loss": 1.342850685119629,
"step": 716
},
{
"epoch": 1.0978593272171253,
"grad_norm": 0.22236645221710205,
"learning_rate": 7.769686334631375e-06,
"loss": 1.286208152770996,
"step": 718
},
{
"epoch": 1.1009174311926606,
"grad_norm": 0.18384046852588654,
"learning_rate": 7.756568534622355e-06,
"loss": 1.4446015357971191,
"step": 720
},
{
"epoch": 1.1039755351681957,
"grad_norm": 0.2486264407634735,
"learning_rate": 7.743425067197693e-06,
"loss": 1.5612818002700806,
"step": 722
},
{
"epoch": 1.107033639143731,
"grad_norm": 0.23211126029491425,
"learning_rate": 7.730256081858207e-06,
"loss": 1.3999545574188232,
"step": 724
},
{
"epoch": 1.110091743119266,
"grad_norm": 0.41483980417251587,
"learning_rate": 7.717061728394968e-06,
"loss": 1.591150164604187,
"step": 726
},
{
"epoch": 1.1131498470948011,
"grad_norm": 0.3113287091255188,
"learning_rate": 7.7038421568876e-06,
"loss": 1.620883584022522,
"step": 728
},
{
"epoch": 1.1162079510703364,
"grad_norm": 0.5611585378646851,
"learning_rate": 7.690597517702569e-06,
"loss": 1.3835599422454834,
"step": 730
},
{
"epoch": 1.1192660550458715,
"grad_norm": 0.5187618732452393,
"learning_rate": 7.677327961491475e-06,
"loss": 1.3614990711212158,
"step": 732
},
{
"epoch": 1.1223241590214068,
"grad_norm": 0.34465184807777405,
"learning_rate": 7.664033639189336e-06,
"loss": 1.467517614364624,
"step": 734
},
{
"epoch": 1.1253822629969419,
"grad_norm": 0.22211050987243652,
"learning_rate": 7.650714702012876e-06,
"loss": 1.287433385848999,
"step": 736
},
{
"epoch": 1.1284403669724772,
"grad_norm": 0.36259227991104126,
"learning_rate": 7.637371301458797e-06,
"loss": 1.367175817489624,
"step": 738
},
{
"epoch": 1.1314984709480123,
"grad_norm": 0.44571414589881897,
"learning_rate": 7.6240035893020625e-06,
"loss": 1.3308281898498535,
"step": 740
},
{
"epoch": 1.1345565749235473,
"grad_norm": 0.26124662160873413,
"learning_rate": 7.610611717594173e-06,
"loss": 1.3915913105010986,
"step": 742
},
{
"epoch": 1.1376146788990826,
"grad_norm": 0.3137398064136505,
"learning_rate": 7.597195838661426e-06,
"loss": 1.3188378810882568,
"step": 744
},
{
"epoch": 1.1406727828746177,
"grad_norm": 0.3484938144683838,
"learning_rate": 7.583756105103195e-06,
"loss": 1.3703608512878418,
"step": 746
},
{
"epoch": 1.143730886850153,
"grad_norm": 0.3699035942554474,
"learning_rate": 7.570292669790186e-06,
"loss": 1.5115067958831787,
"step": 748
},
{
"epoch": 1.146788990825688,
"grad_norm": 0.24170878529548645,
"learning_rate": 7.556805685862703e-06,
"loss": 1.3954684734344482,
"step": 750
},
{
"epoch": 1.1498470948012232,
"grad_norm": 0.20038793981075287,
"learning_rate": 7.543295306728904e-06,
"loss": 1.345947027206421,
"step": 752
},
{
"epoch": 1.1529051987767585,
"grad_norm": 0.38949868083000183,
"learning_rate": 7.529761686063056e-06,
"loss": 1.5590949058532715,
"step": 754
},
{
"epoch": 1.1559633027522935,
"grad_norm": 0.33645766973495483,
"learning_rate": 7.516204977803789e-06,
"loss": 1.446972370147705,
"step": 756
},
{
"epoch": 1.1590214067278288,
"grad_norm": 0.18463970720767975,
"learning_rate": 7.5026253361523435e-06,
"loss": 1.3630192279815674,
"step": 758
},
{
"epoch": 1.162079510703364,
"grad_norm": 0.33572879433631897,
"learning_rate": 7.489022915570813e-06,
"loss": 1.457106113433838,
"step": 760
},
{
"epoch": 1.165137614678899,
"grad_norm": 0.2753995954990387,
"learning_rate": 7.475397870780397e-06,
"loss": 1.4502360820770264,
"step": 762
},
{
"epoch": 1.1681957186544343,
"grad_norm": 0.35596194863319397,
"learning_rate": 7.4617503567596295e-06,
"loss": 1.4977834224700928,
"step": 764
},
{
"epoch": 1.1712538226299694,
"grad_norm": 0.4726940095424652,
"learning_rate": 7.448080528742624e-06,
"loss": 1.3764468431472778,
"step": 766
},
{
"epoch": 1.1743119266055047,
"grad_norm": 0.26225268840789795,
"learning_rate": 7.434388542217303e-06,
"loss": 1.4741466045379639,
"step": 768
},
{
"epoch": 1.1773700305810397,
"grad_norm": 0.27619338035583496,
"learning_rate": 7.420674552923638e-06,
"loss": 1.3593350648880005,
"step": 770
},
{
"epoch": 1.1804281345565748,
"grad_norm": 0.3182947635650635,
"learning_rate": 7.4069387168518615e-06,
"loss": 1.673621654510498,
"step": 772
},
{
"epoch": 1.18348623853211,
"grad_norm": 0.28721779584884644,
"learning_rate": 7.393181190240714e-06,
"loss": 1.4450278282165527,
"step": 774
},
{
"epoch": 1.1865443425076452,
"grad_norm": 0.2768658399581909,
"learning_rate": 7.379402129575645e-06,
"loss": 1.5032843351364136,
"step": 776
},
{
"epoch": 1.1896024464831805,
"grad_norm": 0.3218024969100952,
"learning_rate": 7.3656016915870545e-06,
"loss": 1.4965013265609741,
"step": 778
},
{
"epoch": 1.1926605504587156,
"grad_norm": 0.4919971227645874,
"learning_rate": 7.351780033248491e-06,
"loss": 1.4509224891662598,
"step": 780
},
{
"epoch": 1.1957186544342508,
"grad_norm": 0.3981909155845642,
"learning_rate": 7.33793731177488e-06,
"loss": 1.4464759826660156,
"step": 782
},
{
"epoch": 1.198776758409786,
"grad_norm": 0.3076995611190796,
"learning_rate": 7.324073684620726e-06,
"loss": 1.4577126502990723,
"step": 784
},
{
"epoch": 1.2018348623853212,
"grad_norm": 0.28227174282073975,
"learning_rate": 7.310189309478331e-06,
"loss": 1.439997911453247,
"step": 786
},
{
"epoch": 1.2048929663608563,
"grad_norm": 0.26599401235580444,
"learning_rate": 7.296284344275991e-06,
"loss": 1.531783103942871,
"step": 788
},
{
"epoch": 1.2079510703363914,
"grad_norm": 0.69685959815979,
"learning_rate": 7.282358947176207e-06,
"loss": 1.4577662944793701,
"step": 790
},
{
"epoch": 1.2110091743119267,
"grad_norm": 0.25103896856307983,
"learning_rate": 7.268413276573881e-06,
"loss": 1.3561824560165405,
"step": 792
},
{
"epoch": 1.2140672782874617,
"grad_norm": 0.21765579283237457,
"learning_rate": 7.25444749109452e-06,
"loss": 1.3165652751922607,
"step": 794
},
{
"epoch": 1.217125382262997,
"grad_norm": 0.2564055919647217,
"learning_rate": 7.2404617495924254e-06,
"loss": 1.383346676826477,
"step": 796
},
{
"epoch": 1.2201834862385321,
"grad_norm": 0.40797773003578186,
"learning_rate": 7.226456211148891e-06,
"loss": 1.3315465450286865,
"step": 798
},
{
"epoch": 1.2232415902140672,
"grad_norm": 0.31532490253448486,
"learning_rate": 7.212431035070391e-06,
"loss": 1.3896580934524536,
"step": 800
},
{
"epoch": 1.2262996941896025,
"grad_norm": 0.25705334544181824,
"learning_rate": 7.198386380886765e-06,
"loss": 1.3460421562194824,
"step": 802
},
{
"epoch": 1.2293577981651376,
"grad_norm": 0.31377753615379333,
"learning_rate": 7.1843224083494154e-06,
"loss": 1.595191240310669,
"step": 804
},
{
"epoch": 1.2324159021406729,
"grad_norm": 0.2853119969367981,
"learning_rate": 7.170239277429474e-06,
"loss": 1.6170880794525146,
"step": 806
},
{
"epoch": 1.235474006116208,
"grad_norm": 0.44243165850639343,
"learning_rate": 7.156137148315993e-06,
"loss": 1.6550755500793457,
"step": 808
},
{
"epoch": 1.238532110091743,
"grad_norm": 0.3517357110977173,
"learning_rate": 7.14201618141412e-06,
"loss": 1.566192865371704,
"step": 810
},
{
"epoch": 1.2415902140672783,
"grad_norm": 0.2986673414707184,
"learning_rate": 7.127876537343277e-06,
"loss": 1.63118314743042,
"step": 812
},
{
"epoch": 1.2446483180428134,
"grad_norm": 0.3479074537754059,
"learning_rate": 7.1137183769353225e-06,
"loss": 1.5168559551239014,
"step": 814
},
{
"epoch": 1.2477064220183487,
"grad_norm": 0.4152420461177826,
"learning_rate": 7.099541861232736e-06,
"loss": 1.6398264169692993,
"step": 816
},
{
"epoch": 1.2507645259938838,
"grad_norm": 0.384573370218277,
"learning_rate": 7.085347151486779e-06,
"loss": 1.4128949642181396,
"step": 818
},
{
"epoch": 1.2538226299694188,
"grad_norm": 0.3804616630077362,
"learning_rate": 7.071134409155659e-06,
"loss": 1.557448148727417,
"step": 820
},
{
"epoch": 1.2568807339449541,
"grad_norm": 0.6236130595207214,
"learning_rate": 7.056903795902701e-06,
"loss": 1.3184959888458252,
"step": 822
},
{
"epoch": 1.2599388379204892,
"grad_norm": 0.7443933486938477,
"learning_rate": 7.042655473594495e-06,
"loss": 1.537932276725769,
"step": 824
},
{
"epoch": 1.2629969418960245,
"grad_norm": 0.5472233891487122,
"learning_rate": 7.028389604299074e-06,
"loss": 1.1561626195907593,
"step": 826
},
{
"epoch": 1.2660550458715596,
"grad_norm": 0.847542941570282,
"learning_rate": 7.01410635028405e-06,
"loss": 1.1249284744262695,
"step": 828
},
{
"epoch": 1.2691131498470947,
"grad_norm": 0.3495579957962036,
"learning_rate": 6.9998058740147835e-06,
"loss": 1.3474421501159668,
"step": 830
},
{
"epoch": 1.27217125382263,
"grad_norm": 0.4069005846977234,
"learning_rate": 6.985488338152529e-06,
"loss": 1.3892837762832642,
"step": 832
},
{
"epoch": 1.2752293577981653,
"grad_norm": 0.6165335178375244,
"learning_rate": 6.971153905552587e-06,
"loss": 1.524814248085022,
"step": 834
},
{
"epoch": 1.2782874617737003,
"grad_norm": 0.6481596827507019,
"learning_rate": 6.956802739262446e-06,
"loss": 1.464059829711914,
"step": 836
},
{
"epoch": 1.2813455657492354,
"grad_norm": 0.3051135241985321,
"learning_rate": 6.942435002519938e-06,
"loss": 1.212691307067871,
"step": 838
},
{
"epoch": 1.2844036697247707,
"grad_norm": 0.31896138191223145,
"learning_rate": 6.9280508587513725e-06,
"loss": 1.179284691810608,
"step": 840
},
{
"epoch": 1.2874617737003058,
"grad_norm": 0.2261551022529602,
"learning_rate": 6.913650471569684e-06,
"loss": 1.38997220993042,
"step": 842
},
{
"epoch": 1.290519877675841,
"grad_norm": 0.3368714451789856,
"learning_rate": 6.899234004772566e-06,
"loss": 1.3169426918029785,
"step": 844
},
{
"epoch": 1.2935779816513762,
"grad_norm": 0.49499788880348206,
"learning_rate": 6.884801622340612e-06,
"loss": 1.293768048286438,
"step": 846
},
{
"epoch": 1.2966360856269112,
"grad_norm": 0.2904210686683655,
"learning_rate": 6.870353488435447e-06,
"loss": 1.5008976459503174,
"step": 848
},
{
"epoch": 1.2996941896024465,
"grad_norm": 0.4230108857154846,
"learning_rate": 6.855889767397863e-06,
"loss": 1.4707106351852417,
"step": 850
},
{
"epoch": 1.3027522935779816,
"grad_norm": 0.2836777865886688,
"learning_rate": 6.841410623745944e-06,
"loss": 1.182532548904419,
"step": 852
},
{
"epoch": 1.305810397553517,
"grad_norm": 0.3048684895038605,
"learning_rate": 6.826916222173205e-06,
"loss": 1.373314380645752,
"step": 854
},
{
"epoch": 1.308868501529052,
"grad_norm": 0.38874655961990356,
"learning_rate": 6.812406727546713e-06,
"loss": 1.5207183361053467,
"step": 856
},
{
"epoch": 1.311926605504587,
"grad_norm": 0.541847288608551,
"learning_rate": 6.7978823049052046e-06,
"loss": 1.6546745300292969,
"step": 858
},
{
"epoch": 1.3149847094801224,
"grad_norm": 0.3354927897453308,
"learning_rate": 6.783343119457221e-06,
"loss": 1.6852827072143555,
"step": 860
},
{
"epoch": 1.3180428134556574,
"grad_norm": 0.22799281775951385,
"learning_rate": 6.768789336579224e-06,
"loss": 1.7998615503311157,
"step": 862
},
{
"epoch": 1.3211009174311927,
"grad_norm": 0.2829393446445465,
"learning_rate": 6.754221121813707e-06,
"loss": 1.3555914163589478,
"step": 864
},
{
"epoch": 1.3241590214067278,
"grad_norm": 0.2552604377269745,
"learning_rate": 6.739638640867332e-06,
"loss": 1.44038724899292,
"step": 866
},
{
"epoch": 1.3272171253822629,
"grad_norm": 0.2328341007232666,
"learning_rate": 6.72504205960902e-06,
"loss": 1.2792387008666992,
"step": 868
},
{
"epoch": 1.3302752293577982,
"grad_norm": 0.19776956737041473,
"learning_rate": 6.710431544068085e-06,
"loss": 1.2014856338500977,
"step": 870
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.2862965762615204,
"learning_rate": 6.695807260432332e-06,
"loss": 1.612195372581482,
"step": 872
},
{
"epoch": 1.3363914373088686,
"grad_norm": 0.2737024426460266,
"learning_rate": 6.681169375046173e-06,
"loss": 1.4856352806091309,
"step": 874
},
{
"epoch": 1.3394495412844036,
"grad_norm": 0.33617132902145386,
"learning_rate": 6.666518054408734e-06,
"loss": 1.6690922975540161,
"step": 876
},
{
"epoch": 1.3425076452599387,
"grad_norm": 0.33230748772621155,
"learning_rate": 6.65185346517196e-06,
"loss": 1.134220838546753,
"step": 878
},
{
"epoch": 1.345565749235474,
"grad_norm": 0.34520813822746277,
"learning_rate": 6.637175774138722e-06,
"loss": 1.3939542770385742,
"step": 880
},
{
"epoch": 1.3486238532110093,
"grad_norm": 0.3193676471710205,
"learning_rate": 6.622485148260916e-06,
"loss": 1.6689043045043945,
"step": 882
},
{
"epoch": 1.3516819571865444,
"grad_norm": 0.2586718499660492,
"learning_rate": 6.607781754637567e-06,
"loss": 1.3927881717681885,
"step": 884
},
{
"epoch": 1.3547400611620795,
"grad_norm": 0.36470475792884827,
"learning_rate": 6.593065760512924e-06,
"loss": 1.5524687767028809,
"step": 886
},
{
"epoch": 1.3577981651376148,
"grad_norm": 0.5333327054977417,
"learning_rate": 6.578337333274566e-06,
"loss": 1.4335553646087646,
"step": 888
},
{
"epoch": 1.3608562691131498,
"grad_norm": 0.24828922748565674,
"learning_rate": 6.563596640451489e-06,
"loss": 1.3478354215621948,
"step": 890
},
{
"epoch": 1.3639143730886851,
"grad_norm": 0.2684786021709442,
"learning_rate": 6.548843849712206e-06,
"loss": 1.4221248626708984,
"step": 892
},
{
"epoch": 1.3669724770642202,
"grad_norm": 0.2922813594341278,
"learning_rate": 6.534079128862835e-06,
"loss": 1.4792616367340088,
"step": 894
},
{
"epoch": 1.3700305810397553,
"grad_norm": 0.21960243582725525,
"learning_rate": 6.5193026458452006e-06,
"loss": 1.3363940715789795,
"step": 896
},
{
"epoch": 1.3730886850152906,
"grad_norm": 0.41456371545791626,
"learning_rate": 6.50451456873491e-06,
"loss": 1.4480544328689575,
"step": 898
},
{
"epoch": 1.3761467889908257,
"grad_norm": 0.6222192049026489,
"learning_rate": 6.489715065739448e-06,
"loss": 1.7465565204620361,
"step": 900
},
{
"epoch": 1.379204892966361,
"grad_norm": 0.5998108983039856,
"learning_rate": 6.474904305196268e-06,
"loss": 2.144679546356201,
"step": 902
},
{
"epoch": 1.382262996941896,
"grad_norm": 0.5612609386444092,
"learning_rate": 6.4600824555708695e-06,
"loss": 1.378048300743103,
"step": 904
},
{
"epoch": 1.385321100917431,
"grad_norm": 0.32021385431289673,
"learning_rate": 6.445249685454885e-06,
"loss": 1.361167073249817,
"step": 906
},
{
"epoch": 1.3883792048929664,
"grad_norm": 0.36393630504608154,
"learning_rate": 6.4304061635641645e-06,
"loss": 1.433903694152832,
"step": 908
},
{
"epoch": 1.3914373088685015,
"grad_norm": 0.7985405325889587,
"learning_rate": 6.415552058736854e-06,
"loss": 1.5466125011444092,
"step": 910
},
{
"epoch": 1.3944954128440368,
"grad_norm": 0.30912530422210693,
"learning_rate": 6.4006875399314705e-06,
"loss": 1.463235855102539,
"step": 912
},
{
"epoch": 1.3975535168195719,
"grad_norm": 0.2953026294708252,
"learning_rate": 6.3858127762249945e-06,
"loss": 1.3276557922363281,
"step": 914
},
{
"epoch": 1.400611620795107,
"grad_norm": 0.19828742742538452,
"learning_rate": 6.3709279368109264e-06,
"loss": 1.2300511598587036,
"step": 916
},
{
"epoch": 1.4036697247706422,
"grad_norm": 0.21878407895565033,
"learning_rate": 6.356033190997386e-06,
"loss": 1.1606783866882324,
"step": 918
},
{
"epoch": 1.4067278287461773,
"grad_norm": 0.19046013057231903,
"learning_rate": 6.341128708205162e-06,
"loss": 1.3056751489639282,
"step": 920
},
{
"epoch": 1.4097859327217126,
"grad_norm": 0.40108954906463623,
"learning_rate": 6.326214657965804e-06,
"loss": 1.5421757698059082,
"step": 922
},
{
"epoch": 1.4128440366972477,
"grad_norm": 0.46537211537361145,
"learning_rate": 6.311291209919682e-06,
"loss": 1.5684192180633545,
"step": 924
},
{
"epoch": 1.4159021406727827,
"grad_norm": 0.5733487606048584,
"learning_rate": 6.296358533814065e-06,
"loss": 1.5650339126586914,
"step": 926
},
{
"epoch": 1.418960244648318,
"grad_norm": 0.4306733310222626,
"learning_rate": 6.281416799501188e-06,
"loss": 1.5992372035980225,
"step": 928
},
{
"epoch": 1.4220183486238533,
"grad_norm": 0.407654732465744,
"learning_rate": 6.266466176936313e-06,
"loss": 1.4283607006072998,
"step": 930
},
{
"epoch": 1.4250764525993884,
"grad_norm": 4.419346332550049,
"learning_rate": 6.251506836175807e-06,
"loss": 1.5659562349319458,
"step": 932
},
{
"epoch": 1.4281345565749235,
"grad_norm": 0.7012003064155579,
"learning_rate": 6.236538947375203e-06,
"loss": 1.4677741527557373,
"step": 934
},
{
"epoch": 1.4311926605504588,
"grad_norm": 0.22764644026756287,
"learning_rate": 6.221562680787258e-06,
"loss": 1.374863624572754,
"step": 936
},
{
"epoch": 1.4342507645259939,
"grad_norm": 0.4946407973766327,
"learning_rate": 6.20657820676003e-06,
"loss": 1.3795430660247803,
"step": 938
},
{
"epoch": 1.4373088685015292,
"grad_norm": 1.4666649103164673,
"learning_rate": 6.191585695734925e-06,
"loss": 1.584106683731079,
"step": 940
},
{
"epoch": 1.4403669724770642,
"grad_norm": 0.9116813540458679,
"learning_rate": 6.176585318244775e-06,
"loss": 1.3207650184631348,
"step": 942
},
{
"epoch": 1.4434250764525993,
"grad_norm": 0.4549460709095001,
"learning_rate": 6.161577244911883e-06,
"loss": 1.5188086032867432,
"step": 944
},
{
"epoch": 1.4464831804281346,
"grad_norm": 0.6293279528617859,
"learning_rate": 6.146561646446088e-06,
"loss": 1.40483558177948,
"step": 946
},
{
"epoch": 1.4495412844036697,
"grad_norm": 0.5348030924797058,
"learning_rate": 6.131538693642828e-06,
"loss": 1.4180057048797607,
"step": 948
},
{
"epoch": 1.452599388379205,
"grad_norm": 0.7010774612426758,
"learning_rate": 6.116508557381191e-06,
"loss": 1.5555238723754883,
"step": 950
},
{
"epoch": 1.45565749235474,
"grad_norm": 0.3996182382106781,
"learning_rate": 6.1014714086219725e-06,
"loss": 1.5635944604873657,
"step": 952
},
{
"epoch": 1.4587155963302751,
"grad_norm": 0.3819827139377594,
"learning_rate": 6.086427418405735e-06,
"loss": 1.3868696689605713,
"step": 954
},
{
"epoch": 1.4617737003058104,
"grad_norm": 0.24838334321975708,
"learning_rate": 6.071376757850858e-06,
"loss": 1.3217381238937378,
"step": 956
},
{
"epoch": 1.4648318042813455,
"grad_norm": 0.5527139902114868,
"learning_rate": 6.0563195981515885e-06,
"loss": 1.456415057182312,
"step": 958
},
{
"epoch": 1.4678899082568808,
"grad_norm": 0.2822090983390808,
"learning_rate": 6.0412561105761055e-06,
"loss": 1.3990404605865479,
"step": 960
},
{
"epoch": 1.470948012232416,
"grad_norm": 0.370832234621048,
"learning_rate": 6.026186466464562e-06,
"loss": 1.5524400472640991,
"step": 962
},
{
"epoch": 1.474006116207951,
"grad_norm": 0.30970191955566406,
"learning_rate": 6.011110837227138e-06,
"loss": 1.4143943786621094,
"step": 964
},
{
"epoch": 1.4770642201834863,
"grad_norm": 0.3659932613372803,
"learning_rate": 5.996029394342089e-06,
"loss": 1.3726913928985596,
"step": 966
},
{
"epoch": 1.4801223241590213,
"grad_norm": 0.40378639101982117,
"learning_rate": 5.980942309353803e-06,
"loss": 1.3403112888336182,
"step": 968
},
{
"epoch": 1.4831804281345566,
"grad_norm": 0.2668818235397339,
"learning_rate": 5.965849753870841e-06,
"loss": 1.4581551551818848,
"step": 970
},
{
"epoch": 1.4862385321100917,
"grad_norm": 0.39147576689720154,
"learning_rate": 5.950751899563989e-06,
"loss": 1.4426075220108032,
"step": 972
},
{
"epoch": 1.4892966360856268,
"grad_norm": 0.4053312838077545,
"learning_rate": 5.935648918164308e-06,
"loss": 1.429807424545288,
"step": 974
},
{
"epoch": 1.492354740061162,
"grad_norm": 0.2912329435348511,
"learning_rate": 5.9205409814611694e-06,
"loss": 1.6015820503234863,
"step": 976
},
{
"epoch": 1.4954128440366974,
"grad_norm": 0.39581140875816345,
"learning_rate": 5.9054282613003165e-06,
"loss": 1.7901129722595215,
"step": 978
},
{
"epoch": 1.4984709480122325,
"grad_norm": 5.4772210121154785,
"learning_rate": 5.890310929581899e-06,
"loss": 1.665008544921875,
"step": 980
},
{
"epoch": 1.5015290519877675,
"grad_norm": 0.32753488421440125,
"learning_rate": 5.875189158258521e-06,
"loss": 1.658569574356079,
"step": 982
},
{
"epoch": 1.5045871559633026,
"grad_norm": 0.3322629928588867,
"learning_rate": 5.860063119333287e-06,
"loss": 1.568853735923767,
"step": 984
},
{
"epoch": 1.507645259938838,
"grad_norm": 0.3625146746635437,
"learning_rate": 5.844932984857841e-06,
"loss": 1.2555010318756104,
"step": 986
},
{
"epoch": 1.5107033639143732,
"grad_norm": 0.3967174291610718,
"learning_rate": 5.829798926930411e-06,
"loss": 1.2352030277252197,
"step": 988
},
{
"epoch": 1.5137614678899083,
"grad_norm": 0.92249995470047,
"learning_rate": 5.814661117693856e-06,
"loss": 1.6529834270477295,
"step": 990
},
{
"epoch": 1.5168195718654434,
"grad_norm": 0.43264713883399963,
"learning_rate": 5.799519729333702e-06,
"loss": 1.6510822772979736,
"step": 992
},
{
"epoch": 1.5198776758409784,
"grad_norm": 0.48226049542427063,
"learning_rate": 5.784374934076188e-06,
"loss": 1.7469120025634766,
"step": 994
},
{
"epoch": 1.5229357798165137,
"grad_norm": 0.6006577014923096,
"learning_rate": 5.769226904186301e-06,
"loss": 1.6751326322555542,
"step": 996
},
{
"epoch": 1.525993883792049,
"grad_norm": 0.417524129152298,
"learning_rate": 5.754075811965826e-06,
"loss": 1.7241541147232056,
"step": 998
},
{
"epoch": 1.529051987767584,
"grad_norm": 0.4846678674221039,
"learning_rate": 5.738921829751374e-06,
"loss": 1.5894498825073242,
"step": 1000
},
{
"epoch": 1.5321100917431192,
"grad_norm": 0.37620386481285095,
"learning_rate": 5.723765129912433e-06,
"loss": 1.5567536354064941,
"step": 1002
},
{
"epoch": 1.5351681957186545,
"grad_norm": 0.9559251070022583,
"learning_rate": 5.708605884849402e-06,
"loss": 1.444126844406128,
"step": 1004
},
{
"epoch": 1.5382262996941896,
"grad_norm": 0.4608314335346222,
"learning_rate": 5.6934442669916315e-06,
"loss": 1.7045128345489502,
"step": 1006
},
{
"epoch": 1.5412844036697249,
"grad_norm": 0.5580506920814514,
"learning_rate": 5.678280448795457e-06,
"loss": 1.576319932937622,
"step": 1008
},
{
"epoch": 1.54434250764526,
"grad_norm": 0.414983332157135,
"learning_rate": 5.663114602742247e-06,
"loss": 1.1866123676300049,
"step": 1010
},
{
"epoch": 1.547400611620795,
"grad_norm": 0.5494526624679565,
"learning_rate": 5.647946901336433e-06,
"loss": 1.7420477867126465,
"step": 1012
},
{
"epoch": 1.5504587155963303,
"grad_norm": 0.6842697262763977,
"learning_rate": 5.632777517103552e-06,
"loss": 1.7904109954833984,
"step": 1014
},
{
"epoch": 1.5535168195718656,
"grad_norm": 0.43980666995048523,
"learning_rate": 5.617606622588282e-06,
"loss": 1.862006425857544,
"step": 1016
},
{
"epoch": 1.5565749235474007,
"grad_norm": 0.3990402817726135,
"learning_rate": 5.602434390352476e-06,
"loss": 1.7830100059509277,
"step": 1018
},
{
"epoch": 1.5596330275229358,
"grad_norm": 0.4031524360179901,
"learning_rate": 5.58726099297321e-06,
"loss": 1.7594141960144043,
"step": 1020
},
{
"epoch": 1.5626911314984708,
"grad_norm": 0.6580591797828674,
"learning_rate": 5.572086603040809e-06,
"loss": 1.6219829320907593,
"step": 1022
},
{
"epoch": 1.5657492354740061,
"grad_norm": 0.36656439304351807,
"learning_rate": 5.556911393156885e-06,
"loss": 1.4893901348114014,
"step": 1024
},
{
"epoch": 1.5688073394495414,
"grad_norm": 0.6261524558067322,
"learning_rate": 5.541735535932383e-06,
"loss": 1.058058261871338,
"step": 1026
},
{
"epoch": 1.5718654434250765,
"grad_norm": 0.3441345691680908,
"learning_rate": 5.526559203985605e-06,
"loss": 1.0509142875671387,
"step": 1028
},
{
"epoch": 1.5749235474006116,
"grad_norm": 0.2408900260925293,
"learning_rate": 5.511382569940258e-06,
"loss": 1.2871123552322388,
"step": 1030
},
{
"epoch": 1.5779816513761467,
"grad_norm": 0.45723816752433777,
"learning_rate": 5.496205806423481e-06,
"loss": 1.2235673666000366,
"step": 1032
},
{
"epoch": 1.581039755351682,
"grad_norm": 0.3109905421733856,
"learning_rate": 5.481029086063887e-06,
"loss": 1.177577018737793,
"step": 1034
},
{
"epoch": 1.5840978593272173,
"grad_norm": 0.20282985270023346,
"learning_rate": 5.4658525814896014e-06,
"loss": 1.2040612697601318,
"step": 1036
},
{
"epoch": 1.5871559633027523,
"grad_norm": 0.43076759576797485,
"learning_rate": 5.45067646532629e-06,
"loss": 1.4584531784057617,
"step": 1038
},
{
"epoch": 1.5902140672782874,
"grad_norm": 0.472885400056839,
"learning_rate": 5.435500910195203e-06,
"loss": 1.387641429901123,
"step": 1040
},
{
"epoch": 1.5932721712538225,
"grad_norm": 3.1532437801361084,
"learning_rate": 5.420326088711209e-06,
"loss": 1.221092700958252,
"step": 1042
},
{
"epoch": 1.5963302752293578,
"grad_norm": 0.6743189692497253,
"learning_rate": 5.405152173480833e-06,
"loss": 1.4836219549179077,
"step": 1044
},
{
"epoch": 1.599388379204893,
"grad_norm": 0.20277228951454163,
"learning_rate": 5.389979337100289e-06,
"loss": 1.5031371116638184,
"step": 1046
},
{
"epoch": 1.6024464831804281,
"grad_norm": 0.5120447874069214,
"learning_rate": 5.374807752153522e-06,
"loss": 1.282975673675537,
"step": 1048
},
{
"epoch": 1.6055045871559632,
"grad_norm": 0.35753709077835083,
"learning_rate": 5.359637591210242e-06,
"loss": 1.4665361642837524,
"step": 1050
},
{
"epoch": 1.6085626911314985,
"grad_norm": 0.7353309988975525,
"learning_rate": 5.344469026823959e-06,
"loss": 1.6730611324310303,
"step": 1052
},
{
"epoch": 1.6116207951070336,
"grad_norm": 0.4338257610797882,
"learning_rate": 5.329302231530029e-06,
"loss": 1.186348795890808,
"step": 1054
},
{
"epoch": 1.614678899082569,
"grad_norm": 0.42416566610336304,
"learning_rate": 5.31413737784368e-06,
"loss": 1.4430310726165771,
"step": 1056
},
{
"epoch": 1.617737003058104,
"grad_norm": 0.2432592213153839,
"learning_rate": 5.298974638258055e-06,
"loss": 1.518967866897583,
"step": 1058
},
{
"epoch": 1.620795107033639,
"grad_norm": 0.408245712518692,
"learning_rate": 5.283814185242252e-06,
"loss": 1.426690697669983,
"step": 1060
},
{
"epoch": 1.6238532110091743,
"grad_norm": 0.2117079198360443,
"learning_rate": 5.2686561912393606e-06,
"loss": 1.2693121433258057,
"step": 1062
},
{
"epoch": 1.6269113149847096,
"grad_norm": 4.30716609954834,
"learning_rate": 5.253500828664501e-06,
"loss": 0.9013931155204773,
"step": 1064
},
{
"epoch": 1.6299694189602447,
"grad_norm": 0.38770049810409546,
"learning_rate": 5.23834826990286e-06,
"loss": 1.5694489479064941,
"step": 1066
},
{
"epoch": 1.6330275229357798,
"grad_norm": 0.6700468063354492,
"learning_rate": 5.223198687307733e-06,
"loss": 1.503030776977539,
"step": 1068
},
{
"epoch": 1.6360856269113149,
"grad_norm": 0.2767106294631958,
"learning_rate": 5.208052253198564e-06,
"loss": 1.3917062282562256,
"step": 1070
},
{
"epoch": 1.6391437308868502,
"grad_norm": 0.3463125228881836,
"learning_rate": 5.192909139858981e-06,
"loss": 1.5068938732147217,
"step": 1072
},
{
"epoch": 1.6422018348623855,
"grad_norm": 0.3212260603904724,
"learning_rate": 5.177769519534846e-06,
"loss": 1.4421181678771973,
"step": 1074
},
{
"epoch": 1.6452599388379205,
"grad_norm": 0.4484805762767792,
"learning_rate": 5.162633564432285e-06,
"loss": 1.408212661743164,
"step": 1076
},
{
"epoch": 1.6483180428134556,
"grad_norm": 0.4805358350276947,
"learning_rate": 5.1475014467157325e-06,
"loss": 1.6133791208267212,
"step": 1078
},
{
"epoch": 1.6513761467889907,
"grad_norm": 0.5775420665740967,
"learning_rate": 5.132373338505978e-06,
"loss": 1.2856450080871582,
"step": 1080
},
{
"epoch": 1.654434250764526,
"grad_norm": 0.32906994223594666,
"learning_rate": 5.117249411878204e-06,
"loss": 1.04205322265625,
"step": 1082
},
{
"epoch": 1.6574923547400613,
"grad_norm": 0.5074779987335205,
"learning_rate": 5.10212983886003e-06,
"loss": 1.6698901653289795,
"step": 1084
},
{
"epoch": 1.6605504587155964,
"grad_norm": 0.36449626088142395,
"learning_rate": 5.087014791429552e-06,
"loss": 1.449878215789795,
"step": 1086
},
{
"epoch": 1.6636085626911314,
"grad_norm": 1.0477646589279175,
"learning_rate": 5.071904441513393e-06,
"loss": 1.5865240097045898,
"step": 1088
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.3797400891780853,
"learning_rate": 5.056798960984741e-06,
"loss": 1.4271771907806396,
"step": 1090
},
{
"epoch": 1.6697247706422018,
"grad_norm": 0.3018883466720581,
"learning_rate": 5.041698521661401e-06,
"loss": 1.6418373584747314,
"step": 1092
},
{
"epoch": 1.6727828746177371,
"grad_norm": 0.5908496379852295,
"learning_rate": 5.026603295303833e-06,
"loss": 1.5063586235046387,
"step": 1094
},
{
"epoch": 1.6758409785932722,
"grad_norm": 0.5799764394760132,
"learning_rate": 5.011513453613205e-06,
"loss": 1.5312390327453613,
"step": 1096
},
{
"epoch": 1.6788990825688073,
"grad_norm": 0.4648537337779999,
"learning_rate": 4.996429168229432e-06,
"loss": 1.4155495166778564,
"step": 1098
},
{
"epoch": 1.6819571865443423,
"grad_norm": 0.3357274830341339,
"learning_rate": 4.981350610729234e-06,
"loss": 1.07462477684021,
"step": 1100
},
{
"epoch": 1.6850152905198776,
"grad_norm": 0.8209952712059021,
"learning_rate": 4.966277952624179e-06,
"loss": 1.532288670539856,
"step": 1102
},
{
"epoch": 1.688073394495413,
"grad_norm": 0.6916195750236511,
"learning_rate": 4.951211365358723e-06,
"loss": 1.5015881061553955,
"step": 1104
},
{
"epoch": 1.691131498470948,
"grad_norm": 0.6677690148353577,
"learning_rate": 4.936151020308282e-06,
"loss": 1.5166327953338623,
"step": 1106
},
{
"epoch": 1.694189602446483,
"grad_norm": 0.7889437675476074,
"learning_rate": 4.921097088777261e-06,
"loss": 1.5232961177825928,
"step": 1108
},
{
"epoch": 1.6972477064220184,
"grad_norm": 0.5421835780143738,
"learning_rate": 4.906049741997119e-06,
"loss": 1.3370258808135986,
"step": 1110
},
{
"epoch": 1.7003058103975535,
"grad_norm": 0.28672778606414795,
"learning_rate": 4.8910091511244115e-06,
"loss": 1.5552886724472046,
"step": 1112
},
{
"epoch": 1.7033639143730888,
"grad_norm": 0.8609727025032043,
"learning_rate": 4.875975487238853e-06,
"loss": 1.6477062702178955,
"step": 1114
},
{
"epoch": 1.7064220183486238,
"grad_norm": 0.46577727794647217,
"learning_rate": 4.860948921341366e-06,
"loss": 1.3554713726043701,
"step": 1116
},
{
"epoch": 1.709480122324159,
"grad_norm": 0.4357546865940094,
"learning_rate": 4.845929624352136e-06,
"loss": 1.616469383239746,
"step": 1118
},
{
"epoch": 1.7125382262996942,
"grad_norm": 0.8016573786735535,
"learning_rate": 4.830917767108666e-06,
"loss": 1.4049677848815918,
"step": 1120
},
{
"epoch": 1.7155963302752295,
"grad_norm": 0.34570103883743286,
"learning_rate": 4.8159135203638394e-06,
"loss": 1.5350430011749268,
"step": 1122
},
{
"epoch": 1.7186544342507646,
"grad_norm": 0.6164813041687012,
"learning_rate": 4.800917054783971e-06,
"loss": 1.4737257957458496,
"step": 1124
},
{
"epoch": 1.7217125382262997,
"grad_norm": 0.30021098256111145,
"learning_rate": 4.785928540946869e-06,
"loss": 1.59697425365448,
"step": 1126
},
{
"epoch": 1.7247706422018347,
"grad_norm": 0.3294142782688141,
"learning_rate": 4.770948149339897e-06,
"loss": 1.6918811798095703,
"step": 1128
},
{
"epoch": 1.72782874617737,
"grad_norm": 0.33221927285194397,
"learning_rate": 4.755976050358026e-06,
"loss": 1.581977128982544,
"step": 1130
},
{
"epoch": 1.7308868501529053,
"grad_norm": 0.27995747327804565,
"learning_rate": 4.741012414301907e-06,
"loss": 1.42479407787323,
"step": 1132
},
{
"epoch": 1.7339449541284404,
"grad_norm": 0.4526294767856598,
"learning_rate": 4.726057411375927e-06,
"loss": 1.5270183086395264,
"step": 1134
},
{
"epoch": 1.7370030581039755,
"grad_norm": 0.6458525657653809,
"learning_rate": 4.711111211686279e-06,
"loss": 1.5350821018218994,
"step": 1136
},
{
"epoch": 1.7400611620795106,
"grad_norm": 0.40516841411590576,
"learning_rate": 4.6961739852390175e-06,
"loss": 1.5310497283935547,
"step": 1138
},
{
"epoch": 1.7431192660550459,
"grad_norm": 1.3104746341705322,
"learning_rate": 4.681245901938134e-06,
"loss": 1.5385562181472778,
"step": 1140
},
{
"epoch": 1.7461773700305812,
"grad_norm": 0.40381914377212524,
"learning_rate": 4.666327131583621e-06,
"loss": 1.5392662286758423,
"step": 1142
},
{
"epoch": 1.7492354740061162,
"grad_norm": 0.8844152688980103,
"learning_rate": 4.65141784386954e-06,
"loss": 1.333682894706726,
"step": 1144
},
{
"epoch": 1.7522935779816513,
"grad_norm": 0.423922061920166,
"learning_rate": 4.636518208382091e-06,
"loss": 1.4100391864776611,
"step": 1146
},
{
"epoch": 1.7553516819571864,
"grad_norm": 0.3589678406715393,
"learning_rate": 4.621628394597687e-06,
"loss": 1.341862440109253,
"step": 1148
},
{
"epoch": 1.7584097859327217,
"grad_norm": 0.6498292088508606,
"learning_rate": 4.606748571881018e-06,
"loss": 1.4297010898590088,
"step": 1150
},
{
"epoch": 1.761467889908257,
"grad_norm": 0.5506405234336853,
"learning_rate": 4.59187890948314e-06,
"loss": 1.4309487342834473,
"step": 1152
},
{
"epoch": 1.764525993883792,
"grad_norm": 0.45955854654312134,
"learning_rate": 4.577019576539527e-06,
"loss": 1.2851155996322632,
"step": 1154
},
{
"epoch": 1.7675840978593271,
"grad_norm": 0.28625011444091797,
"learning_rate": 4.562170742068175e-06,
"loss": 0.9397743940353394,
"step": 1156
},
{
"epoch": 1.7706422018348624,
"grad_norm": 0.22773736715316772,
"learning_rate": 4.547332574967653e-06,
"loss": 1.237460732460022,
"step": 1158
},
{
"epoch": 1.7737003058103975,
"grad_norm": 0.25427719950675964,
"learning_rate": 4.5325052440151985e-06,
"loss": 1.3028910160064697,
"step": 1160
},
{
"epoch": 1.7767584097859328,
"grad_norm": 0.2875189781188965,
"learning_rate": 4.517688917864794e-06,
"loss": 1.3547457456588745,
"step": 1162
},
{
"epoch": 1.7798165137614679,
"grad_norm": 0.21899199485778809,
"learning_rate": 4.502883765045244e-06,
"loss": 1.36411714553833,
"step": 1164
},
{
"epoch": 1.782874617737003,
"grad_norm": 0.21183030307292938,
"learning_rate": 4.488089953958264e-06,
"loss": 1.4323028326034546,
"step": 1166
},
{
"epoch": 1.7859327217125383,
"grad_norm": 0.22526955604553223,
"learning_rate": 4.473307652876563e-06,
"loss": 1.3429040908813477,
"step": 1168
},
{
"epoch": 1.7889908256880735,
"grad_norm": 0.266107439994812,
"learning_rate": 4.458537029941926e-06,
"loss": 1.3663442134857178,
"step": 1170
},
{
"epoch": 1.7920489296636086,
"grad_norm": 0.490496963262558,
"learning_rate": 4.4437782531633074e-06,
"loss": 1.3354597091674805,
"step": 1172
},
{
"epoch": 1.7951070336391437,
"grad_norm": 0.1854841560125351,
"learning_rate": 4.429031490414919e-06,
"loss": 1.3446393013000488,
"step": 1174
},
{
"epoch": 1.7981651376146788,
"grad_norm": 0.1960364729166031,
"learning_rate": 4.414296909434311e-06,
"loss": 1.3029416799545288,
"step": 1176
},
{
"epoch": 1.801223241590214,
"grad_norm": 0.35048866271972656,
"learning_rate": 4.399574677820481e-06,
"loss": 1.348449945449829,
"step": 1178
},
{
"epoch": 1.8042813455657494,
"grad_norm": 0.3793323040008545,
"learning_rate": 4.384864963031952e-06,
"loss": 1.297593593597412,
"step": 1180
},
{
"epoch": 1.8073394495412844,
"grad_norm": 0.14626124501228333,
"learning_rate": 4.370167932384873e-06,
"loss": 1.1695170402526855,
"step": 1182
},
{
"epoch": 1.8103975535168195,
"grad_norm": 0.16865181922912598,
"learning_rate": 4.355483753051125e-06,
"loss": 1.2123092412948608,
"step": 1184
},
{
"epoch": 1.8134556574923546,
"grad_norm": 0.1931789070367813,
"learning_rate": 4.340812592056401e-06,
"loss": 0.9932126998901367,
"step": 1186
},
{
"epoch": 1.81651376146789,
"grad_norm": 0.2547837793827057,
"learning_rate": 4.326154616278326e-06,
"loss": 1.2431546449661255,
"step": 1188
},
{
"epoch": 1.8195718654434252,
"grad_norm": 0.23825769126415253,
"learning_rate": 4.311509992444539e-06,
"loss": 1.286515712738037,
"step": 1190
},
{
"epoch": 1.8226299694189603,
"grad_norm": 0.25244706869125366,
"learning_rate": 4.296878887130819e-06,
"loss": 1.3000450134277344,
"step": 1192
},
{
"epoch": 1.8256880733944953,
"grad_norm": 0.23451480269432068,
"learning_rate": 4.282261466759165e-06,
"loss": 1.2664532661437988,
"step": 1194
},
{
"epoch": 1.8287461773700304,
"grad_norm": 0.2735919952392578,
"learning_rate": 4.267657897595929e-06,
"loss": 1.288360834121704,
"step": 1196
},
{
"epoch": 1.8318042813455657,
"grad_norm": 0.18107269704341888,
"learning_rate": 4.253068345749903e-06,
"loss": 1.2625651359558105,
"step": 1198
},
{
"epoch": 1.834862385321101,
"grad_norm": 0.2293253242969513,
"learning_rate": 4.238492977170439e-06,
"loss": 1.234043836593628,
"step": 1200
},
{
"epoch": 1.837920489296636,
"grad_norm": 0.27160146832466125,
"learning_rate": 4.223931957645566e-06,
"loss": 1.300539493560791,
"step": 1202
},
{
"epoch": 1.8409785932721712,
"grad_norm": 0.25112462043762207,
"learning_rate": 4.2093854528000955e-06,
"loss": 1.2719401121139526,
"step": 1204
},
{
"epoch": 1.8440366972477065,
"grad_norm": 0.33997592329978943,
"learning_rate": 4.194853628093742e-06,
"loss": 1.2453508377075195,
"step": 1206
},
{
"epoch": 1.8470948012232415,
"grad_norm": 0.6576793789863586,
"learning_rate": 4.180336648819242e-06,
"loss": 1.233917236328125,
"step": 1208
},
{
"epoch": 1.8501529051987768,
"grad_norm": 0.26551222801208496,
"learning_rate": 4.165834680100469e-06,
"loss": 1.2595276832580566,
"step": 1210
},
{
"epoch": 1.853211009174312,
"grad_norm": 0.2170596420764923,
"learning_rate": 4.151347886890562e-06,
"loss": 1.2505378723144531,
"step": 1212
},
{
"epoch": 1.856269113149847,
"grad_norm": 0.2974804937839508,
"learning_rate": 4.1368764339700404e-06,
"loss": 1.2092756032943726,
"step": 1214
},
{
"epoch": 1.8593272171253823,
"grad_norm": 0.2567199468612671,
"learning_rate": 4.1224204859449425e-06,
"loss": 1.2698951959609985,
"step": 1216
},
{
"epoch": 1.8623853211009176,
"grad_norm": 0.23152267932891846,
"learning_rate": 4.107980207244937e-06,
"loss": 1.3027379512786865,
"step": 1218
},
{
"epoch": 1.8654434250764527,
"grad_norm": 0.26830926537513733,
"learning_rate": 4.093555762121469e-06,
"loss": 1.308929443359375,
"step": 1220
},
{
"epoch": 1.8685015290519877,
"grad_norm": 0.2566030025482178,
"learning_rate": 4.07914731464588e-06,
"loss": 1.2964577674865723,
"step": 1222
},
{
"epoch": 1.8715596330275228,
"grad_norm": 0.4025701582431793,
"learning_rate": 4.064755028707546e-06,
"loss": 1.31220543384552,
"step": 1224
},
{
"epoch": 1.8746177370030581,
"grad_norm": 0.25386303663253784,
"learning_rate": 4.0503790680120136e-06,
"loss": 1.299830436706543,
"step": 1226
},
{
"epoch": 1.8776758409785934,
"grad_norm": 0.39947405457496643,
"learning_rate": 4.036019596079136e-06,
"loss": 1.3202039003372192,
"step": 1228
},
{
"epoch": 1.8807339449541285,
"grad_norm": 0.23179592192173004,
"learning_rate": 4.021676776241218e-06,
"loss": 1.2405881881713867,
"step": 1230
},
{
"epoch": 1.8837920489296636,
"grad_norm": 0.48796483874320984,
"learning_rate": 4.007350771641151e-06,
"loss": 1.288329005241394,
"step": 1232
},
{
"epoch": 1.8868501529051986,
"grad_norm": 0.26645490527153015,
"learning_rate": 3.993041745230562e-06,
"loss": 1.2443333864212036,
"step": 1234
},
{
"epoch": 1.889908256880734,
"grad_norm": 0.19715459644794464,
"learning_rate": 3.978749859767961e-06,
"loss": 1.2754254341125488,
"step": 1236
},
{
"epoch": 1.8929663608562692,
"grad_norm": 0.2424282431602478,
"learning_rate": 3.9644752778168836e-06,
"loss": 1.2853577136993408,
"step": 1238
},
{
"epoch": 1.8960244648318043,
"grad_norm": 0.22451399266719818,
"learning_rate": 3.950218161744049e-06,
"loss": 1.308832049369812,
"step": 1240
},
{
"epoch": 1.8990825688073394,
"grad_norm": 0.38970160484313965,
"learning_rate": 3.935978673717512e-06,
"loss": 1.2945680618286133,
"step": 1242
},
{
"epoch": 1.9021406727828745,
"grad_norm": 0.22287186980247498,
"learning_rate": 3.921756975704809e-06,
"loss": 1.2276027202606201,
"step": 1244
},
{
"epoch": 1.9051987767584098,
"grad_norm": 0.2538350820541382,
"learning_rate": 3.9075532294711326e-06,
"loss": 1.2546557188034058,
"step": 1246
},
{
"epoch": 1.908256880733945,
"grad_norm": 0.19810384511947632,
"learning_rate": 3.893367596577475e-06,
"loss": 1.2940235137939453,
"step": 1248
},
{
"epoch": 1.9113149847094801,
"grad_norm": 0.20586298406124115,
"learning_rate": 3.8792002383788044e-06,
"loss": 1.3136601448059082,
"step": 1250
},
{
"epoch": 1.9143730886850152,
"grad_norm": 0.2770041227340698,
"learning_rate": 3.865051316022215e-06,
"loss": 1.2952957153320312,
"step": 1252
},
{
"epoch": 1.9174311926605505,
"grad_norm": 0.22728121280670166,
"learning_rate": 3.85092099044511e-06,
"loss": 1.271630048751831,
"step": 1254
},
{
"epoch": 1.9204892966360856,
"grad_norm": 0.1984010934829712,
"learning_rate": 3.836809422373354e-06,
"loss": 1.2360022068023682,
"step": 1256
},
{
"epoch": 1.9235474006116209,
"grad_norm": 0.24555295705795288,
"learning_rate": 3.822716772319463e-06,
"loss": 1.271683692932129,
"step": 1258
},
{
"epoch": 1.926605504587156,
"grad_norm": 0.20771312713623047,
"learning_rate": 3.8086432005807616e-06,
"loss": 1.2962419986724854,
"step": 1260
},
{
"epoch": 1.929663608562691,
"grad_norm": 0.268265962600708,
"learning_rate": 3.794588867237574e-06,
"loss": 1.2458467483520508,
"step": 1262
},
{
"epoch": 1.9327217125382263,
"grad_norm": 0.3802253007888794,
"learning_rate": 3.780553932151392e-06,
"loss": 1.2733559608459473,
"step": 1264
},
{
"epoch": 1.9357798165137616,
"grad_norm": 0.6309070587158203,
"learning_rate": 3.766538554963062e-06,
"loss": 1.270596981048584,
"step": 1266
},
{
"epoch": 1.9388379204892967,
"grad_norm": 0.3053569793701172,
"learning_rate": 3.752542895090969e-06,
"loss": 1.3194211721420288,
"step": 1268
},
{
"epoch": 1.9418960244648318,
"grad_norm": 0.21923166513442993,
"learning_rate": 3.7385671117292245e-06,
"loss": 1.3323618173599243,
"step": 1270
},
{
"epoch": 1.9449541284403669,
"grad_norm": 0.2166883647441864,
"learning_rate": 3.72461136384585e-06,
"loss": 1.2965784072875977,
"step": 1272
},
{
"epoch": 1.9480122324159022,
"grad_norm": 0.2825508117675781,
"learning_rate": 3.710675810180977e-06,
"loss": 1.3159446716308594,
"step": 1274
},
{
"epoch": 1.9510703363914375,
"grad_norm": 0.299638956785202,
"learning_rate": 3.696760609245035e-06,
"loss": 1.2833199501037598,
"step": 1276
},
{
"epoch": 1.9541284403669725,
"grad_norm": 0.2223178744316101,
"learning_rate": 3.68286591931695e-06,
"loss": 1.22653329372406,
"step": 1278
},
{
"epoch": 1.9571865443425076,
"grad_norm": 0.2592408359050751,
"learning_rate": 3.668991898442347e-06,
"loss": 1.2542335987091064,
"step": 1280
},
{
"epoch": 1.9602446483180427,
"grad_norm": 0.2755810618400574,
"learning_rate": 3.6551387044317464e-06,
"loss": 1.2745262384414673,
"step": 1282
},
{
"epoch": 1.963302752293578,
"grad_norm": 0.21057268977165222,
"learning_rate": 3.6413064948587773e-06,
"loss": 1.2521765232086182,
"step": 1284
},
{
"epoch": 1.9663608562691133,
"grad_norm": 0.34427741169929504,
"learning_rate": 3.6274954270583797e-06,
"loss": 1.263521432876587,
"step": 1286
},
{
"epoch": 1.9694189602446484,
"grad_norm": 0.2196524441242218,
"learning_rate": 3.6137056581250142e-06,
"loss": 1.3154864311218262,
"step": 1288
},
{
"epoch": 1.9724770642201834,
"grad_norm": 0.3191309869289398,
"learning_rate": 3.599937344910872e-06,
"loss": 1.2999801635742188,
"step": 1290
},
{
"epoch": 1.9755351681957185,
"grad_norm": 0.22587168216705322,
"learning_rate": 3.5861906440241057e-06,
"loss": 1.3176116943359375,
"step": 1292
},
{
"epoch": 1.9785932721712538,
"grad_norm": 0.2769485414028168,
"learning_rate": 3.5724657118270344e-06,
"loss": 1.273116111755371,
"step": 1294
},
{
"epoch": 1.981651376146789,
"grad_norm": 0.3299882411956787,
"learning_rate": 3.558762704434361e-06,
"loss": 1.268465280532837,
"step": 1296
},
{
"epoch": 1.9847094801223242,
"grad_norm": 0.26859885454177856,
"learning_rate": 3.545081777711412e-06,
"loss": 1.2919847965240479,
"step": 1298
},
{
"epoch": 1.9877675840978593,
"grad_norm": 0.9502137899398804,
"learning_rate": 3.5314230872723564e-06,
"loss": 1.342604160308838,
"step": 1300
},
{
"epoch": 1.9908256880733946,
"grad_norm": 0.2677958011627197,
"learning_rate": 3.5177867884784334e-06,
"loss": 1.3786706924438477,
"step": 1302
},
{
"epoch": 1.9938837920489296,
"grad_norm": 0.40644171833992004,
"learning_rate": 3.504173036436186e-06,
"loss": 1.7326993942260742,
"step": 1304
},
{
"epoch": 1.996941896024465,
"grad_norm": 0.45419755578041077,
"learning_rate": 3.4905819859957002e-06,
"loss": 1.7214076519012451,
"step": 1306
},
{
"epoch": 2.0,
"grad_norm": 0.9430392980575562,
"learning_rate": 3.4770137917488454e-06,
"loss": 1.8467901945114136,
"step": 1308
},
{
"epoch": 2.003058103975535,
"grad_norm": 0.26824504137039185,
"learning_rate": 3.463468608027505e-06,
"loss": 1.4361066818237305,
"step": 1310
},
{
"epoch": 2.00611620795107,
"grad_norm": 0.22578075528144836,
"learning_rate": 3.4499465889018337e-06,
"loss": 1.394030213356018,
"step": 1312
},
{
"epoch": 2.0091743119266057,
"grad_norm": 0.26776137948036194,
"learning_rate": 3.4364478881785002e-06,
"loss": 1.4127156734466553,
"step": 1314
},
{
"epoch": 2.0122324159021407,
"grad_norm": 0.3707635998725891,
"learning_rate": 3.4229726593989353e-06,
"loss": 1.340601921081543,
"step": 1316
},
{
"epoch": 2.015290519877676,
"grad_norm": 0.23890726268291473,
"learning_rate": 3.409521055837586e-06,
"loss": 1.5300512313842773,
"step": 1318
},
{
"epoch": 2.018348623853211,
"grad_norm": 0.21163959801197052,
"learning_rate": 3.396093230500176e-06,
"loss": 1.4162603616714478,
"step": 1320
},
{
"epoch": 2.021406727828746,
"grad_norm": 0.3320009112358093,
"learning_rate": 3.3826893361219614e-06,
"loss": 1.3640984296798706,
"step": 1322
},
{
"epoch": 2.0244648318042815,
"grad_norm": 0.2645728886127472,
"learning_rate": 3.3693095251659975e-06,
"loss": 1.4446080923080444,
"step": 1324
},
{
"epoch": 2.0275229357798166,
"grad_norm": 0.2824868857860565,
"learning_rate": 3.3559539498213965e-06,
"loss": 1.3105710744857788,
"step": 1326
},
{
"epoch": 2.0305810397553516,
"grad_norm": 0.23126038908958435,
"learning_rate": 3.342622762001606e-06,
"loss": 1.3857829570770264,
"step": 1328
},
{
"epoch": 2.0336391437308867,
"grad_norm": 0.3670974671840668,
"learning_rate": 3.3293161133426777e-06,
"loss": 1.496924638748169,
"step": 1330
},
{
"epoch": 2.036697247706422,
"grad_norm": 0.3528394401073456,
"learning_rate": 3.3160341552015375e-06,
"loss": 1.4135003089904785,
"step": 1332
},
{
"epoch": 2.0397553516819573,
"grad_norm": 0.20478151738643646,
"learning_rate": 3.3027770386542706e-06,
"loss": 1.2156240940093994,
"step": 1334
},
{
"epoch": 2.0428134556574924,
"grad_norm": 0.46617865562438965,
"learning_rate": 3.289544914494403e-06,
"loss": 1.3763898611068726,
"step": 1336
},
{
"epoch": 2.0458715596330275,
"grad_norm": 0.3884037733078003,
"learning_rate": 3.276337933231179e-06,
"loss": 1.622403860092163,
"step": 1338
},
{
"epoch": 2.0489296636085625,
"grad_norm": 0.25180479884147644,
"learning_rate": 3.2631562450878597e-06,
"loss": 1.2860331535339355,
"step": 1340
},
{
"epoch": 2.051987767584098,
"grad_norm": 0.3756599426269531,
"learning_rate": 3.2500000000000015e-06,
"loss": 1.4189289808273315,
"step": 1342
},
{
"epoch": 2.055045871559633,
"grad_norm": 0.32630693912506104,
"learning_rate": 3.236869347613764e-06,
"loss": 1.308931827545166,
"step": 1344
},
{
"epoch": 2.058103975535168,
"grad_norm": 0.28512176871299744,
"learning_rate": 3.2237644372842016e-06,
"loss": 1.2988288402557373,
"step": 1346
},
{
"epoch": 2.0611620795107033,
"grad_norm": 0.19952069222927094,
"learning_rate": 3.2106854180735625e-06,
"loss": 1.3092859983444214,
"step": 1348
},
{
"epoch": 2.0642201834862384,
"grad_norm": 0.24031268060207367,
"learning_rate": 3.1976324387495948e-06,
"loss": 1.3389842510223389,
"step": 1350
},
{
"epoch": 2.067278287461774,
"grad_norm": 0.26569297909736633,
"learning_rate": 3.1846056477838572e-06,
"loss": 1.5241750478744507,
"step": 1352
},
{
"epoch": 2.070336391437309,
"grad_norm": 0.5251048803329468,
"learning_rate": 3.171605193350028e-06,
"loss": 1.542860507965088,
"step": 1354
},
{
"epoch": 2.073394495412844,
"grad_norm": 0.34643858671188354,
"learning_rate": 3.158631223322216e-06,
"loss": 1.3612843751907349,
"step": 1356
},
{
"epoch": 2.076452599388379,
"grad_norm": 0.2934923470020294,
"learning_rate": 3.145683885273288e-06,
"loss": 1.355604648590088,
"step": 1358
},
{
"epoch": 2.079510703363914,
"grad_norm": 0.743224024772644,
"learning_rate": 3.1327633264731806e-06,
"loss": 1.341210126876831,
"step": 1360
},
{
"epoch": 2.0825688073394497,
"grad_norm": 0.32269051671028137,
"learning_rate": 3.11986969388723e-06,
"loss": 1.4118154048919678,
"step": 1362
},
{
"epoch": 2.085626911314985,
"grad_norm": 0.29159843921661377,
"learning_rate": 3.1070031341744983e-06,
"loss": 1.389265775680542,
"step": 1364
},
{
"epoch": 2.08868501529052,
"grad_norm": 0.24911250174045563,
"learning_rate": 3.094163793686108e-06,
"loss": 1.422662377357483,
"step": 1366
},
{
"epoch": 2.091743119266055,
"grad_norm": 0.21826767921447754,
"learning_rate": 3.0813518184635737e-06,
"loss": 1.4053363800048828,
"step": 1368
},
{
"epoch": 2.09480122324159,
"grad_norm": 0.3076784610748291,
"learning_rate": 3.0685673542371465e-06,
"loss": 1.283433198928833,
"step": 1370
},
{
"epoch": 2.0978593272171255,
"grad_norm": 0.17591321468353271,
"learning_rate": 3.0558105464241466e-06,
"loss": 1.237450361251831,
"step": 1372
},
{
"epoch": 2.1009174311926606,
"grad_norm": 0.2663421332836151,
"learning_rate": 3.0430815401273206e-06,
"loss": 1.3944424390792847,
"step": 1374
},
{
"epoch": 2.1039755351681957,
"grad_norm": 0.26904943585395813,
"learning_rate": 3.030380480133186e-06,
"loss": 1.5187671184539795,
"step": 1376
},
{
"epoch": 2.1070336391437308,
"grad_norm": 0.6649749279022217,
"learning_rate": 3.017707510910378e-06,
"loss": 1.3504502773284912,
"step": 1378
},
{
"epoch": 2.1100917431192663,
"grad_norm": 0.37516942620277405,
"learning_rate": 3.0050627766080188e-06,
"loss": 1.5420799255371094,
"step": 1380
},
{
"epoch": 2.1131498470948014,
"grad_norm": 0.342439204454422,
"learning_rate": 2.9924464210540717e-06,
"loss": 1.5547534227371216,
"step": 1382
},
{
"epoch": 2.1162079510703364,
"grad_norm": 0.48497647047042847,
"learning_rate": 2.979858587753698e-06,
"loss": 1.3153679370880127,
"step": 1384
},
{
"epoch": 2.1192660550458715,
"grad_norm": 0.39512813091278076,
"learning_rate": 2.96729941988764e-06,
"loss": 1.2663487195968628,
"step": 1386
},
{
"epoch": 2.1223241590214066,
"grad_norm": 0.3283194899559021,
"learning_rate": 2.9547690603105774e-06,
"loss": 1.4247238636016846,
"step": 1388
},
{
"epoch": 2.1253822629969417,
"grad_norm": 0.3506661355495453,
"learning_rate": 2.942267651549513e-06,
"loss": 1.2393386363983154,
"step": 1390
},
{
"epoch": 2.128440366972477,
"grad_norm": 0.3594140112400055,
"learning_rate": 2.9297953358021487e-06,
"loss": 1.317380666732788,
"step": 1392
},
{
"epoch": 2.1314984709480123,
"grad_norm": 0.5971735715866089,
"learning_rate": 2.9173522549352608e-06,
"loss": 1.2773442268371582,
"step": 1394
},
{
"epoch": 2.1345565749235473,
"grad_norm": 0.3666265606880188,
"learning_rate": 2.9049385504830987e-06,
"loss": 1.34925377368927,
"step": 1396
},
{
"epoch": 2.1376146788990824,
"grad_norm": 0.31561410427093506,
"learning_rate": 2.892554363645766e-06,
"loss": 1.2674505710601807,
"step": 1398
},
{
"epoch": 2.140672782874618,
"grad_norm": 0.2038232684135437,
"learning_rate": 2.880199835287618e-06,
"loss": 1.3169916868209839,
"step": 1400
},
{
"epoch": 2.143730886850153,
"grad_norm": 0.25303685665130615,
"learning_rate": 2.867875105935658e-06,
"loss": 1.4587633609771729,
"step": 1402
},
{
"epoch": 2.146788990825688,
"grad_norm": 0.31143543124198914,
"learning_rate": 2.8555803157779384e-06,
"loss": 1.3396885395050049,
"step": 1404
},
{
"epoch": 2.149847094801223,
"grad_norm": 0.2281101942062378,
"learning_rate": 2.8433156046619705e-06,
"loss": 1.2936108112335205,
"step": 1406
},
{
"epoch": 2.1529051987767582,
"grad_norm": 0.3648523688316345,
"learning_rate": 2.831081112093129e-06,
"loss": 1.5100679397583008,
"step": 1408
},
{
"epoch": 2.1559633027522938,
"grad_norm": 0.278677374124527,
"learning_rate": 2.8188769772330637e-06,
"loss": 1.3869754076004028,
"step": 1410
},
{
"epoch": 2.159021406727829,
"grad_norm": 0.21437983214855194,
"learning_rate": 2.806703338898123e-06,
"loss": 1.3129749298095703,
"step": 1412
},
{
"epoch": 2.162079510703364,
"grad_norm": 0.24729043245315552,
"learning_rate": 2.794560335557771e-06,
"loss": 1.4099204540252686,
"step": 1414
},
{
"epoch": 2.165137614678899,
"grad_norm": 0.3120039701461792,
"learning_rate": 2.7824481053330154e-06,
"loss": 1.3897459506988525,
"step": 1416
},
{
"epoch": 2.168195718654434,
"grad_norm": 0.4525415897369385,
"learning_rate": 2.770366785994827e-06,
"loss": 1.445647954940796,
"step": 1418
},
{
"epoch": 2.1712538226299696,
"grad_norm": 0.4266716241836548,
"learning_rate": 2.758316514962585e-06,
"loss": 1.3233726024627686,
"step": 1420
},
{
"epoch": 2.1743119266055047,
"grad_norm": 0.28266647458076477,
"learning_rate": 2.7462974293025112e-06,
"loss": 1.4238274097442627,
"step": 1422
},
{
"epoch": 2.1773700305810397,
"grad_norm": 0.3248072564601898,
"learning_rate": 2.7343096657261e-06,
"loss": 1.3104677200317383,
"step": 1424
},
{
"epoch": 2.180428134556575,
"grad_norm": 0.3584449887275696,
"learning_rate": 2.7223533605885784e-06,
"loss": 1.6277508735656738,
"step": 1426
},
{
"epoch": 2.18348623853211,
"grad_norm": 0.35764527320861816,
"learning_rate": 2.710428649887348e-06,
"loss": 1.3882687091827393,
"step": 1428
},
{
"epoch": 2.1865443425076454,
"grad_norm": 0.24804551899433136,
"learning_rate": 2.6985356692604336e-06,
"loss": 1.4513651132583618,
"step": 1430
},
{
"epoch": 2.1896024464831805,
"grad_norm": 0.2202014923095703,
"learning_rate": 2.686674553984951e-06,
"loss": 1.4342420101165771,
"step": 1432
},
{
"epoch": 2.1926605504587156,
"grad_norm": 0.36250677704811096,
"learning_rate": 2.6748454389755576e-06,
"loss": 1.394620656967163,
"step": 1434
},
{
"epoch": 2.1957186544342506,
"grad_norm": 0.3232296109199524,
"learning_rate": 2.6630484587829265e-06,
"loss": 1.3978071212768555,
"step": 1436
},
{
"epoch": 2.198776758409786,
"grad_norm": 0.4420628547668457,
"learning_rate": 2.651283747592211e-06,
"loss": 1.4031468629837036,
"step": 1438
},
{
"epoch": 2.2018348623853212,
"grad_norm": 0.6229142546653748,
"learning_rate": 2.639551439221516e-06,
"loss": 1.3914484977722168,
"step": 1440
},
{
"epoch": 2.2048929663608563,
"grad_norm": 0.3233772814273834,
"learning_rate": 2.627851667120387e-06,
"loss": 1.476043701171875,
"step": 1442
},
{
"epoch": 2.2079510703363914,
"grad_norm": 0.35107681155204773,
"learning_rate": 2.6161845643682763e-06,
"loss": 1.407777190208435,
"step": 1444
},
{
"epoch": 2.2110091743119265,
"grad_norm": 0.3123028874397278,
"learning_rate": 2.6045502636730457e-06,
"loss": 1.3102259635925293,
"step": 1446
},
{
"epoch": 2.214067278287462,
"grad_norm": 0.2534146308898926,
"learning_rate": 2.5929488973694406e-06,
"loss": 1.2788276672363281,
"step": 1448
},
{
"epoch": 2.217125382262997,
"grad_norm": 0.24462664127349854,
"learning_rate": 2.581380597417599e-06,
"loss": 1.3362743854522705,
"step": 1450
},
{
"epoch": 2.220183486238532,
"grad_norm": 0.2978283166885376,
"learning_rate": 2.569845495401542e-06,
"loss": 1.2902576923370361,
"step": 1452
},
{
"epoch": 2.223241590214067,
"grad_norm": 0.299277126789093,
"learning_rate": 2.5583437225276818e-06,
"loss": 1.3449206352233887,
"step": 1454
},
{
"epoch": 2.2262996941896023,
"grad_norm": 0.36601486802101135,
"learning_rate": 2.546875409623324e-06,
"loss": 1.3038407564163208,
"step": 1456
},
{
"epoch": 2.229357798165138,
"grad_norm": 0.42299339175224304,
"learning_rate": 2.5354406871351833e-06,
"loss": 1.5554304122924805,
"step": 1458
},
{
"epoch": 2.232415902140673,
"grad_norm": 0.32388123869895935,
"learning_rate": 2.5240396851279043e-06,
"loss": 1.5746049880981445,
"step": 1460
},
{
"epoch": 2.235474006116208,
"grad_norm": 0.39095836877822876,
"learning_rate": 2.5126725332825675e-06,
"loss": 1.6094728708267212,
"step": 1462
},
{
"epoch": 2.238532110091743,
"grad_norm": 0.5842258930206299,
"learning_rate": 2.501339360895231e-06,
"loss": 1.5279463529586792,
"step": 1464
},
{
"epoch": 2.241590214067278,
"grad_norm": 0.3429890275001526,
"learning_rate": 2.4900402968754504e-06,
"loss": 1.5856099128723145,
"step": 1466
},
{
"epoch": 2.2446483180428136,
"grad_norm": 0.35519224405288696,
"learning_rate": 2.4787754697448153e-06,
"loss": 1.4757394790649414,
"step": 1468
},
{
"epoch": 2.2477064220183487,
"grad_norm": 0.46203580498695374,
"learning_rate": 2.4675450076354822e-06,
"loss": 1.584846019744873,
"step": 1470
},
{
"epoch": 2.2507645259938838,
"grad_norm": 0.8099899888038635,
"learning_rate": 2.4563490382887267e-06,
"loss": 1.367172360420227,
"step": 1472
},
{
"epoch": 2.253822629969419,
"grad_norm": 0.7287035584449768,
"learning_rate": 2.4451876890534847e-06,
"loss": 1.492293357849121,
"step": 1474
},
{
"epoch": 2.2568807339449544,
"grad_norm": 0.3203519284725189,
"learning_rate": 2.4340610868849e-06,
"loss": 1.2751667499542236,
"step": 1476
},
{
"epoch": 2.2599388379204894,
"grad_norm": 0.6493098139762878,
"learning_rate": 2.4229693583428916e-06,
"loss": 1.4823472499847412,
"step": 1478
},
{
"epoch": 2.2629969418960245,
"grad_norm": 0.4101910889148712,
"learning_rate": 2.4119126295906997e-06,
"loss": 1.09395170211792,
"step": 1480
},
{
"epoch": 2.2660550458715596,
"grad_norm": 0.4682796597480774,
"learning_rate": 2.400891026393464e-06,
"loss": 1.0601507425308228,
"step": 1482
},
{
"epoch": 2.2691131498470947,
"grad_norm": 0.5146844387054443,
"learning_rate": 2.3899046741167868e-06,
"loss": 1.2724342346191406,
"step": 1484
},
{
"epoch": 2.2721712538226297,
"grad_norm": 0.8610156178474426,
"learning_rate": 2.3789536977253034e-06,
"loss": 1.3352521657943726,
"step": 1486
},
{
"epoch": 2.2752293577981653,
"grad_norm": 1.053831696510315,
"learning_rate": 2.3680382217812685e-06,
"loss": 1.4391016960144043,
"step": 1488
},
{
"epoch": 2.2782874617737003,
"grad_norm": 0.6413374543190002,
"learning_rate": 2.3571583704431355e-06,
"loss": 1.3907897472381592,
"step": 1490
},
{
"epoch": 2.2813455657492354,
"grad_norm": 0.30044737458229065,
"learning_rate": 2.346314267464145e-06,
"loss": 1.1618599891662598,
"step": 1492
},
{
"epoch": 2.2844036697247705,
"grad_norm": 0.3427642285823822,
"learning_rate": 2.3355060361909134e-06,
"loss": 1.134230375289917,
"step": 1494
},
{
"epoch": 2.287461773700306,
"grad_norm": 0.28166523575782776,
"learning_rate": 2.3247337995620363e-06,
"loss": 1.357274055480957,
"step": 1496
},
{
"epoch": 2.290519877675841,
"grad_norm": 0.7598418593406677,
"learning_rate": 2.313997680106686e-06,
"loss": 1.2663555145263672,
"step": 1498
},
{
"epoch": 2.293577981651376,
"grad_norm": 1.0048569440841675,
"learning_rate": 2.3032977999432205e-06,
"loss": 1.2259790897369385,
"step": 1500
},
{
"epoch": 2.2966360856269112,
"grad_norm": 0.3067741096019745,
"learning_rate": 2.2926342807777886e-06,
"loss": 1.435164213180542,
"step": 1502
},
{
"epoch": 2.2996941896024463,
"grad_norm": 0.5623937249183655,
"learning_rate": 2.2820072439029524e-06,
"loss": 1.4023568630218506,
"step": 1504
},
{
"epoch": 2.302752293577982,
"grad_norm": 0.3359718918800354,
"learning_rate": 2.271416810196308e-06,
"loss": 1.1277801990509033,
"step": 1506
},
{
"epoch": 2.305810397553517,
"grad_norm": 0.3305533528327942,
"learning_rate": 2.2608631001190994e-06,
"loss": 1.3414134979248047,
"step": 1508
},
{
"epoch": 2.308868501529052,
"grad_norm": 0.28481531143188477,
"learning_rate": 2.2503462337148642e-06,
"loss": 1.4879052639007568,
"step": 1510
},
{
"epoch": 2.311926605504587,
"grad_norm": 0.28595951199531555,
"learning_rate": 2.239866330608057e-06,
"loss": 1.6209688186645508,
"step": 1512
},
{
"epoch": 2.314984709480122,
"grad_norm": 0.29558923840522766,
"learning_rate": 2.2294235100026933e-06,
"loss": 1.6481235027313232,
"step": 1514
},
{
"epoch": 2.3180428134556577,
"grad_norm": 0.5758782029151917,
"learning_rate": 2.21901789068099e-06,
"loss": 1.7679166793823242,
"step": 1516
},
{
"epoch": 2.3211009174311927,
"grad_norm": 0.3111439347267151,
"learning_rate": 2.2086495910020192e-06,
"loss": 1.3151183128356934,
"step": 1518
},
{
"epoch": 2.324159021406728,
"grad_norm": 0.44918501377105713,
"learning_rate": 2.1983187289003587e-06,
"loss": 1.3933916091918945,
"step": 1520
},
{
"epoch": 2.327217125382263,
"grad_norm": 0.3173042833805084,
"learning_rate": 2.188025421884754e-06,
"loss": 1.240437388420105,
"step": 1522
},
{
"epoch": 2.330275229357798,
"grad_norm": 0.2350539118051529,
"learning_rate": 2.1777697870367713e-06,
"loss": 1.1647779941558838,
"step": 1524
},
{
"epoch": 2.3333333333333335,
"grad_norm": 0.3137843906879425,
"learning_rate": 2.1675519410094803e-06,
"loss": 1.5445265769958496,
"step": 1526
},
{
"epoch": 2.3363914373088686,
"grad_norm": 0.5268841981887817,
"learning_rate": 2.157372000026119e-06,
"loss": 1.444595217704773,
"step": 1528
},
{
"epoch": 2.3394495412844036,
"grad_norm": 0.3506692349910736,
"learning_rate": 2.1472300798787746e-06,
"loss": 1.6354224681854248,
"step": 1530
},
{
"epoch": 2.3425076452599387,
"grad_norm": 0.3233583867549896,
"learning_rate": 2.1371262959270594e-06,
"loss": 1.1021732091903687,
"step": 1532
},
{
"epoch": 2.3455657492354742,
"grad_norm": 0.29296091198921204,
"learning_rate": 2.1270607630968104e-06,
"loss": 1.3453254699707031,
"step": 1534
},
{
"epoch": 2.3486238532110093,
"grad_norm": 0.3317727744579315,
"learning_rate": 2.1170335958787736e-06,
"loss": 1.607575535774231,
"step": 1536
},
{
"epoch": 2.3516819571865444,
"grad_norm": 0.2295382171869278,
"learning_rate": 2.1070449083273047e-06,
"loss": 1.3497262001037598,
"step": 1538
},
{
"epoch": 2.3547400611620795,
"grad_norm": 0.4568946957588196,
"learning_rate": 2.0970948140590672e-06,
"loss": 1.509822130203247,
"step": 1540
},
{
"epoch": 2.3577981651376145,
"grad_norm": 0.34416595101356506,
"learning_rate": 2.08718342625175e-06,
"loss": 1.385573148727417,
"step": 1542
},
{
"epoch": 2.3608562691131496,
"grad_norm": 0.33610644936561584,
"learning_rate": 2.077310857642772e-06,
"loss": 1.3133833408355713,
"step": 1544
},
{
"epoch": 2.363914373088685,
"grad_norm": 0.332163006067276,
"learning_rate": 2.067477220527998e-06,
"loss": 1.3794035911560059,
"step": 1546
},
{
"epoch": 2.36697247706422,
"grad_norm": 0.46091410517692566,
"learning_rate": 2.05768262676047e-06,
"loss": 1.4221172332763672,
"step": 1548
},
{
"epoch": 2.3700305810397553,
"grad_norm": 0.2670794427394867,
"learning_rate": 2.0479271877491278e-06,
"loss": 1.2908828258514404,
"step": 1550
},
{
"epoch": 2.3730886850152904,
"grad_norm": 0.31927385926246643,
"learning_rate": 2.038211014457546e-06,
"loss": 1.3988337516784668,
"step": 1552
},
{
"epoch": 2.376146788990826,
"grad_norm": 0.4126211404800415,
"learning_rate": 2.028534217402667e-06,
"loss": 1.7016716003417969,
"step": 1554
},
{
"epoch": 2.379204892966361,
"grad_norm": 0.6094360947608948,
"learning_rate": 2.0188969066535484e-06,
"loss": 2.0326876640319824,
"step": 1556
},
{
"epoch": 2.382262996941896,
"grad_norm": 0.40967652201652527,
"learning_rate": 2.0092991918301106e-06,
"loss": 1.3301377296447754,
"step": 1558
},
{
"epoch": 2.385321100917431,
"grad_norm": 0.6155174970626831,
"learning_rate": 1.9997411821018885e-06,
"loss": 1.319265604019165,
"step": 1560
},
{
"epoch": 2.388379204892966,
"grad_norm": 0.4441206455230713,
"learning_rate": 1.990222986186786e-06,
"loss": 1.3922169208526611,
"step": 1562
},
{
"epoch": 2.3914373088685017,
"grad_norm": 0.5924298167228699,
"learning_rate": 1.980744712349849e-06,
"loss": 1.4741730690002441,
"step": 1564
},
{
"epoch": 2.3944954128440368,
"grad_norm": 0.42252296209335327,
"learning_rate": 1.9713064684020262e-06,
"loss": 1.4076108932495117,
"step": 1566
},
{
"epoch": 2.397553516819572,
"grad_norm": 0.36031708121299744,
"learning_rate": 1.9619083616989457e-06,
"loss": 1.278861403465271,
"step": 1568
},
{
"epoch": 2.400611620795107,
"grad_norm": 0.24064381420612335,
"learning_rate": 1.952550499139689e-06,
"loss": 1.19804048538208,
"step": 1570
},
{
"epoch": 2.4036697247706424,
"grad_norm": 0.18197159469127655,
"learning_rate": 1.9432329871655837e-06,
"loss": 1.12447988986969,
"step": 1572
},
{
"epoch": 2.4067278287461775,
"grad_norm": 0.30438297986984253,
"learning_rate": 1.933955931758988e-06,
"loss": 1.2643486261367798,
"step": 1574
},
{
"epoch": 2.4097859327217126,
"grad_norm": 0.5426669120788574,
"learning_rate": 1.9247194384420855e-06,
"loss": 1.504340410232544,
"step": 1576
},
{
"epoch": 2.4128440366972477,
"grad_norm": 0.6118716597557068,
"learning_rate": 1.915523612275681e-06,
"loss": 1.5359920263290405,
"step": 1578
},
{
"epoch": 2.4159021406727827,
"grad_norm": 0.5290548801422119,
"learning_rate": 1.9063685578580137e-06,
"loss": 1.5219250917434692,
"step": 1580
},
{
"epoch": 2.418960244648318,
"grad_norm": 0.348886638879776,
"learning_rate": 1.8972543793235626e-06,
"loss": 1.5620722770690918,
"step": 1582
},
{
"epoch": 2.4220183486238533,
"grad_norm": 0.4480542838573456,
"learning_rate": 1.8881811803418624e-06,
"loss": 1.3870704174041748,
"step": 1584
},
{
"epoch": 2.4250764525993884,
"grad_norm": 0.6594481468200684,
"learning_rate": 1.8791490641163218e-06,
"loss": 1.5246330499649048,
"step": 1586
},
{
"epoch": 2.4281345565749235,
"grad_norm": 0.48964548110961914,
"learning_rate": 1.870158133383055e-06,
"loss": 1.4073295593261719,
"step": 1588
},
{
"epoch": 2.4311926605504586,
"grad_norm": 0.40440455079078674,
"learning_rate": 1.8612084904097117e-06,
"loss": 1.329315423965454,
"step": 1590
},
{
"epoch": 2.434250764525994,
"grad_norm": 0.3714819550514221,
"learning_rate": 1.852300236994308e-06,
"loss": 1.3444490432739258,
"step": 1592
},
{
"epoch": 2.437308868501529,
"grad_norm": 0.5145377516746521,
"learning_rate": 1.8434334744640763e-06,
"loss": 1.5467479228973389,
"step": 1594
},
{
"epoch": 2.4403669724770642,
"grad_norm": 0.46002912521362305,
"learning_rate": 1.8346083036743104e-06,
"loss": 1.289878249168396,
"step": 1596
},
{
"epoch": 2.4434250764525993,
"grad_norm": 0.793483555316925,
"learning_rate": 1.8258248250072158e-06,
"loss": 1.4660496711730957,
"step": 1598
},
{
"epoch": 2.4464831804281344,
"grad_norm": 0.44911351799964905,
"learning_rate": 1.8170831383707683e-06,
"loss": 1.3652875423431396,
"step": 1600
},
{
"epoch": 2.44954128440367,
"grad_norm": 0.38207677006721497,
"learning_rate": 1.8083833431975805e-06,
"loss": 1.3762791156768799,
"step": 1602
},
{
"epoch": 2.452599388379205,
"grad_norm": 0.4357513189315796,
"learning_rate": 1.7997255384437695e-06,
"loss": 1.5232503414154053,
"step": 1604
},
{
"epoch": 2.45565749235474,
"grad_norm": 0.3423779308795929,
"learning_rate": 1.7911098225878309e-06,
"loss": 1.5271486043930054,
"step": 1606
},
{
"epoch": 2.458715596330275,
"grad_norm": 5.960415363311768,
"learning_rate": 1.7825362936295171e-06,
"loss": 1.3485842943191528,
"step": 1608
},
{
"epoch": 2.46177370030581,
"grad_norm": 0.36111417412757874,
"learning_rate": 1.774005049088725e-06,
"loss": 1.2900433540344238,
"step": 1610
},
{
"epoch": 2.4648318042813457,
"grad_norm": 0.33147767186164856,
"learning_rate": 1.7655161860043873e-06,
"loss": 1.4210761785507202,
"step": 1612
},
{
"epoch": 2.467889908256881,
"grad_norm": 0.3786766231060028,
"learning_rate": 1.7570698009333664e-06,
"loss": 1.370017409324646,
"step": 1614
},
{
"epoch": 2.470948012232416,
"grad_norm": 1.8267617225646973,
"learning_rate": 1.7486659899493537e-06,
"loss": 1.5153461694717407,
"step": 1616
},
{
"epoch": 2.474006116207951,
"grad_norm": 0.3199278712272644,
"learning_rate": 1.740304848641787e-06,
"loss": 1.3838684558868408,
"step": 1618
},
{
"epoch": 2.477064220183486,
"grad_norm": 0.3670620322227478,
"learning_rate": 1.731986472114751e-06,
"loss": 1.33723783493042,
"step": 1620
},
{
"epoch": 2.4801223241590216,
"grad_norm": 0.36861374974250793,
"learning_rate": 1.7237109549859043e-06,
"loss": 1.2932226657867432,
"step": 1622
},
{
"epoch": 2.4831804281345566,
"grad_norm": 0.34438320994377136,
"learning_rate": 1.7154783913853968e-06,
"loss": 1.42689049243927,
"step": 1624
},
{
"epoch": 2.4862385321100917,
"grad_norm": 0.23838122189044952,
"learning_rate": 1.7072888749548033e-06,
"loss": 1.4100431203842163,
"step": 1626
},
{
"epoch": 2.489296636085627,
"grad_norm": 0.46484264731407166,
"learning_rate": 1.6991424988460592e-06,
"loss": 1.3829045295715332,
"step": 1628
},
{
"epoch": 2.4923547400611623,
"grad_norm": 0.3008574843406677,
"learning_rate": 1.6910393557203964e-06,
"loss": 1.5693084001541138,
"step": 1630
},
{
"epoch": 2.4954128440366974,
"grad_norm": 0.37115153670310974,
"learning_rate": 1.6829795377472908e-06,
"loss": 1.7590757608413696,
"step": 1632
},
{
"epoch": 2.4984709480122325,
"grad_norm": 0.616698682308197,
"learning_rate": 1.674963136603417e-06,
"loss": 1.6397650241851807,
"step": 1634
},
{
"epoch": 2.5015290519877675,
"grad_norm": 0.384959876537323,
"learning_rate": 1.6669902434716046e-06,
"loss": 1.6299896240234375,
"step": 1636
},
{
"epoch": 2.5045871559633026,
"grad_norm": 0.8294275403022766,
"learning_rate": 1.6590609490397958e-06,
"loss": 1.5394856929779053,
"step": 1638
},
{
"epoch": 2.5076452599388377,
"grad_norm": 0.40894415974617004,
"learning_rate": 1.6511753435000205e-06,
"loss": 1.2182371616363525,
"step": 1640
},
{
"epoch": 2.510703363914373,
"grad_norm": 0.45905759930610657,
"learning_rate": 1.6433335165473686e-06,
"loss": 1.2023439407348633,
"step": 1642
},
{
"epoch": 2.5137614678899083,
"grad_norm": 0.38532376289367676,
"learning_rate": 1.635535557378968e-06,
"loss": 1.6095008850097656,
"step": 1644
},
{
"epoch": 2.5168195718654434,
"grad_norm": 1.44415283203125,
"learning_rate": 1.6277815546929688e-06,
"loss": 1.6082322597503662,
"step": 1646
},
{
"epoch": 2.5198776758409784,
"grad_norm": 0.5093996524810791,
"learning_rate": 1.6200715966875394e-06,
"loss": 1.7141090631484985,
"step": 1648
},
{
"epoch": 2.522935779816514,
"grad_norm": 0.5241023898124695,
"learning_rate": 1.6124057710598603e-06,
"loss": 1.6450610160827637,
"step": 1650
},
{
"epoch": 2.525993883792049,
"grad_norm": 0.49204516410827637,
"learning_rate": 1.6047841650051272e-06,
"loss": 1.6974513530731201,
"step": 1652
},
{
"epoch": 2.529051987767584,
"grad_norm": 0.8506813049316406,
"learning_rate": 1.5972068652155554e-06,
"loss": 1.5313912630081177,
"step": 1654
},
{
"epoch": 2.532110091743119,
"grad_norm": 0.33754727244377136,
"learning_rate": 1.5896739578794e-06,
"loss": 1.5209699869155884,
"step": 1656
},
{
"epoch": 2.5351681957186543,
"grad_norm": 0.7774704694747925,
"learning_rate": 1.5821855286799742e-06,
"loss": 1.4035563468933105,
"step": 1658
},
{
"epoch": 2.5382262996941893,
"grad_norm": 0.6433319449424744,
"learning_rate": 1.5747416627946673e-06,
"loss": 1.665273666381836,
"step": 1660
},
{
"epoch": 2.541284403669725,
"grad_norm": 0.6971220970153809,
"learning_rate": 1.5673424448939887e-06,
"loss": 1.5019344091415405,
"step": 1662
},
{
"epoch": 2.54434250764526,
"grad_norm": 0.40314802527427673,
"learning_rate": 1.5599879591405917e-06,
"loss": 1.1620054244995117,
"step": 1664
},
{
"epoch": 2.547400611620795,
"grad_norm": 0.48018017411231995,
"learning_rate": 1.552678289188326e-06,
"loss": 1.6923828125,
"step": 1666
},
{
"epoch": 2.5504587155963305,
"grad_norm": 0.4809359312057495,
"learning_rate": 1.545413518181283e-06,
"loss": 1.7656713724136353,
"step": 1668
},
{
"epoch": 2.5535168195718656,
"grad_norm": 0.40401753783226013,
"learning_rate": 1.5381937287528449e-06,
"loss": 1.8313161134719849,
"step": 1670
},
{
"epoch": 2.5565749235474007,
"grad_norm": 0.4581202268600464,
"learning_rate": 1.5310190030247546e-06,
"loss": 1.7572789192199707,
"step": 1672
},
{
"epoch": 2.5596330275229358,
"grad_norm": 0.9305920600891113,
"learning_rate": 1.5238894226061737e-06,
"loss": 1.7307026386260986,
"step": 1674
},
{
"epoch": 2.562691131498471,
"grad_norm": 0.47380930185317993,
"learning_rate": 1.5168050685927566e-06,
"loss": 1.5947740077972412,
"step": 1676
},
{
"epoch": 2.565749235474006,
"grad_norm": 1.2263463735580444,
"learning_rate": 1.5097660215657306e-06,
"loss": 1.4555588960647583,
"step": 1678
},
{
"epoch": 2.5688073394495414,
"grad_norm": 0.43118909001350403,
"learning_rate": 1.5027723615909745e-06,
"loss": 1.0147868394851685,
"step": 1680
},
{
"epoch": 2.5718654434250765,
"grad_norm": 0.5391921401023865,
"learning_rate": 1.4958241682181137e-06,
"loss": 1.0223249197006226,
"step": 1682
},
{
"epoch": 2.5749235474006116,
"grad_norm": 0.2522028386592865,
"learning_rate": 1.4889215204796082e-06,
"loss": 1.250197172164917,
"step": 1684
},
{
"epoch": 2.5779816513761467,
"grad_norm": 0.29159918427467346,
"learning_rate": 1.4820644968898605e-06,
"loss": 1.1835776567459106,
"step": 1686
},
{
"epoch": 2.581039755351682,
"grad_norm": 0.2946909964084625,
"learning_rate": 1.47525317544432e-06,
"loss": 1.1374409198760986,
"step": 1688
},
{
"epoch": 2.5840978593272173,
"grad_norm": 0.19036340713500977,
"learning_rate": 1.468487633618594e-06,
"loss": 1.1817882061004639,
"step": 1690
},
{
"epoch": 2.5871559633027523,
"grad_norm": 1.4873279333114624,
"learning_rate": 1.4617679483675673e-06,
"loss": 1.4171775579452515,
"step": 1692
},
{
"epoch": 2.5902140672782874,
"grad_norm": 0.32151684165000916,
"learning_rate": 1.4550941961245288e-06,
"loss": 1.3625459671020508,
"step": 1694
},
{
"epoch": 2.5932721712538225,
"grad_norm": 0.26637983322143555,
"learning_rate": 1.4484664528003026e-06,
"loss": 1.2058180570602417,
"step": 1696
},
{
"epoch": 2.5963302752293576,
"grad_norm": 0.5087877511978149,
"learning_rate": 1.4418847937823784e-06,
"loss": 1.425114631652832,
"step": 1698
},
{
"epoch": 2.599388379204893,
"grad_norm": 0.9368872046470642,
"learning_rate": 1.4353492939340618e-06,
"loss": 1.4749643802642822,
"step": 1700
},
{
"epoch": 2.602446483180428,
"grad_norm": 0.48912081122398376,
"learning_rate": 1.4288600275936184e-06,
"loss": 1.245436668395996,
"step": 1702
},
{
"epoch": 2.6055045871559632,
"grad_norm": 0.4674423635005951,
"learning_rate": 1.4224170685734303e-06,
"loss": 1.4404422044754028,
"step": 1704
},
{
"epoch": 2.6085626911314987,
"grad_norm": 0.7305318117141724,
"learning_rate": 1.416020490159152e-06,
"loss": 1.6482999324798584,
"step": 1706
},
{
"epoch": 2.611620795107034,
"grad_norm": 0.5728065371513367,
"learning_rate": 1.4096703651088848e-06,
"loss": 1.1557910442352295,
"step": 1708
},
{
"epoch": 2.614678899082569,
"grad_norm": 0.6479355096817017,
"learning_rate": 1.4033667656523405e-06,
"loss": 1.4093899726867676,
"step": 1710
},
{
"epoch": 2.617737003058104,
"grad_norm": 1.1274484395980835,
"learning_rate": 1.3971097634900262e-06,
"loss": 1.4923943281173706,
"step": 1712
},
{
"epoch": 2.620795107033639,
"grad_norm": 0.5374640822410583,
"learning_rate": 1.3908994297924275e-06,
"loss": 1.3800336122512817,
"step": 1714
},
{
"epoch": 2.623853211009174,
"grad_norm": 0.6038364171981812,
"learning_rate": 1.3847358351991945e-06,
"loss": 1.2194199562072754,
"step": 1716
},
{
"epoch": 2.6269113149847096,
"grad_norm": 0.7064008712768555,
"learning_rate": 1.3786190498183446e-06,
"loss": 0.8604775667190552,
"step": 1718
},
{
"epoch": 2.6299694189602447,
"grad_norm": 0.3798482418060303,
"learning_rate": 1.3725491432254627e-06,
"loss": 1.5459158420562744,
"step": 1720
},
{
"epoch": 2.63302752293578,
"grad_norm": 0.47553232312202454,
"learning_rate": 1.3665261844629053e-06,
"loss": 1.466538429260254,
"step": 1722
},
{
"epoch": 2.636085626911315,
"grad_norm": 0.3397771716117859,
"learning_rate": 1.360550242039024e-06,
"loss": 1.3562582731246948,
"step": 1724
},
{
"epoch": 2.6391437308868504,
"grad_norm": 0.282279908657074,
"learning_rate": 1.354621383927379e-06,
"loss": 1.4752657413482666,
"step": 1726
},
{
"epoch": 2.6422018348623855,
"grad_norm": 0.3183048963546753,
"learning_rate": 1.3487396775659691e-06,
"loss": 1.4154858589172363,
"step": 1728
},
{
"epoch": 2.6452599388379205,
"grad_norm": 0.4210142493247986,
"learning_rate": 1.3429051898564623e-06,
"loss": 1.3750901222229004,
"step": 1730
},
{
"epoch": 2.6483180428134556,
"grad_norm": 0.6870266795158386,
"learning_rate": 1.337117987163439e-06,
"loss": 1.5814931392669678,
"step": 1732
},
{
"epoch": 2.6513761467889907,
"grad_norm": 0.4824894964694977,
"learning_rate": 1.3313781353136329e-06,
"loss": 1.2281584739685059,
"step": 1734
},
{
"epoch": 2.6544342507645258,
"grad_norm": 0.2543982267379761,
"learning_rate": 1.3256856995951852e-06,
"loss": 1.0042641162872314,
"step": 1736
},
{
"epoch": 2.6574923547400613,
"grad_norm": 0.39150846004486084,
"learning_rate": 1.3200407447568985e-06,
"loss": 1.6282243728637695,
"step": 1738
},
{
"epoch": 2.6605504587155964,
"grad_norm": 0.43744921684265137,
"learning_rate": 1.3144433350075045e-06,
"loss": 1.419670820236206,
"step": 1740
},
{
"epoch": 2.6636085626911314,
"grad_norm": 0.5169599652290344,
"learning_rate": 1.3088935340149312e-06,
"loss": 1.5492973327636719,
"step": 1742
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.3686998188495636,
"learning_rate": 1.3033914049055776e-06,
"loss": 1.390296459197998,
"step": 1744
},
{
"epoch": 2.669724770642202,
"grad_norm": 0.3961811363697052,
"learning_rate": 1.2979370102636001e-06,
"loss": 1.6185352802276611,
"step": 1746
},
{
"epoch": 2.672782874617737,
"grad_norm": 0.4181622266769409,
"learning_rate": 1.2925304121301956e-06,
"loss": 1.47446608543396,
"step": 1748
},
{
"epoch": 2.675840978593272,
"grad_norm": 0.5175849199295044,
"learning_rate": 1.2871716720029001e-06,
"loss": 1.4941065311431885,
"step": 1750
},
{
"epoch": 2.6788990825688073,
"grad_norm": 0.4671924412250519,
"learning_rate": 1.2818608508348831e-06,
"loss": 1.3738720417022705,
"step": 1752
},
{
"epoch": 2.6819571865443423,
"grad_norm": 0.31229135394096375,
"learning_rate": 1.2765980090342638e-06,
"loss": 1.0343739986419678,
"step": 1754
},
{
"epoch": 2.6850152905198774,
"grad_norm": 0.5780667662620544,
"learning_rate": 1.2713832064634127e-06,
"loss": 1.4987692832946777,
"step": 1756
},
{
"epoch": 2.688073394495413,
"grad_norm": 0.29605942964553833,
"learning_rate": 1.2662165024382813e-06,
"loss": 1.4711230993270874,
"step": 1758
},
{
"epoch": 2.691131498470948,
"grad_norm": 0.4572795629501343,
"learning_rate": 1.2610979557277186e-06,
"loss": 1.4898228645324707,
"step": 1760
},
{
"epoch": 2.694189602446483,
"grad_norm": 0.5139583945274353,
"learning_rate": 1.2560276245528099e-06,
"loss": 1.4924449920654297,
"step": 1762
},
{
"epoch": 2.6972477064220186,
"grad_norm": 0.3455151319503784,
"learning_rate": 1.251005566586209e-06,
"loss": 1.3008229732513428,
"step": 1764
},
{
"epoch": 2.7003058103975537,
"grad_norm": 0.5034812092781067,
"learning_rate": 1.2460318389514868e-06,
"loss": 1.5259795188903809,
"step": 1766
},
{
"epoch": 2.7033639143730888,
"grad_norm": 0.55739825963974,
"learning_rate": 1.241106498222476e-06,
"loss": 1.610971212387085,
"step": 1768
},
{
"epoch": 2.706422018348624,
"grad_norm": 0.3922676146030426,
"learning_rate": 1.2362296004226327e-06,
"loss": 1.3188968896865845,
"step": 1770
},
{
"epoch": 2.709480122324159,
"grad_norm": 0.4953126311302185,
"learning_rate": 1.2314012010243973e-06,
"loss": 1.5828558206558228,
"step": 1772
},
{
"epoch": 2.712538226299694,
"grad_norm": 0.6791023015975952,
"learning_rate": 1.2266213549485638e-06,
"loss": 1.3703022003173828,
"step": 1774
},
{
"epoch": 2.7155963302752295,
"grad_norm": 0.37211811542510986,
"learning_rate": 1.2218901165636526e-06,
"loss": 1.504420280456543,
"step": 1776
},
{
"epoch": 2.7186544342507646,
"grad_norm": 0.2997111678123474,
"learning_rate": 1.2172075396852972e-06,
"loss": 1.442054271697998,
"step": 1778
},
{
"epoch": 2.7217125382262997,
"grad_norm": 0.3290131390094757,
"learning_rate": 1.212573677575627e-06,
"loss": 1.5728079080581665,
"step": 1780
},
{
"epoch": 2.7247706422018347,
"grad_norm": 0.3726375102996826,
"learning_rate": 1.2079885829426653e-06,
"loss": 1.6637623310089111,
"step": 1782
},
{
"epoch": 2.7278287461773703,
"grad_norm": 0.7502315640449524,
"learning_rate": 1.2034523079397264e-06,
"loss": 1.550297737121582,
"step": 1784
},
{
"epoch": 2.7308868501529053,
"grad_norm": 0.3677420914173126,
"learning_rate": 1.1989649041648244e-06,
"loss": 1.3913054466247559,
"step": 1786
},
{
"epoch": 2.7339449541284404,
"grad_norm": 0.6194299459457397,
"learning_rate": 1.1945264226600878e-06,
"loss": 1.49534010887146,
"step": 1788
},
{
"epoch": 2.7370030581039755,
"grad_norm": 0.42255425453186035,
"learning_rate": 1.1901369139111737e-06,
"loss": 1.5017262697219849,
"step": 1790
},
{
"epoch": 2.7400611620795106,
"grad_norm": 0.39475998282432556,
"learning_rate": 1.1857964278467003e-06,
"loss": 1.4985376596450806,
"step": 1792
},
{
"epoch": 2.7431192660550456,
"grad_norm": 0.4835125207901001,
"learning_rate": 1.1815050138376731e-06,
"loss": 1.513980746269226,
"step": 1794
},
{
"epoch": 2.746177370030581,
"grad_norm": 0.27400922775268555,
"learning_rate": 1.1772627206969286e-06,
"loss": 1.5117716789245605,
"step": 1796
},
{
"epoch": 2.7492354740061162,
"grad_norm": 0.35452115535736084,
"learning_rate": 1.1730695966785726e-06,
"loss": 1.3024158477783203,
"step": 1798
},
{
"epoch": 2.7522935779816513,
"grad_norm": 0.45254552364349365,
"learning_rate": 1.1689256894774384e-06,
"loss": 1.3760697841644287,
"step": 1800
},
{
"epoch": 2.7553516819571864,
"grad_norm": 0.6041072010993958,
"learning_rate": 1.1648310462285386e-06,
"loss": 1.298436164855957,
"step": 1802
},
{
"epoch": 2.758409785932722,
"grad_norm": 0.555728554725647,
"learning_rate": 1.1607857135065337e-06,
"loss": 1.3885629177093506,
"step": 1804
},
{
"epoch": 2.761467889908257,
"grad_norm": 0.5937597751617432,
"learning_rate": 1.1567897373251967e-06,
"loss": 1.3754394054412842,
"step": 1806
},
{
"epoch": 2.764525993883792,
"grad_norm": 0.35898932814598083,
"learning_rate": 1.1528431631368957e-06,
"loss": 1.2469127178192139,
"step": 1808
},
{
"epoch": 2.767584097859327,
"grad_norm": 0.24282048642635345,
"learning_rate": 1.1489460358320728e-06,
"loss": 0.9015558958053589,
"step": 1810
},
{
"epoch": 2.770642201834862,
"grad_norm": 0.27484798431396484,
"learning_rate": 1.1450983997387365e-06,
"loss": 1.2076148986816406,
"step": 1812
},
{
"epoch": 2.7737003058103973,
"grad_norm": 0.29970651865005493,
"learning_rate": 1.1413002986219528e-06,
"loss": 1.2744965553283691,
"step": 1814
},
{
"epoch": 2.776758409785933,
"grad_norm": 0.26047366857528687,
"learning_rate": 1.1375517756833534e-06,
"loss": 1.3271204233169556,
"step": 1816
},
{
"epoch": 2.779816513761468,
"grad_norm": 0.3544829785823822,
"learning_rate": 1.1338528735606391e-06,
"loss": 1.3407413959503174,
"step": 1818
},
{
"epoch": 2.782874617737003,
"grad_norm": 0.24868814647197723,
"learning_rate": 1.1302036343270996e-06,
"loss": 1.4030461311340332,
"step": 1820
},
{
"epoch": 2.7859327217125385,
"grad_norm": 0.30862292647361755,
"learning_rate": 1.12660409949113e-06,
"loss": 1.3144700527191162,
"step": 1822
},
{
"epoch": 2.7889908256880735,
"grad_norm": 0.9225071668624878,
"learning_rate": 1.1230543099957608e-06,
"loss": 1.338538646697998,
"step": 1824
},
{
"epoch": 2.7920489296636086,
"grad_norm": 0.32354745268821716,
"learning_rate": 1.1195543062181954e-06,
"loss": 1.310173749923706,
"step": 1826
},
{
"epoch": 2.7951070336391437,
"grad_norm": 0.24064457416534424,
"learning_rate": 1.1161041279693445e-06,
"loss": 1.3204376697540283,
"step": 1828
},
{
"epoch": 2.7981651376146788,
"grad_norm": 0.23651309311389923,
"learning_rate": 1.1127038144933787e-06,
"loss": 1.281717300415039,
"step": 1830
},
{
"epoch": 2.801223241590214,
"grad_norm": 0.21533581614494324,
"learning_rate": 1.1093534044672796e-06,
"loss": 1.3252437114715576,
"step": 1832
},
{
"epoch": 2.8042813455657494,
"grad_norm": 0.38182252645492554,
"learning_rate": 1.1060529360004003e-06,
"loss": 1.27931809425354,
"step": 1834
},
{
"epoch": 2.8073394495412844,
"grad_norm": 0.12391169369220734,
"learning_rate": 1.1028024466340305e-06,
"loss": 1.1552488803863525,
"step": 1836
},
{
"epoch": 2.8103975535168195,
"grad_norm": 0.17293956875801086,
"learning_rate": 1.0996019733409732e-06,
"loss": 1.2036254405975342,
"step": 1838
},
{
"epoch": 2.8134556574923546,
"grad_norm": 0.21059419214725494,
"learning_rate": 1.096451552525121e-06,
"loss": 0.9850409030914307,
"step": 1840
},
{
"epoch": 2.81651376146789,
"grad_norm": 0.2714180648326874,
"learning_rate": 1.093351220021043e-06,
"loss": 1.2215778827667236,
"step": 1842
},
{
"epoch": 2.819571865443425,
"grad_norm": 0.22156941890716553,
"learning_rate": 1.090301011093575e-06,
"loss": 1.2629544734954834,
"step": 1844
},
{
"epoch": 2.8226299694189603,
"grad_norm": 0.20625340938568115,
"learning_rate": 1.0873009604374246e-06,
"loss": 1.2778034210205078,
"step": 1846
},
{
"epoch": 2.8256880733944953,
"grad_norm": 0.29442811012268066,
"learning_rate": 1.084351102176769e-06,
"loss": 1.2413357496261597,
"step": 1848
},
{
"epoch": 2.8287461773700304,
"grad_norm": 0.18544712662696838,
"learning_rate": 1.081451469864872e-06,
"loss": 1.2637240886688232,
"step": 1850
},
{
"epoch": 2.8318042813455655,
"grad_norm": 0.22874392569065094,
"learning_rate": 1.0786020964836991e-06,
"loss": 1.2410205602645874,
"step": 1852
},
{
"epoch": 2.834862385321101,
"grad_norm": 0.2457342892885208,
"learning_rate": 1.075803014443546e-06,
"loss": 1.2094589471817017,
"step": 1854
},
{
"epoch": 2.837920489296636,
"grad_norm": 0.22759026288986206,
"learning_rate": 1.0730542555826654e-06,
"loss": 1.274350643157959,
"step": 1856
},
{
"epoch": 2.840978593272171,
"grad_norm": 0.206235870718956,
"learning_rate": 1.07035585116691e-06,
"loss": 1.245356559753418,
"step": 1858
},
{
"epoch": 2.8440366972477067,
"grad_norm": 0.49194467067718506,
"learning_rate": 1.0677078318893716e-06,
"loss": 1.2151732444763184,
"step": 1860
},
{
"epoch": 2.8470948012232418,
"grad_norm": 0.33920061588287354,
"learning_rate": 1.0651102278700364e-06,
"loss": 1.2073887586593628,
"step": 1862
},
{
"epoch": 2.850152905198777,
"grad_norm": 0.25718092918395996,
"learning_rate": 1.062563068655439e-06,
"loss": 1.2325494289398193,
"step": 1864
},
{
"epoch": 2.853211009174312,
"grad_norm": 0.24365228414535522,
"learning_rate": 1.0600663832183293e-06,
"loss": 1.2226455211639404,
"step": 1866
},
{
"epoch": 2.856269113149847,
"grad_norm": 0.19332216680049896,
"learning_rate": 1.0576201999573405e-06,
"loss": 1.1831451654434204,
"step": 1868
},
{
"epoch": 2.859327217125382,
"grad_norm": 0.25319862365722656,
"learning_rate": 1.0552245466966678e-06,
"loss": 1.2440452575683594,
"step": 1870
},
{
"epoch": 2.8623853211009176,
"grad_norm": 0.27022072672843933,
"learning_rate": 1.0528794506857508e-06,
"loss": 1.2725245952606201,
"step": 1872
},
{
"epoch": 2.8654434250764527,
"grad_norm": 0.3112826943397522,
"learning_rate": 1.050584938598963e-06,
"loss": 1.282654047012329,
"step": 1874
},
{
"epoch": 2.8685015290519877,
"grad_norm": 0.2421792596578598,
"learning_rate": 1.048341036535311e-06,
"loss": 1.273242712020874,
"step": 1876
},
{
"epoch": 2.871559633027523,
"grad_norm": 0.23541022837162018,
"learning_rate": 1.0461477700181355e-06,
"loss": 1.2899906635284424,
"step": 1878
},
{
"epoch": 2.8746177370030583,
"grad_norm": 0.2772025167942047,
"learning_rate": 1.044005163994821e-06,
"loss": 1.2756202220916748,
"step": 1880
},
{
"epoch": 2.8776758409785934,
"grad_norm": 0.47361937165260315,
"learning_rate": 1.0419132428365116e-06,
"loss": 1.2930552959442139,
"step": 1882
},
{
"epoch": 2.8807339449541285,
"grad_norm": 0.18241485953330994,
"learning_rate": 1.0398720303378374e-06,
"loss": 1.223031997680664,
"step": 1884
},
{
"epoch": 2.8837920489296636,
"grad_norm": 0.40437427163124084,
"learning_rate": 1.0378815497166385e-06,
"loss": 1.2670063972473145,
"step": 1886
},
{
"epoch": 2.8868501529051986,
"grad_norm": 0.22389701008796692,
"learning_rate": 1.0359418236137047e-06,
"loss": 1.2270456552505493,
"step": 1888
},
{
"epoch": 2.8899082568807337,
"grad_norm": 0.29309970140457153,
"learning_rate": 1.0340528740925169e-06,
"loss": 1.2563271522521973,
"step": 1890
},
{
"epoch": 2.8929663608562692,
"grad_norm": 0.24637004733085632,
"learning_rate": 1.0322147226389952e-06,
"loss": 1.2668583393096924,
"step": 1892
},
{
"epoch": 2.8960244648318043,
"grad_norm": 0.5765001177787781,
"learning_rate": 1.0304273901612566e-06,
"loss": 1.2873437404632568,
"step": 1894
},
{
"epoch": 2.8990825688073394,
"grad_norm": 0.3287610411643982,
"learning_rate": 1.028690896989375e-06,
"loss": 1.274024248123169,
"step": 1896
},
{
"epoch": 2.9021406727828745,
"grad_norm": 0.2688363492488861,
"learning_rate": 1.027005262875151e-06,
"loss": 1.20585036277771,
"step": 1898
},
{
"epoch": 2.90519877675841,
"grad_norm": 0.3984238803386688,
"learning_rate": 1.0253705069918865e-06,
"loss": 1.2360919713974,
"step": 1900
},
{
"epoch": 2.908256880733945,
"grad_norm": 0.27637046575546265,
"learning_rate": 1.0237866479341687e-06,
"loss": 1.2752952575683594,
"step": 1902
},
{
"epoch": 2.91131498470948,
"grad_norm": 0.5071486234664917,
"learning_rate": 1.0222537037176572e-06,
"loss": 1.2954089641571045,
"step": 1904
},
{
"epoch": 2.914373088685015,
"grad_norm": 0.22012606263160706,
"learning_rate": 1.0207716917788768e-06,
"loss": 1.2765629291534424,
"step": 1906
},
{
"epoch": 2.9174311926605503,
"grad_norm": 0.20149464905261993,
"learning_rate": 1.019340628975023e-06,
"loss": 1.2535219192504883,
"step": 1908
},
{
"epoch": 2.9204892966360854,
"grad_norm": 0.227265864610672,
"learning_rate": 1.0179605315837695e-06,
"loss": 1.2175259590148926,
"step": 1910
},
{
"epoch": 2.923547400611621,
"grad_norm": 0.2566111087799072,
"learning_rate": 1.0166314153030799e-06,
"loss": 1.255599856376648,
"step": 1912
},
{
"epoch": 2.926605504587156,
"grad_norm": 0.38341450691223145,
"learning_rate": 1.0153532952510328e-06,
"loss": 1.2794301509857178,
"step": 1914
},
{
"epoch": 2.929663608562691,
"grad_norm": 0.28000977635383606,
"learning_rate": 1.0141261859656484e-06,
"loss": 1.2272768020629883,
"step": 1916
},
{
"epoch": 2.9327217125382266,
"grad_norm": 0.2550158202648163,
"learning_rate": 1.0129501014047236e-06,
"loss": 1.2561171054840088,
"step": 1918
},
{
"epoch": 2.9357798165137616,
"grad_norm": 0.21566316485404968,
"learning_rate": 1.0118250549456717e-06,
"loss": 1.2545552253723145,
"step": 1920
},
{
"epoch": 2.9388379204892967,
"grad_norm": 0.36798691749572754,
"learning_rate": 1.0107510593853716e-06,
"loss": 1.3016841411590576,
"step": 1922
},
{
"epoch": 2.941896024464832,
"grad_norm": 0.29115161299705505,
"learning_rate": 1.0097281269400234e-06,
"loss": 1.3122904300689697,
"step": 1924
},
{
"epoch": 2.944954128440367,
"grad_norm": 0.42286819219589233,
"learning_rate": 1.0087562692450062e-06,
"loss": 1.2751294374465942,
"step": 1926
},
{
"epoch": 2.948012232415902,
"grad_norm": 0.29917454719543457,
"learning_rate": 1.0078354973547484e-06,
"loss": 1.2971951961517334,
"step": 1928
},
{
"epoch": 2.9510703363914375,
"grad_norm": 0.28312069177627563,
"learning_rate": 1.0069658217426017e-06,
"loss": 1.2662827968597412,
"step": 1930
},
{
"epoch": 2.9541284403669725,
"grad_norm": 0.2748239040374756,
"learning_rate": 1.0061472523007213e-06,
"loss": 1.209917664527893,
"step": 1932
},
{
"epoch": 2.9571865443425076,
"grad_norm": 0.36147835850715637,
"learning_rate": 1.0053797983399524e-06,
"loss": 1.2387361526489258,
"step": 1934
},
{
"epoch": 2.9602446483180427,
"grad_norm": 0.34865546226501465,
"learning_rate": 1.004663468589726e-06,
"loss": 1.2596259117126465,
"step": 1936
},
{
"epoch": 2.963302752293578,
"grad_norm": 0.23798368871212006,
"learning_rate": 1.0039982711979603e-06,
"loss": 1.239612340927124,
"step": 1938
},
{
"epoch": 2.9663608562691133,
"grad_norm": 0.31115320324897766,
"learning_rate": 1.0033842137309649e-06,
"loss": 1.2498747110366821,
"step": 1940
},
{
"epoch": 2.9694189602446484,
"grad_norm": 0.37815067172050476,
"learning_rate": 1.0028213031733578e-06,
"loss": 1.3014090061187744,
"step": 1942
},
{
"epoch": 2.9724770642201834,
"grad_norm": 0.26476937532424927,
"learning_rate": 1.0023095459279838e-06,
"loss": 1.2854735851287842,
"step": 1944
},
{
"epoch": 2.9755351681957185,
"grad_norm": 0.3802984952926636,
"learning_rate": 1.0018489478158434e-06,
"loss": 1.3032188415527344,
"step": 1946
},
{
"epoch": 2.9785932721712536,
"grad_norm": 0.3544924855232239,
"learning_rate": 1.0014395140760255e-06,
"loss": 1.2610487937927246,
"step": 1948
},
{
"epoch": 2.981651376146789,
"grad_norm": 0.30221831798553467,
"learning_rate": 1.0010812493656488e-06,
"loss": 1.2582671642303467,
"step": 1950
},
{
"epoch": 2.984709480122324,
"grad_norm": 0.2731051743030548,
"learning_rate": 1.000774157759806e-06,
"loss": 1.2794151306152344,
"step": 1952
},
{
"epoch": 2.9877675840978593,
"grad_norm": 0.3089560270309448,
"learning_rate": 1.0005182427515222e-06,
"loss": 1.334507703781128,
"step": 1954
},
{
"epoch": 2.9908256880733948,
"grad_norm": 0.31155917048454285,
"learning_rate": 1.0003135072517108e-06,
"loss": 1.3732435703277588,
"step": 1956
},
{
"epoch": 2.99388379204893,
"grad_norm": 0.3963629901409149,
"learning_rate": 1.000159953589143e-06,
"loss": 1.6014021635055542,
"step": 1958
},
{
"epoch": 2.996941896024465,
"grad_norm": 0.8739917278289795,
"learning_rate": 1.00005758351042e-06,
"loss": 1.5767264366149902,
"step": 1960
},
{
"epoch": 3.0,
"grad_norm": 1.2575660943984985,
"learning_rate": 1.0000063981799541e-06,
"loss": 1.7074545621871948,
"step": 1962
},
{
"epoch": 3.0,
"step": 1962,
"total_flos": 2.4882019125669396e+18,
"train_loss": 1.4736498374943825,
"train_runtime": 8380.6004,
"train_samples_per_second": 3.746,
"train_steps_per_second": 0.234
}
],
"logging_steps": 2,
"max_steps": 1962,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 9999999,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.4882019125669396e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}