{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1962,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0030581039755351682,
      "grad_norm": 0.6435028314590454,
      "learning_rate": 1.0101010101010103e-07,
      "loss": 1.8936554193496704,
      "step": 2
    },
    {
      "epoch": 0.0061162079510703364,
      "grad_norm": 0.5548882484436035,
      "learning_rate": 3.0303030303030305e-07,
      "loss": 1.8550586700439453,
      "step": 4
    },
    {
      "epoch": 0.009174311926605505,
      "grad_norm": 0.27108362317085266,
      "learning_rate": 5.05050505050505e-07,
      "loss": 1.890197992324829,
      "step": 6
    },
    {
      "epoch": 0.012232415902140673,
      "grad_norm": 0.24754057824611664,
      "learning_rate": 7.070707070707071e-07,
      "loss": 1.8445472717285156,
      "step": 8
    },
    {
      "epoch": 0.01529051987767584,
      "grad_norm": 0.39890649914741516,
      "learning_rate": 9.090909090909091e-07,
      "loss": 2.010572910308838,
      "step": 10
    },
    {
      "epoch": 0.01834862385321101,
      "grad_norm": 0.23249551653862,
      "learning_rate": 1.111111111111111e-06,
      "loss": 1.8801705837249756,
      "step": 12
    },
    {
      "epoch": 0.021406727828746176,
      "grad_norm": 0.4299562871456146,
      "learning_rate": 1.3131313131313134e-06,
      "loss": 1.8805203437805176,
      "step": 14
    },
    {
      "epoch": 0.024464831804281346,
      "grad_norm": 0.5231528282165527,
      "learning_rate": 1.5151515151515152e-06,
      "loss": 1.9465537071228027,
      "step": 16
    },
    {
      "epoch": 0.027522935779816515,
      "grad_norm": 0.3482355773448944,
      "learning_rate": 1.7171717171717173e-06,
      "loss": 1.8298053741455078,
      "step": 18
    },
    {
      "epoch": 0.03058103975535168,
      "grad_norm": 0.3003389239311218,
      "learning_rate": 1.9191919191919192e-06,
      "loss": 1.853845238685608,
      "step": 20
    },
    {
      "epoch": 0.03363914373088685,
      "grad_norm": 0.5087025165557861,
      "learning_rate": 2.1212121212121216e-06,
      "loss": 1.9923889636993408,
      "step": 22
    },
    {
      "epoch": 0.03669724770642202,
      "grad_norm": 2.0046560764312744,
      "learning_rate": 2.3232323232323234e-06,
      "loss": 2.008021354675293,
      "step": 24
    },
    {
      "epoch": 0.039755351681957186,
      "grad_norm": 0.2651369571685791,
      "learning_rate": 2.5252525252525258e-06,
      "loss": 1.7058303356170654,
      "step": 26
    },
    {
      "epoch": 0.04281345565749235,
      "grad_norm": 0.5547925233840942,
      "learning_rate": 2.7272727272727272e-06,
      "loss": 1.8821287155151367,
      "step": 28
    },
    {
      "epoch": 0.045871559633027525,
      "grad_norm": 0.5607280731201172,
      "learning_rate": 2.9292929292929295e-06,
      "loss": 2.1788079738616943,
      "step": 30
    },
    {
      "epoch": 0.04892966360856269,
      "grad_norm": 0.36416563391685486,
      "learning_rate": 3.131313131313132e-06,
      "loss": 1.8534326553344727,
      "step": 32
    },
    {
      "epoch": 0.05198776758409786,
      "grad_norm": 0.4965146481990814,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.9557833671569824,
      "step": 34
    },
    {
      "epoch": 0.05504587155963303,
      "grad_norm": 0.3163432776927948,
      "learning_rate": 3.5353535353535356e-06,
      "loss": 1.7984235286712646,
      "step": 36
    },
    {
      "epoch": 0.0581039755351682,
      "grad_norm": 0.3063645362854004,
      "learning_rate": 3.737373737373738e-06,
      "loss": 1.8264985084533691,
      "step": 38
    },
    {
      "epoch": 0.06116207951070336,
      "grad_norm": 0.30639225244522095,
      "learning_rate": 3.93939393939394e-06,
      "loss": 1.8241571187973022,
      "step": 40
    },
    {
      "epoch": 0.06422018348623854,
      "grad_norm": 0.3971042335033417,
      "learning_rate": 4.141414141414142e-06,
      "loss": 1.874243974685669,
      "step": 42
    },
    {
      "epoch": 0.0672782874617737,
      "grad_norm": 0.6156560182571411,
      "learning_rate": 4.343434343434344e-06,
      "loss": 1.965466022491455,
      "step": 44
    },
    {
      "epoch": 0.07033639143730887,
      "grad_norm": 0.5533192753791809,
      "learning_rate": 4.5454545454545455e-06,
      "loss": 2.0693740844726562,
      "step": 46
    },
    {
      "epoch": 0.07339449541284404,
      "grad_norm": 1.9126055240631104,
      "learning_rate": 4.747474747474748e-06,
      "loss": 2.060253143310547,
      "step": 48
    },
    {
      "epoch": 0.0764525993883792,
      "grad_norm": 0.3860923647880554,
      "learning_rate": 4.94949494949495e-06,
      "loss": 1.8577625751495361,
      "step": 50
    },
    {
      "epoch": 0.07951070336391437,
      "grad_norm": 0.4684409499168396,
      "learning_rate": 5.151515151515152e-06,
      "loss": 1.8510971069335938,
      "step": 52
    },
    {
      "epoch": 0.08256880733944955,
      "grad_norm": 0.4307204484939575,
      "learning_rate": 5.353535353535354e-06,
      "loss": 1.9931628704071045,
      "step": 54
    },
    {
      "epoch": 0.0856269113149847,
      "grad_norm": 0.3140373229980469,
      "learning_rate": 5.555555555555557e-06,
      "loss": 1.925836443901062,
      "step": 56
    },
    {
      "epoch": 0.08868501529051988,
      "grad_norm": 0.36317509412765503,
      "learning_rate": 5.7575757575757586e-06,
      "loss": 1.9616905450820923,
      "step": 58
    },
    {
      "epoch": 0.09174311926605505,
      "grad_norm": 0.21478985249996185,
      "learning_rate": 5.95959595959596e-06,
      "loss": 1.895378589630127,
      "step": 60
    },
    {
      "epoch": 0.09480122324159021,
      "grad_norm": 0.2936638593673706,
      "learning_rate": 6.1616161616161615e-06,
      "loss": 1.8279492855072021,
      "step": 62
    },
    {
      "epoch": 0.09785932721712538,
      "grad_norm": 0.3114721179008484,
      "learning_rate": 6.363636363636364e-06,
      "loss": 1.715104103088379,
      "step": 64
    },
    {
      "epoch": 0.10091743119266056,
      "grad_norm": 0.32813334465026855,
      "learning_rate": 6.565656565656566e-06,
      "loss": 1.852712631225586,
      "step": 66
    },
    {
      "epoch": 0.10397553516819572,
      "grad_norm": 0.37994885444641113,
      "learning_rate": 6.767676767676769e-06,
      "loss": 1.9753448963165283,
      "step": 68
    },
    {
      "epoch": 0.10703363914373089,
      "grad_norm": 0.5206537246704102,
      "learning_rate": 6.969696969696971e-06,
      "loss": 1.8388103246688843,
      "step": 70
    },
    {
      "epoch": 0.11009174311926606,
      "grad_norm": 0.6430595517158508,
      "learning_rate": 7.171717171717172e-06,
      "loss": 2.0399489402770996,
      "step": 72
    },
    {
      "epoch": 0.11314984709480122,
      "grad_norm": 0.5809399485588074,
      "learning_rate": 7.373737373737374e-06,
      "loss": 2.1389784812927246,
      "step": 74
    },
    {
      "epoch": 0.1162079510703364,
      "grad_norm": 1.2094364166259766,
      "learning_rate": 7.5757575757575764e-06,
      "loss": 1.9202568531036377,
      "step": 76
    },
    {
      "epoch": 0.11926605504587157,
      "grad_norm": 0.7485645413398743,
      "learning_rate": 7.77777777777778e-06,
      "loss": 2.2573585510253906,
      "step": 78
    },
    {
      "epoch": 0.12232415902140673,
      "grad_norm": 0.47476136684417725,
      "learning_rate": 7.97979797979798e-06,
      "loss": 1.8947498798370361,
      "step": 80
    },
    {
      "epoch": 0.12538226299694188,
      "grad_norm": 0.24537041783332825,
      "learning_rate": 8.181818181818183e-06,
      "loss": 1.636450171470642,
      "step": 82
    },
    {
      "epoch": 0.12844036697247707,
      "grad_norm": 0.4732670783996582,
      "learning_rate": 8.383838383838384e-06,
      "loss": 1.818341612815857,
      "step": 84
    },
    {
      "epoch": 0.13149847094801223,
      "grad_norm": 0.37070026993751526,
      "learning_rate": 8.585858585858587e-06,
      "loss": 1.845613718032837,
      "step": 86
    },
    {
      "epoch": 0.1345565749235474,
      "grad_norm": 0.3881911635398865,
      "learning_rate": 8.787878787878788e-06,
      "loss": 1.7559518814086914,
      "step": 88
    },
    {
      "epoch": 0.13761467889908258,
      "grad_norm": 0.45207998156547546,
      "learning_rate": 8.98989898989899e-06,
      "loss": 1.7992792129516602,
      "step": 90
    },
    {
      "epoch": 0.14067278287461774,
      "grad_norm": 0.1907433420419693,
      "learning_rate": 9.191919191919193e-06,
      "loss": 1.8380980491638184,
      "step": 92
    },
    {
      "epoch": 0.1437308868501529,
      "grad_norm": 0.2265041321516037,
      "learning_rate": 9.393939393939396e-06,
      "loss": 1.9353697299957275,
      "step": 94
    },
    {
      "epoch": 0.14678899082568808,
      "grad_norm": 0.5571039319038391,
      "learning_rate": 9.595959595959597e-06,
      "loss": 1.861445665359497,
      "step": 96
    },
    {
      "epoch": 0.14984709480122324,
      "grad_norm": 0.318570613861084,
      "learning_rate": 9.797979797979798e-06,
      "loss": 1.7963485717773438,
      "step": 98
    },
    {
      "epoch": 0.1529051987767584,
      "grad_norm": 0.35685858130455017,
      "learning_rate": 1e-05,
      "loss": 1.955026626586914,
      "step": 100
    },
    {
      "epoch": 0.1559633027522936,
      "grad_norm": 0.7966809272766113,
      "learning_rate": 9.99997440729838e-06,
      "loss": 1.8856327533721924,
      "step": 102
    },
    {
      "epoch": 0.15902140672782875,
      "grad_norm": 0.2650541663169861,
      "learning_rate": 9.999897629484621e-06,
      "loss": 1.814586877822876,
      "step": 104
    },
    {
      "epoch": 0.1620795107033639,
      "grad_norm": 0.36088353395462036,
      "learning_rate": 9.999769667432037e-06,
      "loss": 1.8607715368270874,
      "step": 106
    },
    {
      "epoch": 0.1651376146788991,
      "grad_norm": 0.6270299553871155,
      "learning_rate": 9.999590522596136e-06,
      "loss": 1.9078267812728882,
      "step": 108
    },
    {
      "epoch": 0.16819571865443425,
      "grad_norm": 0.27504709362983704,
      "learning_rate": 9.999360197014607e-06,
      "loss": 1.9029535055160522,
      "step": 110
    },
    {
      "epoch": 0.1712538226299694,
      "grad_norm": 0.5007109642028809,
      "learning_rate": 9.999078693307296e-06,
      "loss": 1.7704020738601685,
      "step": 112
    },
    {
      "epoch": 0.1743119266055046,
      "grad_norm": 0.5426493883132935,
      "learning_rate": 9.99874601467618e-06,
      "loss": 1.8907287120819092,
      "step": 114
    },
    {
      "epoch": 0.17737003058103976,
      "grad_norm": 0.26077231764793396,
      "learning_rate": 9.998362164905318e-06,
      "loss": 1.760542869567871,
      "step": 116
    },
    {
      "epoch": 0.18042813455657492,
      "grad_norm": 0.37686067819595337,
      "learning_rate": 9.997927148360824e-06,
      "loss": 1.995668649673462,
      "step": 118
    },
    {
      "epoch": 0.1834862385321101,
      "grad_norm": 0.4259154498577118,
      "learning_rate": 9.99744096999081e-06,
      "loss": 1.8606561422348022,
      "step": 120
    },
    {
      "epoch": 0.18654434250764526,
      "grad_norm": 0.3365345299243927,
      "learning_rate": 9.996903635325326e-06,
      "loss": 1.909229040145874,
      "step": 122
    },
    {
      "epoch": 0.18960244648318042,
      "grad_norm": 0.25919589400291443,
      "learning_rate": 9.996315150476308e-06,
      "loss": 1.9200305938720703,
      "step": 124
    },
    {
      "epoch": 0.1926605504587156,
      "grad_norm": 0.2932458221912384,
      "learning_rate": 9.995675522137492e-06,
      "loss": 1.8696832656860352,
      "step": 126
    },
    {
      "epoch": 0.19571865443425077,
      "grad_norm": 0.38474535942077637,
      "learning_rate": 9.994984757584353e-06,
      "loss": 1.828667402267456,
      "step": 128
    },
    {
      "epoch": 0.19877675840978593,
      "grad_norm": 0.3214952349662781,
      "learning_rate": 9.994242864674021e-06,
      "loss": 1.8718284368515015,
      "step": 130
    },
    {
      "epoch": 0.2018348623853211,
      "grad_norm": 0.33034268021583557,
      "learning_rate": 9.993449851845176e-06,
      "loss": 1.8226697444915771,
      "step": 132
    },
    {
      "epoch": 0.20489296636085627,
      "grad_norm": 0.8973183631896973,
      "learning_rate": 9.992605728117972e-06,
      "loss": 1.9453703165054321,
      "step": 134
    },
    {
      "epoch": 0.20795107033639143,
      "grad_norm": 0.6750196218490601,
      "learning_rate": 9.991710503093923e-06,
      "loss": 1.820605993270874,
      "step": 136
    },
    {
      "epoch": 0.21100917431192662,
      "grad_norm": 0.2680327594280243,
      "learning_rate": 9.990764186955797e-06,
      "loss": 1.711888074874878,
      "step": 138
    },
    {
      "epoch": 0.21406727828746178,
      "grad_norm": 0.3089163899421692,
      "learning_rate": 9.989766790467498e-06,
      "loss": 1.668878197669983,
      "step": 140
    },
    {
      "epoch": 0.21712538226299694,
      "grad_norm": 0.5638787746429443,
      "learning_rate": 9.988718324973947e-06,
      "loss": 1.7612136602401733,
      "step": 142
    },
    {
      "epoch": 0.22018348623853212,
      "grad_norm": 0.24349473416805267,
      "learning_rate": 9.98761880240095e-06,
      "loss": 1.6873559951782227,
      "step": 144
    },
    {
      "epoch": 0.22324159021406728,
      "grad_norm": 0.3549518585205078,
      "learning_rate": 9.986468235255065e-06,
      "loss": 1.743373990058899,
      "step": 146
    },
    {
      "epoch": 0.22629969418960244,
      "grad_norm": 0.44438421726226807,
      "learning_rate": 9.985266636623457e-06,
      "loss": 1.6509066820144653,
      "step": 148
    },
    {
      "epoch": 0.22935779816513763,
      "grad_norm": 0.46152663230895996,
      "learning_rate": 9.984014020173748e-06,
      "loss": 1.8014967441558838,
      "step": 150
    },
    {
      "epoch": 0.2324159021406728,
      "grad_norm": 0.278169184923172,
      "learning_rate": 9.98271040015387e-06,
      "loss": 1.8622685670852661,
      "step": 152
    },
    {
      "epoch": 0.23547400611620795,
      "grad_norm": 0.3168479800224304,
      "learning_rate": 9.981355791391891e-06,
      "loss": 1.8940097093582153,
      "step": 154
    },
    {
      "epoch": 0.23853211009174313,
      "grad_norm": 0.3639688491821289,
      "learning_rate": 9.979950209295855e-06,
      "loss": 1.7917258739471436,
      "step": 156
    },
    {
      "epoch": 0.2415902140672783,
      "grad_norm": 0.40860888361930847,
      "learning_rate": 9.978493669853606e-06,
      "loss": 1.8766049146652222,
      "step": 158
    },
    {
      "epoch": 0.24464831804281345,
      "grad_norm": 0.315494179725647,
      "learning_rate": 9.976986189632597e-06,
      "loss": 1.7932193279266357,
      "step": 160
    },
    {
      "epoch": 0.24770642201834864,
      "grad_norm": 0.3525390923023224,
      "learning_rate": 9.975427785779717e-06,
      "loss": 1.9470767974853516,
      "step": 162
    },
    {
      "epoch": 0.25076452599388377,
      "grad_norm": 0.33575552701950073,
      "learning_rate": 9.97381847602108e-06,
      "loss": 1.7163609266281128,
      "step": 164
    },
    {
      "epoch": 0.25382262996941896,
      "grad_norm": 1.193529725074768,
      "learning_rate": 9.972158278661838e-06,
      "loss": 1.877960205078125,
      "step": 166
    },
    {
      "epoch": 0.25688073394495414,
      "grad_norm": 0.348765105009079,
      "learning_rate": 9.970447212585961e-06,
      "loss": 1.6149842739105225,
      "step": 168
    },
    {
      "epoch": 0.2599388379204893,
      "grad_norm": 0.5527969598770142,
      "learning_rate": 9.968685297256027e-06,
      "loss": 1.8597733974456787,
      "step": 170
    },
    {
      "epoch": 0.26299694189602446,
      "grad_norm": 0.656193196773529,
      "learning_rate": 9.966872552713006e-06,
      "loss": 1.5253994464874268,
      "step": 172
    },
    {
      "epoch": 0.26605504587155965,
      "grad_norm": 0.7701634764671326,
      "learning_rate": 9.965008999576018e-06,
      "loss": 1.5178442001342773,
      "step": 174
    },
    {
      "epoch": 0.2691131498470948,
      "grad_norm": 0.3889455795288086,
      "learning_rate": 9.963094659042113e-06,
      "loss": 1.7432003021240234,
      "step": 176
    },
    {
      "epoch": 0.27217125382262997,
      "grad_norm": 0.7660208344459534,
      "learning_rate": 9.961129552886024e-06,
      "loss": 1.655880331993103,
      "step": 178
    },
    {
      "epoch": 0.27522935779816515,
      "grad_norm": 0.7760636210441589,
      "learning_rate": 9.959113703459917e-06,
      "loss": 1.9860963821411133,
      "step": 180
    },
    {
      "epoch": 0.2782874617737003,
      "grad_norm": 1.5110101699829102,
      "learning_rate": 9.957047133693141e-06,
      "loss": 1.9139325618743896,
      "step": 182
    },
    {
      "epoch": 0.28134556574923547,
      "grad_norm": 1.1153804063796997,
      "learning_rate": 9.954929867091961e-06,
      "loss": 1.7500460147857666,
      "step": 184
    },
    {
      "epoch": 0.28440366972477066,
      "grad_norm": 0.3268054723739624,
      "learning_rate": 9.952761927739303e-06,
      "loss": 1.5284479856491089,
      "step": 186
    },
    {
      "epoch": 0.2874617737003058,
      "grad_norm": 0.2701658308506012,
      "learning_rate": 9.95054334029446e-06,
      "loss": 1.5575287342071533,
      "step": 188
    },
    {
      "epoch": 0.290519877675841,
      "grad_norm": 0.5897979140281677,
      "learning_rate": 9.948274129992838e-06,
      "loss": 1.5360642671585083,
      "step": 190
    },
    {
      "epoch": 0.29357798165137616,
      "grad_norm": 3.0125443935394287,
      "learning_rate": 9.945954322645643e-06,
      "loss": 1.7250124216079712,
      "step": 192
    },
    {
      "epoch": 0.2966360856269113,
      "grad_norm": 0.22849687933921814,
      "learning_rate": 9.9435839446396e-06,
      "loss": 1.7317864894866943,
      "step": 194
    },
    {
      "epoch": 0.2996941896024465,
      "grad_norm": 0.41497474908828735,
      "learning_rate": 9.941163022936659e-06,
      "loss": 1.7118513584136963,
      "step": 196
    },
    {
      "epoch": 0.30275229357798167,
      "grad_norm": 0.43153518438339233,
      "learning_rate": 9.938691585073677e-06,
      "loss": 1.4813673496246338,
      "step": 198
    },
    {
      "epoch": 0.3058103975535168,
      "grad_norm": 0.2877158522605896,
      "learning_rate": 9.936169659162105e-06,
      "loss": 1.5152385234832764,
      "step": 200
    },
    {
      "epoch": 0.308868501529052,
      "grad_norm": 0.319741427898407,
      "learning_rate": 9.933597273887676e-06,
      "loss": 1.657623291015625,
      "step": 202
    },
    {
      "epoch": 0.3119266055045872,
      "grad_norm": 0.4885481894016266,
      "learning_rate": 9.930974458510074e-06,
      "loss": 1.8340609073638916,
      "step": 204
    },
    {
      "epoch": 0.3149847094801223,
      "grad_norm": 0.3470771312713623,
      "learning_rate": 9.9283012428626e-06,
      "loss": 1.8779006004333496,
      "step": 206
    },
    {
      "epoch": 0.3180428134556575,
      "grad_norm": 0.21095849573612213,
      "learning_rate": 9.92557765735184e-06,
      "loss": 1.946405053138733,
      "step": 208
    },
    {
      "epoch": 0.3211009174311927,
      "grad_norm": 0.4015672504901886,
      "learning_rate": 9.922803732957309e-06,
      "loss": 1.5457347631454468,
      "step": 210
    },
    {
      "epoch": 0.3241590214067278,
      "grad_norm": 0.2712498903274536,
      "learning_rate": 9.919979501231102e-06,
      "loss": 1.6519064903259277,
      "step": 212
    },
    {
      "epoch": 0.327217125382263,
      "grad_norm": 0.24934278428554535,
      "learning_rate": 9.917104994297543e-06,
      "loss": 1.4617292881011963,
      "step": 214
    },
    {
      "epoch": 0.3302752293577982,
      "grad_norm": 0.22483140230178833,
      "learning_rate": 9.914180244852804e-06,
      "loss": 1.3875129222869873,
      "step": 216
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.6217460632324219,
      "learning_rate": 9.911205286164553e-06,
      "loss": 1.8669204711914062,
      "step": 218
    },
    {
      "epoch": 0.3363914373088685,
      "grad_norm": 0.4357741177082062,
      "learning_rate": 9.908180152071553e-06,
      "loss": 1.666574239730835,
      "step": 220
    },
    {
      "epoch": 0.3394495412844037,
      "grad_norm": 0.29025763273239136,
      "learning_rate": 9.9051048769833e-06,
      "loss": 1.810868263244629,
      "step": 222
    },
    {
      "epoch": 0.3425076452599388,
      "grad_norm": 0.7838276624679565,
      "learning_rate": 9.901979495879612e-06,
      "loss": 1.3125014305114746,
      "step": 224
    },
    {
      "epoch": 0.345565749235474,
      "grad_norm": 0.2543538212776184,
      "learning_rate": 9.898804044310245e-06,
      "loss": 1.6106175184249878,
      "step": 226
    },
    {
      "epoch": 0.3486238532110092,
      "grad_norm": 0.4557286500930786,
      "learning_rate": 9.89557855839448e-06,
      "loss": 1.886078953742981,
      "step": 228
    },
    {
      "epoch": 0.3516819571865443,
      "grad_norm": 0.2689090073108673,
      "learning_rate": 9.892303074820712e-06,
      "loss": 1.631593108177185,
      "step": 230
    },
    {
      "epoch": 0.3547400611620795,
      "grad_norm": 0.25291207432746887,
      "learning_rate": 9.888977630846048e-06,
      "loss": 1.7156798839569092,
      "step": 232
    },
    {
      "epoch": 0.3577981651376147,
      "grad_norm": 0.3357708752155304,
      "learning_rate": 9.88560226429586e-06,
      "loss": 1.6416988372802734,
      "step": 234
    },
    {
      "epoch": 0.36085626911314983,
      "grad_norm": 0.3246925473213196,
      "learning_rate": 9.88217701356337e-06,
      "loss": 1.5658977031707764,
      "step": 236
    },
    {
      "epoch": 0.363914373088685,
      "grad_norm": 0.2840614318847656,
      "learning_rate": 9.878701917609208e-06,
      "loss": 1.6534138917922974,
      "step": 238
    },
    {
      "epoch": 0.3669724770642202,
      "grad_norm": 0.5397573709487915,
      "learning_rate": 9.875177015960973e-06,
      "loss": 1.7614964246749878,
      "step": 240
    },
    {
      "epoch": 0.37003058103975534,
      "grad_norm": 0.28763291239738464,
      "learning_rate": 9.871602348712777e-06,
      "loss": 1.5937902927398682,
      "step": 242
    },
    {
      "epoch": 0.3730886850152905,
      "grad_norm": 0.21111302077770233,
      "learning_rate": 9.867977956524798e-06,
      "loss": 1.6914631128311157,
      "step": 244
    },
    {
      "epoch": 0.3761467889908257,
      "grad_norm": 0.5114771723747253,
      "learning_rate": 9.864303880622806e-06,
      "loss": 1.8919175863265991,
      "step": 246
    },
    {
      "epoch": 0.37920489296636084,
      "grad_norm": 0.4698966145515442,
      "learning_rate": 9.8605801627977e-06,
      "loss": 2.395404815673828,
      "step": 248
    },
    {
      "epoch": 0.382262996941896,
      "grad_norm": 0.604468047618866,
      "learning_rate": 9.85680684540504e-06,
      "loss": 1.523594617843628,
      "step": 250
    },
    {
      "epoch": 0.3853211009174312,
      "grad_norm": 0.295039564371109,
      "learning_rate": 9.852983971364549e-06,
      "loss": 1.520268440246582,
      "step": 252
    },
    {
      "epoch": 0.38837920489296635,
      "grad_norm": 0.2590586245059967,
      "learning_rate": 9.84911158415964e-06,
      "loss": 1.5712318420410156,
      "step": 254
    },
    {
      "epoch": 0.39143730886850153,
      "grad_norm": 0.9178432822227478,
      "learning_rate": 9.845189727836914e-06,
      "loss": 1.7512378692626953,
      "step": 256
    },
    {
      "epoch": 0.3944954128440367,
      "grad_norm": 0.512359619140625,
      "learning_rate": 9.841218447005657e-06,
      "loss": 1.677209496498108,
      "step": 258
    },
    {
      "epoch": 0.39755351681957185,
      "grad_norm": 0.8242136240005493,
      "learning_rate": 9.837197786837341e-06,
      "loss": 1.52079439163208,
      "step": 260
    },
    {
      "epoch": 0.40061162079510704,
      "grad_norm": 0.5057528614997864,
      "learning_rate": 9.833127793065098e-06,
      "loss": 1.3776154518127441,
      "step": 262
    },
    {
      "epoch": 0.4036697247706422,
      "grad_norm": 0.287590891122818,
      "learning_rate": 9.829008511983214e-06,
      "loss": 1.313464879989624,
      "step": 264
    },
    {
      "epoch": 0.40672782874617736,
      "grad_norm": 0.22291725873947144,
      "learning_rate": 9.82483999044659e-06,
      "loss": 1.4770923852920532,
      "step": 266
    },
    {
      "epoch": 0.40978593272171254,
      "grad_norm": 0.4278978109359741,
      "learning_rate": 9.820622275870219e-06,
      "loss": 1.713256597518921,
      "step": 268
    },
    {
      "epoch": 0.41284403669724773,
      "grad_norm": 0.7735996246337891,
      "learning_rate": 9.816355416228636e-06,
      "loss": 1.7301435470581055,
      "step": 270
    },
    {
      "epoch": 0.41590214067278286,
      "grad_norm": 0.36943763494491577,
      "learning_rate": 9.812039460055383e-06,
      "loss": 1.746875286102295,
      "step": 272
    },
    {
      "epoch": 0.41896024464831805,
      "grad_norm": 0.30427658557891846,
      "learning_rate": 9.807674456442448e-06,
      "loss": 1.7644126415252686,
      "step": 274
    },
    {
      "epoch": 0.42201834862385323,
      "grad_norm": 0.2680354416370392,
      "learning_rate": 9.80326045503972e-06,
      "loss": 1.6075056791305542,
      "step": 276
    },
    {
      "epoch": 0.42507645259938837,
      "grad_norm": 0.5165081024169922,
      "learning_rate": 9.798797506054398e-06,
      "loss": 1.7466685771942139,
      "step": 278
    },
    {
      "epoch": 0.42813455657492355,
      "grad_norm": 0.46960580348968506,
      "learning_rate": 9.794285660250457e-06,
      "loss": 1.6852364540100098,
      "step": 280
    },
    {
      "epoch": 0.43119266055045874,
      "grad_norm": 0.3378291130065918,
      "learning_rate": 9.789724968948034e-06,
      "loss": 1.5493333339691162,
      "step": 282
    },
    {
      "epoch": 0.43425076452599387,
      "grad_norm": 0.2972247004508972,
      "learning_rate": 9.78511548402287e-06,
      "loss": 1.5161151885986328,
      "step": 284
    },
    {
      "epoch": 0.43730886850152906,
      "grad_norm": 0.3610173165798187,
      "learning_rate": 9.780457257905708e-06,
      "loss": 1.698796272277832,
      "step": 286
    },
    {
      "epoch": 0.44036697247706424,
      "grad_norm": 0.4165475070476532,
      "learning_rate": 9.775750343581702e-06,
      "loss": 1.4344041347503662,
      "step": 288
    },
    {
      "epoch": 0.4434250764525994,
      "grad_norm": 0.565291702747345,
      "learning_rate": 9.770994794589804e-06,
      "loss": 1.6736053228378296,
      "step": 290
    },
    {
      "epoch": 0.44648318042813456,
      "grad_norm": 0.22272102534770966,
      "learning_rate": 9.766190665022173e-06,
      "loss": 1.515446424484253,
      "step": 292
    },
    {
      "epoch": 0.44954128440366975,
      "grad_norm": 0.292961061000824,
      "learning_rate": 9.761338009523542e-06,
      "loss": 1.5677558183670044,
      "step": 294
    },
    {
      "epoch": 0.4525993883792049,
      "grad_norm": 0.22576913237571716,
      "learning_rate": 9.756436883290608e-06,
      "loss": 1.6895636320114136,
      "step": 296
    },
    {
      "epoch": 0.45565749235474007,
      "grad_norm": 0.514447808265686,
      "learning_rate": 9.751487342071394e-06,
      "loss": 1.6961359977722168,
      "step": 298
    },
    {
      "epoch": 0.45871559633027525,
      "grad_norm": 0.4707038402557373,
      "learning_rate": 9.74648944216463e-06,
      "loss": 1.5364969968795776,
      "step": 300
    },
    {
      "epoch": 0.4617737003058104,
      "grad_norm": 0.3324492871761322,
      "learning_rate": 9.741443240419096e-06,
      "loss": 1.4445494413375854,
      "step": 302
    },
    {
      "epoch": 0.4648318042813456,
      "grad_norm": 0.40139055252075195,
      "learning_rate": 9.736348794232986e-06,
      "loss": 1.631695032119751,
      "step": 304
    },
    {
      "epoch": 0.46788990825688076,
      "grad_norm": 0.32826143503189087,
      "learning_rate": 9.731206161553253e-06,
      "loss": 1.5630545616149902,
      "step": 306
    },
    {
      "epoch": 0.4709480122324159,
      "grad_norm": 0.7137564420700073,
      "learning_rate": 9.726015400874945e-06,
      "loss": 1.7077264785766602,
      "step": 308
    },
    {
      "epoch": 0.4740061162079511,
      "grad_norm": 0.5834897756576538,
      "learning_rate": 9.72077657124055e-06,
      "loss": 1.541429877281189,
      "step": 310
    },
    {
      "epoch": 0.47706422018348627,
      "grad_norm": 0.30517715215682983,
      "learning_rate": 9.715489732239309e-06,
      "loss": 1.486952781677246,
      "step": 312
    },
    {
      "epoch": 0.4801223241590214,
      "grad_norm": 0.39915895462036133,
      "learning_rate": 9.710154944006558e-06,
      "loss": 1.4761033058166504,
      "step": 314
    },
    {
      "epoch": 0.4831804281345566,
      "grad_norm": 0.24902665615081787,
      "learning_rate": 9.70477226722302e-06,
      "loss": 1.555905818939209,
      "step": 316
    },
    {
      "epoch": 0.48623853211009177,
      "grad_norm": 0.27528202533721924,
      "learning_rate": 9.699341763114142e-06,
      "loss": 1.5418330430984497,
      "step": 318
    },
    {
      "epoch": 0.4892966360856269,
      "grad_norm": 0.37373027205467224,
      "learning_rate": 9.693863493449376e-06,
      "loss": 1.5460388660430908,
      "step": 320
    },
    {
      "epoch": 0.4923547400611621,
      "grad_norm": 0.3926723301410675,
      "learning_rate": 9.688337520541487e-06,
      "loss": 1.7003178596496582,
      "step": 322
    },
    {
      "epoch": 0.4954128440366973,
      "grad_norm": 0.2708083987236023,
      "learning_rate": 9.68276390724584e-06,
      "loss": 1.8639323711395264,
      "step": 324
    },
    {
      "epoch": 0.4984709480122324,
      "grad_norm": 0.3522673547267914,
      "learning_rate": 9.67714271695969e-06,
      "loss": 1.7603111267089844,
      "step": 326
    },
    {
      "epoch": 0.5015290519877675,
      "grad_norm": 0.2736775279045105,
      "learning_rate": 9.671474013621461e-06,
      "loss": 1.7426960468292236,
      "step": 328
    },
    {
      "epoch": 0.5045871559633027,
      "grad_norm": 0.34006989002227783,
      "learning_rate": 9.665757861710008e-06,
      "loss": 1.6802008152008057,
      "step": 330
    },
    {
      "epoch": 0.5076452599388379,
      "grad_norm": 0.7181631922721863,
      "learning_rate": 9.659994326243897e-06,
      "loss": 1.3610038757324219,
      "step": 332
    },
    {
      "epoch": 0.5107033639143731,
      "grad_norm": 0.3209435045719147,
      "learning_rate": 9.654183472780655e-06,
      "loss": 1.3310749530792236,
      "step": 334
    },
    {
      "epoch": 0.5137614678899083,
      "grad_norm": 0.3394523561000824,
      "learning_rate": 9.64832536741604e-06,
      "loss": 1.7552449703216553,
      "step": 336
    },
    {
      "epoch": 0.5168195718654435,
      "grad_norm": 0.26636433601379395,
      "learning_rate": 9.642420076783266e-06,
      "loss": 1.7648036479949951,
      "step": 338
    },
    {
      "epoch": 0.5198776758409785,
      "grad_norm": 0.4860476553440094,
      "learning_rate": 9.636467668052263e-06,
      "loss": 1.8371148109436035,
      "step": 340
    },
    {
      "epoch": 0.5229357798165137,
      "grad_norm": 0.3957999050617218,
      "learning_rate": 9.630468208928906e-06,
      "loss": 1.7691468000411987,
      "step": 342
    },
    {
      "epoch": 0.5259938837920489,
      "grad_norm": 0.29553869366645813,
      "learning_rate": 9.624421767654247e-06,
      "loss": 1.8050150871276855,
      "step": 344
    },
    {
      "epoch": 0.5290519877675841,
      "grad_norm": 0.8523488640785217,
      "learning_rate": 9.618328413003742e-06,
      "loss": 1.7548258304595947,
      "step": 346
    },
    {
      "epoch": 0.5321100917431193,
      "grad_norm": 0.30288758873939514,
      "learning_rate": 9.612188214286457e-06,
      "loss": 1.652245044708252,
      "step": 348
    },
    {
      "epoch": 0.5351681957186545,
      "grad_norm": 0.44331154227256775,
      "learning_rate": 9.606001241344293e-06,
      "loss": 1.5749201774597168,
      "step": 350
    },
    {
      "epoch": 0.5382262996941896,
      "grad_norm": 0.3775594234466553,
      "learning_rate": 9.599767564551185e-06,
      "loss": 1.8136138916015625,
      "step": 352
    },
    {
      "epoch": 0.5412844036697247,
      "grad_norm": 0.6260164976119995,
      "learning_rate": 9.593487254812298e-06,
      "loss": 1.753260850906372,
      "step": 354
    },
    {
      "epoch": 0.5443425076452599,
      "grad_norm": 0.21940867602825165,
      "learning_rate": 9.587160383563235e-06,
      "loss": 1.2595834732055664,
      "step": 356
    },
    {
      "epoch": 0.5474006116207951,
      "grad_norm": 0.45921286940574646,
      "learning_rate": 9.580787022769205e-06,
      "loss": 1.8687834739685059,
      "step": 358
    },
    {
      "epoch": 0.5504587155963303,
      "grad_norm": 0.25323811173439026,
      "learning_rate": 9.574367244924216e-06,
      "loss": 1.87260901927948,
      "step": 360
    },
    {
      "epoch": 0.5535168195718655,
      "grad_norm": 0.3825606405735016,
      "learning_rate": 9.567901123050255e-06,
      "loss": 1.9380344152450562,
      "step": 362
    },
    {
      "epoch": 0.5565749235474006,
      "grad_norm": 0.8433843851089478,
      "learning_rate": 9.56138873069644e-06,
      "loss": 1.854411005973816,
      "step": 364
    },
    {
      "epoch": 0.5596330275229358,
      "grad_norm": 0.5623306035995483,
      "learning_rate": 9.554830141938201e-06,
      "loss": 1.8307363986968994,
      "step": 366
    },
    {
      "epoch": 0.5626911314984709,
      "grad_norm": 0.5833460688591003,
      "learning_rate": 9.54822543137643e-06,
      "loss": 1.691839575767517,
      "step": 368
    },
    {
      "epoch": 0.5657492354740061,
      "grad_norm": 0.7582941651344299,
      "learning_rate": 9.541574674136634e-06,
      "loss": 1.5816738605499268,
      "step": 370
    },
    {
      "epoch": 0.5688073394495413,
      "grad_norm": 0.5991274118423462,
      "learning_rate": 9.534877945868075e-06,
      "loss": 1.141850471496582,
      "step": 372
    },
    {
      "epoch": 0.5718654434250765,
      "grad_norm": 0.27493157982826233,
      "learning_rate": 9.528135322742916e-06,
      "loss": 1.1190171241760254,
      "step": 374
    },
    {
      "epoch": 0.5749235474006116,
      "grad_norm": 0.20014670491218567,
      "learning_rate": 9.521346881455356e-06,
      "loss": 1.4172542095184326,
      "step": 376
    },
    {
      "epoch": 0.5779816513761468,
      "grad_norm": 0.45737189054489136,
      "learning_rate": 9.514512699220751e-06,
      "loss": 1.3267741203308105,
      "step": 378
    },
    {
      "epoch": 0.581039755351682,
      "grad_norm": 0.342574805021286,
      "learning_rate": 9.507632853774738e-06,
      "loss": 1.2848198413848877,
      "step": 380
    },
    {
      "epoch": 0.5840978593272171,
      "grad_norm": 0.2764483690261841,
      "learning_rate": 9.500707423372354e-06,
      "loss": 1.2696105241775513,
      "step": 382
    },
    {
      "epoch": 0.5871559633027523,
      "grad_norm": 0.5538342595100403,
      "learning_rate": 9.493736486787145e-06,
      "loss": 1.5733320713043213,
      "step": 384
    },
    {
      "epoch": 0.5902140672782875,
      "grad_norm": 0.5002435445785522,
      "learning_rate": 9.486720123310264e-06,
      "loss": 1.4811735153198242,
      "step": 386
    },
    {
      "epoch": 0.5932721712538226,
      "grad_norm": 0.2729179561138153,
      "learning_rate": 9.479658412749575e-06,
      "loss": 1.2759473323822021,
      "step": 388
    },
    {
      "epoch": 0.5963302752293578,
      "grad_norm": 0.422869473695755,
      "learning_rate": 9.472551435428751e-06,
      "loss": 1.6186537742614746,
      "step": 390
    },
    {
      "epoch": 0.599388379204893,
      "grad_norm": 0.18889868259429932,
      "learning_rate": 9.465399272186341e-06,
      "loss": 1.5904256105422974,
      "step": 392
    },
    {
      "epoch": 0.6024464831804281,
      "grad_norm": 0.4715130925178528,
      "learning_rate": 9.458202004374875e-06,
      "loss": 1.3664047718048096,
      "step": 394
    },
    {
      "epoch": 0.6055045871559633,
      "grad_norm": 0.3192538321018219,
      "learning_rate": 9.450959713859918e-06,
      "loss": 1.5540097951889038,
      "step": 396
    },
    {
      "epoch": 0.6085626911314985,
      "grad_norm": 0.48479557037353516,
      "learning_rate": 9.443672483019146e-06,
      "loss": 1.7298085689544678,
      "step": 398
    },
    {
      "epoch": 0.6116207951070336,
      "grad_norm": 0.40212106704711914,
      "learning_rate": 9.436340394741424e-06,
      "loss": 1.2515219449996948,
      "step": 400
    },
    {
      "epoch": 0.6146788990825688,
      "grad_norm": 0.31416311860084534,
      "learning_rate": 9.428963532425832e-06,
      "loss": 1.5272061824798584,
      "step": 402
    },
    {
      "epoch": 0.617737003058104,
      "grad_norm": 0.39595550298690796,
      "learning_rate": 9.421541979980743e-06,
      "loss": 1.584099531173706,
      "step": 404
    },
    {
      "epoch": 0.6207951070336392,
      "grad_norm": 0.3684428632259369,
      "learning_rate": 9.414075821822862e-06,
      "loss": 1.5516374111175537,
      "step": 406
    },
    {
      "epoch": 0.6238532110091743,
      "grad_norm": 0.2936325669288635,
      "learning_rate": 9.406565142876252e-06,
      "loss": 1.3937046527862549,
      "step": 408
    },
    {
      "epoch": 0.6269113149847095,
      "grad_norm": 0.8210769295692444,
      "learning_rate": 9.399010028571394e-06,
      "loss": 1.0384480953216553,
      "step": 410
    },
    {
      "epoch": 0.6299694189602446,
      "grad_norm": 0.31836938858032227,
      "learning_rate": 9.391410564844189e-06,
      "loss": 1.6605589389801025,
      "step": 412
    },
    {
      "epoch": 0.6330275229357798,
      "grad_norm": 0.4151877164840698,
      "learning_rate": 9.383766838134997e-06,
      "loss": 1.5902981758117676,
      "step": 414
    },
    {
      "epoch": 0.636085626911315,
      "grad_norm": 0.29467517137527466,
      "learning_rate": 9.376078935387647e-06,
      "loss": 1.511544942855835,
      "step": 416
    },
    {
      "epoch": 0.6391437308868502,
      "grad_norm": 0.4552344083786011,
      "learning_rate": 9.36834694404845e-06,
      "loss": 1.6092697381973267,
      "step": 418
    },
    {
      "epoch": 0.6422018348623854,
      "grad_norm": 0.3086092174053192,
      "learning_rate": 9.360570952065205e-06,
      "loss": 1.5458872318267822,
      "step": 420
    },
    {
      "epoch": 0.6452599388379205,
      "grad_norm": 0.29464077949523926,
      "learning_rate": 9.3527510478862e-06,
      "loss": 1.5201151371002197,
      "step": 422
    },
    {
      "epoch": 0.6483180428134556,
      "grad_norm": 0.35874319076538086,
      "learning_rate": 9.3448873204592e-06,
      "loss": 1.7184113264083862,
      "step": 424
    },
    {
      "epoch": 0.6513761467889908,
      "grad_norm": 0.6177545189857483,
      "learning_rate": 9.336979859230438e-06,
      "loss": 1.425230860710144,
      "step": 426
    },
    {
      "epoch": 0.654434250764526,
      "grad_norm": 0.4207315742969513,
      "learning_rate": 9.329028754143606e-06,
      "loss": 1.1580491065979004,
      "step": 428
    },
    {
      "epoch": 0.6574923547400612,
      "grad_norm": 0.40215086936950684,
      "learning_rate": 9.321034095638816e-06,
      "loss": 1.776092767715454,
      "step": 430
    },
    {
      "epoch": 0.6605504587155964,
      "grad_norm": 0.48207205533981323,
      "learning_rate": 9.312995974651581e-06,
      "loss": 1.5432982444763184,
      "step": 432
    },
    {
      "epoch": 0.6636085626911316,
      "grad_norm": 0.9188543558120728,
      "learning_rate": 9.304914482611788e-06,
      "loss": 1.6913204193115234,
      "step": 434
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 2.0712273120880127,
      "learning_rate": 9.296789711442641e-06,
      "loss": 1.5286757946014404,
      "step": 436
    },
    {
      "epoch": 0.6697247706422018,
      "grad_norm": 0.4487042725086212,
      "learning_rate": 9.288621753559624e-06,
      "loss": 1.7271997928619385,
      "step": 438
    },
    {
      "epoch": 0.672782874617737,
      "grad_norm": 0.4550405442714691,
      "learning_rate": 9.280410701869456e-06,
      "loss": 1.5852614641189575,
      "step": 440
    },
    {
      "epoch": 0.6758409785932722,
      "grad_norm": 0.8099808692932129,
      "learning_rate": 9.27215664976902e-06,
      "loss": 1.6332128047943115,
      "step": 442
    },
    {
      "epoch": 0.6788990825688074,
      "grad_norm": 0.5566719174385071,
      "learning_rate": 9.263859691144315e-06,
      "loss": 1.5285072326660156,
      "step": 444
    },
    {
      "epoch": 0.6819571865443425,
      "grad_norm": 0.3996361196041107,
      "learning_rate": 9.25551992036938e-06,
      "loss": 1.181262731552124,
      "step": 446
    },
    {
      "epoch": 0.6850152905198776,
      "grad_norm": 0.7320879697799683,
      "learning_rate": 9.247137432305221e-06,
      "loss": 1.6381134986877441,
      "step": 448
    },
    {
      "epoch": 0.6880733944954128,
      "grad_norm": 0.5473281741142273,
      "learning_rate": 9.238712322298733e-06,
      "loss": 1.623387098312378,
      "step": 450
    },
    {
      "epoch": 0.691131498470948,
      "grad_norm": 0.2673215866088867,
      "learning_rate": 9.230244686181616e-06,
      "loss": 1.6147091388702393,
      "step": 452
    },
    {
      "epoch": 0.6941896024464832,
      "grad_norm": 0.41044941544532776,
      "learning_rate": 9.22173462026929e-06,
      "loss": 1.6174466609954834,
      "step": 454
    },
    {
      "epoch": 0.6972477064220184,
      "grad_norm": 0.3210803270339966,
      "learning_rate": 9.213182221359785e-06,
      "loss": 1.4634352922439575,
      "step": 456
    },
    {
      "epoch": 0.7003058103975535,
      "grad_norm": 0.4366549551486969,
      "learning_rate": 9.204587586732653e-06,
      "loss": 1.6598728895187378,
      "step": 458
    },
    {
      "epoch": 0.7033639143730887,
      "grad_norm": 0.6817240118980408,
      "learning_rate": 9.195950814147862e-06,
      "loss": 1.7457971572875977,
      "step": 460
    },
    {
      "epoch": 0.7064220183486238,
      "grad_norm": 1.429196834564209,
      "learning_rate": 9.187272001844673e-06,
      "loss": 1.4895765781402588,
      "step": 462
    },
    {
      "epoch": 0.709480122324159,
      "grad_norm": 0.33415424823760986,
      "learning_rate": 9.178551248540534e-06,
      "loss": 1.7249622344970703,
      "step": 464
    },
    {
      "epoch": 0.7125382262996942,
      "grad_norm": 0.5185303092002869,
      "learning_rate": 9.169788653429949e-06,
      "loss": 1.5071038007736206,
      "step": 466
    },
    {
      "epoch": 0.7155963302752294,
      "grad_norm": 0.703040599822998,
      "learning_rate": 9.160984316183354e-06,
      "loss": 1.6332056522369385,
      "step": 468
    },
    {
      "epoch": 0.7186544342507645,
      "grad_norm": 0.2760729491710663,
      "learning_rate": 9.152138336945985e-06,
      "loss": 1.5567004680633545,
      "step": 470
    },
    {
      "epoch": 0.7217125382262997,
      "grad_norm": 0.26987555623054504,
      "learning_rate": 9.143250816336733e-06,
      "loss": 1.6896016597747803,
      "step": 472
    },
    {
      "epoch": 0.7247706422018348,
      "grad_norm": 0.4577353894710541,
      "learning_rate": 9.134321855447004e-06,
      "loss": 1.780794620513916,
      "step": 474
    },
    {
      "epoch": 0.72782874617737,
      "grad_norm": 0.3506152629852295,
      "learning_rate": 9.125351555839568e-06,
      "loss": 1.676330327987671,
      "step": 476
    },
    {
      "epoch": 0.7308868501529052,
      "grad_norm": 0.3420753479003906,
      "learning_rate": 9.116340019547403e-06,
      "loss": 1.53602933883667,
      "step": 478
    },
    {
      "epoch": 0.7339449541284404,
      "grad_norm": 0.615734875202179,
      "learning_rate": 9.107287349072535e-06,
      "loss": 1.6315178871154785,
      "step": 480
    },
    {
      "epoch": 0.7370030581039755,
      "grad_norm": 0.3383826017379761,
      "learning_rate": 9.098193647384872e-06,
      "loss": 1.646344542503357,
      "step": 482
    },
    {
      "epoch": 0.7400611620795107,
      "grad_norm": 0.40700384974479675,
      "learning_rate": 9.089059017921034e-06,
      "loss": 1.6499868631362915,
      "step": 484
    },
    {
      "epoch": 0.7431192660550459,
      "grad_norm": 0.4302765727043152,
      "learning_rate": 9.079883564583176e-06,
      "loss": 1.6223028898239136,
      "step": 486
    },
    {
      "epoch": 0.746177370030581,
      "grad_norm": 0.2995837330818176,
      "learning_rate": 9.070667391737804e-06,
      "loss": 1.639768123626709,
      "step": 488
    },
    {
      "epoch": 0.7492354740061162,
      "grad_norm": 0.3183751702308655,
      "learning_rate": 9.061410604214588e-06,
      "loss": 1.4172444343566895,
      "step": 490
    },
    {
      "epoch": 0.7522935779816514,
      "grad_norm": 0.41883519291877747,
      "learning_rate": 9.052113307305178e-06,
      "loss": 1.5172092914581299,
      "step": 492
    },
    {
      "epoch": 0.7553516819571865,
      "grad_norm": 0.4170067310333252,
      "learning_rate": 9.04277560676199e-06,
      "loss": 1.4581788778305054,
      "step": 494
    },
    {
      "epoch": 0.7584097859327217,
      "grad_norm": 0.4589844346046448,
      "learning_rate": 9.033397608797015e-06,
      "loss": 1.5675625801086426,
      "step": 496
    },
    {
      "epoch": 0.7614678899082569,
      "grad_norm": 0.4775915741920471,
      "learning_rate": 9.023979420080614e-06,
      "loss": 1.5760972499847412,
      "step": 498
    },
    {
      "epoch": 0.764525993883792,
      "grad_norm": 0.4255703389644623,
      "learning_rate": 9.014521147740295e-06,
      "loss": 1.4211878776550293,
      "step": 500
    },
    {
      "epoch": 0.7675840978593272,
      "grad_norm": 0.2350740283727646,
      "learning_rate": 9.005022899359498e-06,
      "loss": 1.0600173473358154,
      "step": 502
    },
    {
      "epoch": 0.7706422018348624,
      "grad_norm": 0.25523892045021057,
      "learning_rate": 8.995484782976372e-06,
      "loss": 1.3498680591583252,
      "step": 504
    },
    {
      "epoch": 0.7737003058103975,
      "grad_norm": 0.25793585181236267,
      "learning_rate": 8.985906907082548e-06,
      "loss": 1.4128957986831665,
      "step": 506
    },
    {
      "epoch": 0.7767584097859327,
      "grad_norm": 0.2672351002693176,
      "learning_rate": 8.9762893806219e-06,
      "loss": 1.4579813480377197,
      "step": 508
    },
    {
      "epoch": 0.7798165137614679,
      "grad_norm": 0.3467871844768524,
      "learning_rate": 8.96663231298931e-06,
      "loss": 1.469613790512085,
      "step": 510
    },
    {
      "epoch": 0.7828746177370031,
      "grad_norm": 0.2631012797355652,
      "learning_rate": 8.956935814029426e-06,
      "loss": 1.5352952480316162,
      "step": 512
    },
    {
      "epoch": 0.7859327217125383,
      "grad_norm": 0.42967817187309265,
      "learning_rate": 8.947199994035402e-06,
      "loss": 1.448859691619873,
      "step": 514
    },
    {
      "epoch": 0.7889908256880734,
      "grad_norm": 0.18720397353172302,
      "learning_rate": 8.937424963747656e-06,
      "loss": 1.4682276248931885,
      "step": 516
    },
    {
      "epoch": 0.7920489296636085,
      "grad_norm": 0.2571136951446533,
      "learning_rate": 8.9276108343526e-06,
      "loss": 1.430220365524292,
      "step": 518
    },
    {
      "epoch": 0.7951070336391437,
      "grad_norm": 0.49666231870651245,
      "learning_rate": 8.917757717481388e-06,
      "loss": 1.4388704299926758,
      "step": 520
    },
    {
      "epoch": 0.7981651376146789,
      "grad_norm": 0.18454308807849884,
      "learning_rate": 8.90786572520863e-06,
      "loss": 1.3887765407562256,
      "step": 522
    },
    {
      "epoch": 0.8012232415902141,
      "grad_norm": 0.19775497913360596,
      "learning_rate": 8.897934970051128e-06,
      "loss": 1.4397857189178467,
      "step": 524
    },
    {
      "epoch": 0.8042813455657493,
      "grad_norm": 0.24946311116218567,
      "learning_rate": 8.8879655649666e-06,
      "loss": 1.3772547245025635,
      "step": 526
    },
    {
      "epoch": 0.8073394495412844,
      "grad_norm": 0.1347188949584961,
      "learning_rate": 8.877957623352376e-06,
      "loss": 1.2148081064224243,
      "step": 528
    },
    {
      "epoch": 0.8103975535168195,
      "grad_norm": 0.17375752329826355,
      "learning_rate": 8.867911259044134e-06,
      "loss": 1.2351716756820679,
      "step": 530
    },
    {
      "epoch": 0.8134556574923547,
      "grad_norm": 0.12528319656848907,
      "learning_rate": 8.857826586314586e-06,
      "loss": 1.0168347358703613,
      "step": 532
    },
    {
      "epoch": 0.8165137614678899,
      "grad_norm": 0.22279202938079834,
      "learning_rate": 8.847703719872184e-06,
      "loss": 1.3256959915161133,
      "step": 534
    },
    {
      "epoch": 0.8195718654434251,
      "grad_norm": 0.22974777221679688,
      "learning_rate": 8.837542774859819e-06,
      "loss": 1.3868855237960815,
      "step": 536
    },
    {
      "epoch": 0.8226299694189603,
      "grad_norm": 0.2833384871482849,
      "learning_rate": 8.827343866853505e-06,
      "loss": 1.4037737846374512,
      "step": 538
    },
    {
      "epoch": 0.8256880733944955,
      "grad_norm": 0.20462170243263245,
      "learning_rate": 8.817107111861068e-06,
      "loss": 1.3688358068466187,
      "step": 540
    },
    {
      "epoch": 0.8287461773700305,
      "grad_norm": 0.21328498423099518,
      "learning_rate": 8.806832626320828e-06,
      "loss": 1.3812446594238281,
      "step": 542
    },
    {
      "epoch": 0.8318042813455657,
      "grad_norm": 0.2749079465866089,
      "learning_rate": 8.796520527100268e-06,
      "loss": 1.3695695400238037,
      "step": 544
    },
    {
      "epoch": 0.8348623853211009,
      "grad_norm": 0.17869983613491058,
      "learning_rate": 8.786170931494714e-06,
      "loss": 1.3381950855255127,
      "step": 546
    },
    {
      "epoch": 0.8379204892966361,
      "grad_norm": 0.23981167376041412,
      "learning_rate": 8.775783957225991e-06,
      "loss": 1.409177541732788,
      "step": 548
    },
    {
      "epoch": 0.8409785932721713,
      "grad_norm": 0.4634632170200348,
      "learning_rate": 8.765359722441096e-06,
      "loss": 1.3826044797897339,
      "step": 550
    },
    {
      "epoch": 0.8440366972477065,
      "grad_norm": 0.19470739364624023,
      "learning_rate": 8.754898345710839e-06,
      "loss": 1.3529078960418701,
      "step": 552
    },
    {
      "epoch": 0.8470948012232415,
      "grad_norm": 0.21753935515880585,
      "learning_rate": 8.744399946028506e-06,
      "loss": 1.3324353694915771,
      "step": 554
    },
    {
      "epoch": 0.8501529051987767,
      "grad_norm": 0.24797090888023376,
      "learning_rate": 8.733864642808505e-06,
      "loss": 1.3469841480255127,
      "step": 556
    },
    {
      "epoch": 0.8532110091743119,
      "grad_norm": 0.2123066782951355,
      "learning_rate": 8.723292555884997e-06,
      "loss": 1.343614101409912,
      "step": 558
    },
    {
      "epoch": 0.8562691131498471,
      "grad_norm": 0.25072529911994934,
      "learning_rate": 8.712683805510547e-06,
      "loss": 1.305376648902893,
      "step": 560
    },
    {
      "epoch": 0.8593272171253823,
      "grad_norm": 0.3219304382801056,
      "learning_rate": 8.702038512354746e-06,
      "loss": 1.3584821224212646,
      "step": 562
    },
    {
      "epoch": 0.8623853211009175,
      "grad_norm": 0.3253892660140991,
      "learning_rate": 8.691356797502846e-06,
      "loss": 1.3929443359375,
      "step": 564
    },
    {
      "epoch": 0.8654434250764526,
      "grad_norm": 0.22387385368347168,
      "learning_rate": 8.680638782454373e-06,
      "loss": 1.3898614645004272,
      "step": 566
    },
    {
      "epoch": 0.8685015290519877,
      "grad_norm": 0.2767902612686157,
      "learning_rate": 8.669884589121756e-06,
      "loss": 1.3842121362686157,
      "step": 568
    },
    {
      "epoch": 0.8715596330275229,
      "grad_norm": 0.2403760552406311,
      "learning_rate": 8.659094339828934e-06,
      "loss": 1.3873755931854248,
      "step": 570
    },
    {
      "epoch": 0.8746177370030581,
      "grad_norm": 0.30079615116119385,
      "learning_rate": 8.648268157309964e-06,
      "loss": 1.3781442642211914,
      "step": 572
    },
    {
      "epoch": 0.8776758409785933,
      "grad_norm": 0.24510778486728668,
      "learning_rate": 8.637406164707628e-06,
      "loss": 1.4003241062164307,
      "step": 574
    },
    {
      "epoch": 0.8807339449541285,
      "grad_norm": 0.19053591787815094,
      "learning_rate": 8.62650848557203e-06,
      "loss": 1.318782091140747,
      "step": 576
    },
    {
      "epoch": 0.8837920489296636,
      "grad_norm": 0.5118341445922852,
      "learning_rate": 8.615575243859194e-06,
      "loss": 1.3740344047546387,
      "step": 578
    },
    {
      "epoch": 0.8868501529051988,
      "grad_norm": 0.2653733193874359,
      "learning_rate": 8.604606563929649e-06,
      "loss": 1.3240249156951904,
      "step": 580
    },
    {
      "epoch": 0.8899082568807339,
      "grad_norm": 0.2646930515766144,
      "learning_rate": 8.59360257054702e-06,
      "loss": 1.3533198833465576,
      "step": 582
    },
    {
      "epoch": 0.8929663608562691,
      "grad_norm": 0.21842285990715027,
      "learning_rate": 8.582563388876602e-06,
      "loss": 1.3596748113632202,
      "step": 584
    },
    {
      "epoch": 0.8960244648318043,
      "grad_norm": 0.2090519517660141,
      "learning_rate": 8.571489144483945e-06,
      "loss": 1.3835537433624268,
      "step": 586
    },
    {
      "epoch": 0.8990825688073395,
      "grad_norm": 0.2362383008003235,
      "learning_rate": 8.560379963333416e-06,
      "loss": 1.368111252784729,
      "step": 588
    },
    {
      "epoch": 0.9021406727828746,
      "grad_norm": 0.4883694350719452,
      "learning_rate": 8.549235971786777e-06,
      "loss": 1.3067984580993652,
      "step": 590
    },
    {
      "epoch": 0.9051987767584098,
      "grad_norm": 0.3407292366027832,
      "learning_rate": 8.538057296601739e-06,
      "loss": 1.3290581703186035,
      "step": 592
    },
    {
      "epoch": 0.908256880733945,
      "grad_norm": 0.21036434173583984,
      "learning_rate": 8.526844064930523e-06,
      "loss": 1.3695251941680908,
      "step": 594
    },
    {
      "epoch": 0.9113149847094801,
      "grad_norm": 0.22752052545547485,
      "learning_rate": 8.515596404318415e-06,
      "loss": 1.3922007083892822,
      "step": 596
    },
    {
      "epoch": 0.9143730886850153,
      "grad_norm": 0.23141705989837646,
      "learning_rate": 8.504314442702315e-06,
      "loss": 1.371009111404419,
      "step": 598
    },
    {
      "epoch": 0.9174311926605505,
      "grad_norm": 0.18458011746406555,
      "learning_rate": 8.492998308409275e-06,
      "loss": 1.3468807935714722,
      "step": 600
    },
    {
      "epoch": 0.9204892966360856,
      "grad_norm": 0.2277638018131256,
      "learning_rate": 8.481648130155054e-06,
      "loss": 1.3067777156829834,
      "step": 602
    },
    {
      "epoch": 0.9235474006116208,
      "grad_norm": 0.2761037051677704,
      "learning_rate": 8.470264037042639e-06,
      "loss": 1.3436920642852783,
      "step": 604
    },
    {
      "epoch": 0.926605504587156,
      "grad_norm": 0.2718355059623718,
      "learning_rate": 8.458846158560787e-06,
      "loss": 1.368149995803833,
      "step": 606
    },
    {
      "epoch": 0.9296636085626911,
      "grad_norm": 0.471161812543869,
      "learning_rate": 8.447394624582544e-06,
      "loss": 1.3190257549285889,
      "step": 608
    },
    {
      "epoch": 0.9327217125382263,
      "grad_norm": 0.24170783162117004,
      "learning_rate": 8.435909565363772e-06,
      "loss": 1.3419578075408936,
      "step": 610
    },
    {
      "epoch": 0.9357798165137615,
      "grad_norm": 0.26485109329223633,
      "learning_rate": 8.424391111541673e-06,
      "loss": 1.338409662246704,
      "step": 612
    },
    {
      "epoch": 0.9388379204892966,
      "grad_norm": 0.23220610618591309,
      "learning_rate": 8.412839394133285e-06,
      "loss": 1.3877780437469482,
      "step": 614
    },
    {
      "epoch": 0.9418960244648318,
      "grad_norm": 0.24310626089572906,
      "learning_rate": 8.401254544534018e-06,
      "loss": 1.4051454067230225,
      "step": 616
    },
    {
      "epoch": 0.944954128440367,
      "grad_norm": 0.299958735704422,
      "learning_rate": 8.389636694516134e-06,
      "loss": 1.3702571392059326,
      "step": 618
    },
    {
      "epoch": 0.9480122324159022,
      "grad_norm": 0.449929803609848,
      "learning_rate": 8.377985976227265e-06,
      "loss": 1.379606008529663,
      "step": 620
    },
    {
      "epoch": 0.9510703363914373,
      "grad_norm": 0.24171197414398193,
      "learning_rate": 8.366302522188902e-06,
      "loss": 1.350182294845581,
      "step": 622
    },
    {
      "epoch": 0.9541284403669725,
      "grad_norm": 0.2935427129268646,
      "learning_rate": 8.354586465294894e-06,
      "loss": 1.2931137084960938,
      "step": 624
    },
    {
      "epoch": 0.9571865443425076,
      "grad_norm": 0.23755374550819397,
      "learning_rate": 8.342837938809925e-06,
      "loss": 1.3183162212371826,
      "step": 626
    },
    {
      "epoch": 0.9602446483180428,
      "grad_norm": 0.3486945331096649,
      "learning_rate": 8.331057076368012e-06,
      "loss": 1.3358354568481445,
      "step": 628
    },
    {
      "epoch": 0.963302752293578,
      "grad_norm": 0.3866771459579468,
      "learning_rate": 8.319244011970975e-06,
      "loss": 1.3079657554626465,
      "step": 630
    },
    {
      "epoch": 0.9663608562691132,
      "grad_norm": 0.23048752546310425,
      "learning_rate": 8.307398879986917e-06,
      "loss": 1.323075294494629,
      "step": 632
    },
    {
      "epoch": 0.9694189602446484,
      "grad_norm": 0.2808099687099457,
      "learning_rate": 8.295521815148697e-06,
      "loss": 1.376133918762207,
      "step": 634
    },
    {
      "epoch": 0.9724770642201835,
      "grad_norm": 0.3424737751483917,
      "learning_rate": 8.283612952552393e-06,
      "loss": 1.363619327545166,
      "step": 636
    },
    {
      "epoch": 0.9755351681957186,
      "grad_norm": 0.23272113502025604,
      "learning_rate": 8.271672427655765e-06,
      "loss": 1.3780806064605713,
      "step": 638
    },
    {
      "epoch": 0.9785932721712538,
      "grad_norm": 0.33965811133384705,
      "learning_rate": 8.259700376276724e-06,
      "loss": 1.3397910594940186,
      "step": 640
    },
    {
      "epoch": 0.981651376146789,
      "grad_norm": 0.25269240140914917,
      "learning_rate": 8.247696934591774e-06,
      "loss": 1.3255189657211304,
      "step": 642
    },
    {
      "epoch": 0.9847094801223242,
      "grad_norm": 1.2317392826080322,
      "learning_rate": 8.235662239134473e-06,
      "loss": 1.347729206085205,
      "step": 644
    },
    {
      "epoch": 0.9877675840978594,
      "grad_norm": 0.37982505559921265,
      "learning_rate": 8.22359642679387e-06,
      "loss": 1.3894901275634766,
      "step": 646
    },
    {
      "epoch": 0.9908256880733946,
      "grad_norm": 0.2849336564540863,
      "learning_rate": 8.211499634812966e-06,
      "loss": 1.429058313369751,
      "step": 648
    },
    {
      "epoch": 0.9938837920489296,
      "grad_norm": 0.6233349442481995,
      "learning_rate": 8.199372000787126e-06,
      "loss": 2.095426082611084,
      "step": 650
    },
    {
      "epoch": 0.9969418960244648,
      "grad_norm": 0.6541375517845154,
      "learning_rate": 8.187213662662539e-06,
      "loss": 2.1073060035705566,
      "step": 652
    },
    {
      "epoch": 1.0,
      "grad_norm": 11.037178039550781,
      "learning_rate": 8.175024758734636e-06,
      "loss": 2.095914840698242,
      "step": 654
    },
    {
      "epoch": 1.003058103975535,
      "grad_norm": 0.3948424160480499,
      "learning_rate": 8.16280542764652e-06,
      "loss": 1.4957305192947388,
      "step": 656
    },
    {
      "epoch": 1.0061162079510704,
      "grad_norm": 0.310005784034729,
      "learning_rate": 8.150555808387389e-06,
      "loss": 1.455479383468628,
      "step": 658
    },
    {
      "epoch": 1.0091743119266054,
      "grad_norm": 0.26789844036102295,
      "learning_rate": 8.138276040290952e-06,
      "loss": 1.4779293537139893,
      "step": 660
    },
    {
      "epoch": 1.0122324159021407,
      "grad_norm": 0.19781345129013062,
      "learning_rate": 8.125966263033852e-06,
      "loss": 1.4063279628753662,
      "step": 662
    },
    {
      "epoch": 1.0152905198776758,
      "grad_norm": 0.21764519810676575,
      "learning_rate": 8.11362661663407e-06,
      "loss": 1.5875146389007568,
      "step": 664
    },
    {
      "epoch": 1.018348623853211,
      "grad_norm": 0.25749847292900085,
      "learning_rate": 8.101257241449332e-06,
      "loss": 1.480888843536377,
      "step": 666
    },
    {
      "epoch": 1.0214067278287462,
      "grad_norm": 0.26426374912261963,
      "learning_rate": 8.08885827817552e-06,
      "loss": 1.4235765933990479,
      "step": 668
    },
    {
      "epoch": 1.0244648318042813,
      "grad_norm": 0.25188708305358887,
      "learning_rate": 8.07642986784506e-06,
      "loss": 1.5084459781646729,
      "step": 670
    },
    {
      "epoch": 1.0275229357798166,
      "grad_norm": 0.6583337783813477,
      "learning_rate": 8.063972151825332e-06,
      "loss": 1.369026780128479,
      "step": 672
    },
    {
      "epoch": 1.0305810397553516,
      "grad_norm": 0.21123117208480835,
      "learning_rate": 8.05148527181705e-06,
      "loss": 1.4445654153823853,
      "step": 674
    },
    {
      "epoch": 1.033639143730887,
      "grad_norm": 0.293588787317276,
      "learning_rate": 8.038969369852654e-06,
      "loss": 1.555469274520874,
      "step": 676
    },
    {
      "epoch": 1.036697247706422,
      "grad_norm": 0.27872779965400696,
      "learning_rate": 8.026424588294701e-06,
      "loss": 1.4869214296340942,
      "step": 678
    },
    {
      "epoch": 1.039755351681957,
      "grad_norm": 0.23042356967926025,
      "learning_rate": 8.013851069834233e-06,
      "loss": 1.279091238975525,
      "step": 680
    },
    {
      "epoch": 1.0428134556574924,
      "grad_norm": 0.289106547832489,
      "learning_rate": 8.001248957489164e-06,
      "loss": 1.4306490421295166,
      "step": 682
    },
    {
      "epoch": 1.0458715596330275,
      "grad_norm": 0.5272045135498047,
      "learning_rate": 7.988618394602653e-06,
      "loss": 1.6781132221221924,
      "step": 684
    },
    {
      "epoch": 1.0489296636085628,
      "grad_norm": 0.22576113045215607,
      "learning_rate": 7.975959524841464e-06,
      "loss": 1.3457372188568115,
      "step": 686
    },
    {
      "epoch": 1.0519877675840978,
      "grad_norm": 0.5630601644515991,
      "learning_rate": 7.963272492194344e-06,
      "loss": 1.4807915687561035,
      "step": 688
    },
    {
      "epoch": 1.0550458715596331,
| "grad_norm": 0.34389057755470276, |
| "learning_rate": 7.950557440970377e-06, |
| "loss": 1.368910789489746, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.0581039755351682, |
| "grad_norm": 0.21063481271266937, |
| "learning_rate": 7.937814515797348e-06, |
| "loss": 1.360002040863037, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.0611620795107033, |
| "grad_norm": 0.20320424437522888, |
| "learning_rate": 7.92504386162009e-06, |
| "loss": 1.3675504922866821, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.0642201834862386, |
| "grad_norm": 0.2813395857810974, |
| "learning_rate": 7.912245623698846e-06, |
| "loss": 1.395061731338501, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.0672782874617737, |
| "grad_norm": 0.4647752046585083, |
| "learning_rate": 7.899419947607611e-06, |
| "loss": 1.5662283897399902, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.070336391437309, |
| "grad_norm": 0.3765999972820282, |
| "learning_rate": 7.886566979232471e-06, |
| "loss": 1.5935697555541992, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.073394495412844, |
| "grad_norm": 0.29083383083343506, |
| "learning_rate": 7.873686864769955e-06, |
| "loss": 1.434537649154663, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.0764525993883791, |
| "grad_norm": 0.4763205349445343, |
| "learning_rate": 7.860779750725362e-06, |
| "loss": 1.4121177196502686, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.0795107033639144, |
| "grad_norm": 0.33439531922340393, |
| "learning_rate": 7.8478457839111e-06, |
| "loss": 1.3943579196929932, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.0825688073394495, |
| "grad_norm": 0.342690110206604, |
| "learning_rate": 7.834885111445017e-06, |
| "loss": 1.4776759147644043, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.0856269113149848, |
| "grad_norm": 0.29185494780540466, |
| "learning_rate": 7.82189788074872e-06, |
| "loss": 1.4435069561004639, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.0886850152905199, |
| "grad_norm": 1.3288284540176392, |
| "learning_rate": 7.80888423954591e-06, |
| "loss": 1.4731531143188477, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.091743119266055, |
| "grad_norm": 0.2119162380695343, |
| "learning_rate": 7.795844335860691e-06, |
| "loss": 1.4626476764678955, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.0948012232415902, |
| "grad_norm": 0.20571930706501007, |
| "learning_rate": 7.782778318015892e-06, |
| "loss": 1.342850685119629, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.0978593272171253, |
| "grad_norm": 0.22236645221710205, |
| "learning_rate": 7.769686334631375e-06, |
| "loss": 1.286208152770996, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.1009174311926606, |
| "grad_norm": 0.18384046852588654, |
| "learning_rate": 7.756568534622355e-06, |
| "loss": 1.4446015357971191, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.1039755351681957, |
| "grad_norm": 0.2486264407634735, |
| "learning_rate": 7.743425067197693e-06, |
| "loss": 1.5612818002700806, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.107033639143731, |
| "grad_norm": 0.23211126029491425, |
| "learning_rate": 7.730256081858207e-06, |
| "loss": 1.3999545574188232, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.110091743119266, |
| "grad_norm": 0.41483980417251587, |
| "learning_rate": 7.717061728394968e-06, |
| "loss": 1.591150164604187, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.1131498470948011, |
| "grad_norm": 0.3113287091255188, |
| "learning_rate": 7.7038421568876e-06, |
| "loss": 1.620883584022522, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.1162079510703364, |
| "grad_norm": 0.5611585378646851, |
| "learning_rate": 7.690597517702569e-06, |
| "loss": 1.3835599422454834, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.1192660550458715, |
| "grad_norm": 0.5187618732452393, |
| "learning_rate": 7.677327961491475e-06, |
| "loss": 1.3614990711212158, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.1223241590214068, |
| "grad_norm": 0.34465184807777405, |
| "learning_rate": 7.664033639189336e-06, |
| "loss": 1.467517614364624, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.1253822629969419, |
| "grad_norm": 0.22211050987243652, |
| "learning_rate": 7.650714702012876e-06, |
| "loss": 1.287433385848999, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.1284403669724772, |
| "grad_norm": 0.36259227991104126, |
| "learning_rate": 7.637371301458797e-06, |
| "loss": 1.367175817489624, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.1314984709480123, |
| "grad_norm": 0.44571414589881897, |
| "learning_rate": 7.6240035893020625e-06, |
| "loss": 1.3308281898498535, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.1345565749235473, |
| "grad_norm": 0.26124662160873413, |
| "learning_rate": 7.610611717594173e-06, |
| "loss": 1.3915913105010986, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.1376146788990826, |
| "grad_norm": 0.3137398064136505, |
| "learning_rate": 7.597195838661426e-06, |
| "loss": 1.3188378810882568, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.1406727828746177, |
| "grad_norm": 0.3484938144683838, |
| "learning_rate": 7.583756105103195e-06, |
| "loss": 1.3703608512878418, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.143730886850153, |
| "grad_norm": 0.3699035942554474, |
| "learning_rate": 7.570292669790186e-06, |
| "loss": 1.5115067958831787, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.146788990825688, |
| "grad_norm": 0.24170878529548645, |
| "learning_rate": 7.556805685862703e-06, |
| "loss": 1.3954684734344482, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.1498470948012232, |
| "grad_norm": 0.20038793981075287, |
| "learning_rate": 7.543295306728904e-06, |
| "loss": 1.345947027206421, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.1529051987767585, |
| "grad_norm": 0.38949868083000183, |
| "learning_rate": 7.529761686063056e-06, |
| "loss": 1.5590949058532715, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.1559633027522935, |
| "grad_norm": 0.33645766973495483, |
| "learning_rate": 7.516204977803789e-06, |
| "loss": 1.446972370147705, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.1590214067278288, |
| "grad_norm": 0.18463970720767975, |
| "learning_rate": 7.5026253361523435e-06, |
| "loss": 1.3630192279815674, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.162079510703364, |
| "grad_norm": 0.33572879433631897, |
| "learning_rate": 7.489022915570813e-06, |
| "loss": 1.457106113433838, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.165137614678899, |
| "grad_norm": 0.2753995954990387, |
| "learning_rate": 7.475397870780397e-06, |
| "loss": 1.4502360820770264, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.1681957186544343, |
| "grad_norm": 0.35596194863319397, |
| "learning_rate": 7.4617503567596295e-06, |
| "loss": 1.4977834224700928, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.1712538226299694, |
| "grad_norm": 0.4726940095424652, |
| "learning_rate": 7.448080528742624e-06, |
| "loss": 1.3764468431472778, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.1743119266055047, |
| "grad_norm": 0.26225268840789795, |
| "learning_rate": 7.434388542217303e-06, |
| "loss": 1.4741466045379639, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.1773700305810397, |
| "grad_norm": 0.27619338035583496, |
| "learning_rate": 7.420674552923638e-06, |
| "loss": 1.3593350648880005, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.1804281345565748, |
| "grad_norm": 0.3182947635650635, |
| "learning_rate": 7.4069387168518615e-06, |
| "loss": 1.673621654510498, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.18348623853211, |
| "grad_norm": 0.28721779584884644, |
| "learning_rate": 7.393181190240714e-06, |
| "loss": 1.4450278282165527, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.1865443425076452, |
| "grad_norm": 0.2768658399581909, |
| "learning_rate": 7.379402129575645e-06, |
| "loss": 1.5032843351364136, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.1896024464831805, |
| "grad_norm": 0.3218024969100952, |
| "learning_rate": 7.3656016915870545e-06, |
| "loss": 1.4965013265609741, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.1926605504587156, |
| "grad_norm": 0.4919971227645874, |
| "learning_rate": 7.351780033248491e-06, |
| "loss": 1.4509224891662598, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.1957186544342508, |
| "grad_norm": 0.3981909155845642, |
| "learning_rate": 7.33793731177488e-06, |
| "loss": 1.4464759826660156, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.198776758409786, |
| "grad_norm": 0.3076995611190796, |
| "learning_rate": 7.324073684620726e-06, |
| "loss": 1.4577126502990723, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.2018348623853212, |
| "grad_norm": 0.28227174282073975, |
| "learning_rate": 7.310189309478331e-06, |
| "loss": 1.439997911453247, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.2048929663608563, |
| "grad_norm": 0.26599401235580444, |
| "learning_rate": 7.296284344275991e-06, |
| "loss": 1.531783103942871, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.2079510703363914, |
| "grad_norm": 0.69685959815979, |
| "learning_rate": 7.282358947176207e-06, |
| "loss": 1.4577662944793701, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.2110091743119267, |
| "grad_norm": 0.25103896856307983, |
| "learning_rate": 7.268413276573881e-06, |
| "loss": 1.3561824560165405, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.2140672782874617, |
| "grad_norm": 0.21765579283237457, |
| "learning_rate": 7.25444749109452e-06, |
| "loss": 1.3165652751922607, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.217125382262997, |
| "grad_norm": 0.2564055919647217, |
| "learning_rate": 7.2404617495924254e-06, |
| "loss": 1.383346676826477, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.2201834862385321, |
| "grad_norm": 0.40797773003578186, |
| "learning_rate": 7.226456211148891e-06, |
| "loss": 1.3315465450286865, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.2232415902140672, |
| "grad_norm": 0.31532490253448486, |
| "learning_rate": 7.212431035070391e-06, |
| "loss": 1.3896580934524536, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.2262996941896025, |
| "grad_norm": 0.25705334544181824, |
| "learning_rate": 7.198386380886765e-06, |
| "loss": 1.3460421562194824, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.2293577981651376, |
| "grad_norm": 0.31377753615379333, |
| "learning_rate": 7.1843224083494154e-06, |
| "loss": 1.595191240310669, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.2324159021406729, |
| "grad_norm": 0.2853119969367981, |
| "learning_rate": 7.170239277429474e-06, |
| "loss": 1.6170880794525146, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.235474006116208, |
| "grad_norm": 0.44243165850639343, |
| "learning_rate": 7.156137148315993e-06, |
| "loss": 1.6550755500793457, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.238532110091743, |
| "grad_norm": 0.3517357110977173, |
| "learning_rate": 7.14201618141412e-06, |
| "loss": 1.566192865371704, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.2415902140672783, |
| "grad_norm": 0.2986673414707184, |
| "learning_rate": 7.127876537343277e-06, |
| "loss": 1.63118314743042, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.2446483180428134, |
| "grad_norm": 0.3479074537754059, |
| "learning_rate": 7.1137183769353225e-06, |
| "loss": 1.5168559551239014, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.2477064220183487, |
| "grad_norm": 0.4152420461177826, |
| "learning_rate": 7.099541861232736e-06, |
| "loss": 1.6398264169692993, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.2507645259938838, |
| "grad_norm": 0.384573370218277, |
| "learning_rate": 7.085347151486779e-06, |
| "loss": 1.4128949642181396, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.2538226299694188, |
| "grad_norm": 0.3804616630077362, |
| "learning_rate": 7.071134409155659e-06, |
| "loss": 1.557448148727417, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.2568807339449541, |
| "grad_norm": 0.6236130595207214, |
| "learning_rate": 7.056903795902701e-06, |
| "loss": 1.3184959888458252, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.2599388379204892, |
| "grad_norm": 0.7443933486938477, |
| "learning_rate": 7.042655473594495e-06, |
| "loss": 1.537932276725769, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.2629969418960245, |
| "grad_norm": 0.5472233891487122, |
| "learning_rate": 7.028389604299074e-06, |
| "loss": 1.1561626195907593, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.2660550458715596, |
| "grad_norm": 0.847542941570282, |
| "learning_rate": 7.01410635028405e-06, |
| "loss": 1.1249284744262695, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.2691131498470947, |
| "grad_norm": 0.3495579957962036, |
| "learning_rate": 6.9998058740147835e-06, |
| "loss": 1.3474421501159668, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.27217125382263, |
| "grad_norm": 0.4069005846977234, |
| "learning_rate": 6.985488338152529e-06, |
| "loss": 1.3892837762832642, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.2752293577981653, |
| "grad_norm": 0.6165335178375244, |
| "learning_rate": 6.971153905552587e-06, |
| "loss": 1.524814248085022, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.2782874617737003, |
| "grad_norm": 0.6481596827507019, |
| "learning_rate": 6.956802739262446e-06, |
| "loss": 1.464059829711914, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.2813455657492354, |
| "grad_norm": 0.3051135241985321, |
| "learning_rate": 6.942435002519938e-06, |
| "loss": 1.212691307067871, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.2844036697247707, |
| "grad_norm": 0.31896138191223145, |
| "learning_rate": 6.9280508587513725e-06, |
| "loss": 1.179284691810608, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.2874617737003058, |
| "grad_norm": 0.2261551022529602, |
| "learning_rate": 6.913650471569684e-06, |
| "loss": 1.38997220993042, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.290519877675841, |
| "grad_norm": 0.3368714451789856, |
| "learning_rate": 6.899234004772566e-06, |
| "loss": 1.3169426918029785, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.2935779816513762, |
| "grad_norm": 0.49499788880348206, |
| "learning_rate": 6.884801622340612e-06, |
| "loss": 1.293768048286438, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.2966360856269112, |
| "grad_norm": 0.2904210686683655, |
| "learning_rate": 6.870353488435447e-06, |
| "loss": 1.5008976459503174, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.2996941896024465, |
| "grad_norm": 0.4230108857154846, |
| "learning_rate": 6.855889767397863e-06, |
| "loss": 1.4707106351852417, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.3027522935779816, |
| "grad_norm": 0.2836777865886688, |
| "learning_rate": 6.841410623745944e-06, |
| "loss": 1.182532548904419, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.305810397553517, |
| "grad_norm": 0.3048684895038605, |
| "learning_rate": 6.826916222173205e-06, |
| "loss": 1.373314380645752, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.308868501529052, |
| "grad_norm": 0.38874655961990356, |
| "learning_rate": 6.812406727546713e-06, |
| "loss": 1.5207183361053467, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.311926605504587, |
| "grad_norm": 0.541847288608551, |
| "learning_rate": 6.7978823049052046e-06, |
| "loss": 1.6546745300292969, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.3149847094801224, |
| "grad_norm": 0.3354927897453308, |
| "learning_rate": 6.783343119457221e-06, |
| "loss": 1.6852827072143555, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.3180428134556574, |
| "grad_norm": 0.22799281775951385, |
| "learning_rate": 6.768789336579224e-06, |
| "loss": 1.7998615503311157, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.3211009174311927, |
| "grad_norm": 0.2829393446445465, |
| "learning_rate": 6.754221121813707e-06, |
| "loss": 1.3555914163589478, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.3241590214067278, |
| "grad_norm": 0.2552604377269745, |
| "learning_rate": 6.739638640867332e-06, |
| "loss": 1.44038724899292, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.3272171253822629, |
| "grad_norm": 0.2328341007232666, |
| "learning_rate": 6.72504205960902e-06, |
| "loss": 1.2792387008666992, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.3302752293577982, |
| "grad_norm": 0.19776956737041473, |
| "learning_rate": 6.710431544068085e-06, |
| "loss": 1.2014856338500977, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.2862965762615204, |
| "learning_rate": 6.695807260432332e-06, |
| "loss": 1.612195372581482, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.3363914373088686, |
| "grad_norm": 0.2737024426460266, |
| "learning_rate": 6.681169375046173e-06, |
| "loss": 1.4856352806091309, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.3394495412844036, |
| "grad_norm": 0.33617132902145386, |
| "learning_rate": 6.666518054408734e-06, |
| "loss": 1.6690922975540161, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.3425076452599387, |
| "grad_norm": 0.33230748772621155, |
| "learning_rate": 6.65185346517196e-06, |
| "loss": 1.134220838546753, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.345565749235474, |
| "grad_norm": 0.34520813822746277, |
| "learning_rate": 6.637175774138722e-06, |
| "loss": 1.3939542770385742, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.3486238532110093, |
| "grad_norm": 0.3193676471710205, |
| "learning_rate": 6.622485148260916e-06, |
| "loss": 1.6689043045043945, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.3516819571865444, |
| "grad_norm": 0.2586718499660492, |
| "learning_rate": 6.607781754637567e-06, |
| "loss": 1.3927881717681885, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.3547400611620795, |
| "grad_norm": 0.36470475792884827, |
| "learning_rate": 6.593065760512924e-06, |
| "loss": 1.5524687767028809, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.3577981651376148, |
| "grad_norm": 0.5333327054977417, |
| "learning_rate": 6.578337333274566e-06, |
| "loss": 1.4335553646087646, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.3608562691131498, |
| "grad_norm": 0.24828922748565674, |
| "learning_rate": 6.563596640451489e-06, |
| "loss": 1.3478354215621948, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.3639143730886851, |
| "grad_norm": 0.2684786021709442, |
| "learning_rate": 6.548843849712206e-06, |
| "loss": 1.4221248626708984, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.3669724770642202, |
| "grad_norm": 0.2922813594341278, |
| "learning_rate": 6.534079128862835e-06, |
| "loss": 1.4792616367340088, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.3700305810397553, |
| "grad_norm": 0.21960243582725525, |
| "learning_rate": 6.5193026458452006e-06, |
| "loss": 1.3363940715789795, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.3730886850152906, |
| "grad_norm": 0.41456371545791626, |
| "learning_rate": 6.50451456873491e-06, |
| "loss": 1.4480544328689575, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.3761467889908257, |
| "grad_norm": 0.6222192049026489, |
| "learning_rate": 6.489715065739448e-06, |
| "loss": 1.7465565204620361, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.379204892966361, |
| "grad_norm": 0.5998108983039856, |
| "learning_rate": 6.474904305196268e-06, |
| "loss": 2.144679546356201, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.382262996941896, |
| "grad_norm": 0.5612609386444092, |
| "learning_rate": 6.4600824555708695e-06, |
| "loss": 1.378048300743103, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.385321100917431, |
| "grad_norm": 0.32021385431289673, |
| "learning_rate": 6.445249685454885e-06, |
| "loss": 1.361167073249817, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.3883792048929664, |
| "grad_norm": 0.36393630504608154, |
| "learning_rate": 6.4304061635641645e-06, |
| "loss": 1.433903694152832, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.3914373088685015, |
| "grad_norm": 0.7985405325889587, |
| "learning_rate": 6.415552058736854e-06, |
| "loss": 1.5466125011444092, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.3944954128440368, |
| "grad_norm": 0.30912530422210693, |
| "learning_rate": 6.4006875399314705e-06, |
| "loss": 1.463235855102539, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.3975535168195719, |
| "grad_norm": 0.2953026294708252, |
| "learning_rate": 6.3858127762249945e-06, |
| "loss": 1.3276557922363281, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.400611620795107, |
| "grad_norm": 0.19828742742538452, |
| "learning_rate": 6.3709279368109264e-06, |
| "loss": 1.2300511598587036, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.4036697247706422, |
| "grad_norm": 0.21878407895565033, |
| "learning_rate": 6.356033190997386e-06, |
| "loss": 1.1606783866882324, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.4067278287461773, |
| "grad_norm": 0.19046013057231903, |
| "learning_rate": 6.341128708205162e-06, |
| "loss": 1.3056751489639282, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.4097859327217126, |
| "grad_norm": 0.40108954906463623, |
| "learning_rate": 6.326214657965804e-06, |
| "loss": 1.5421757698059082, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.4128440366972477, |
| "grad_norm": 0.46537211537361145, |
| "learning_rate": 6.311291209919682e-06, |
| "loss": 1.5684192180633545, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.4159021406727827, |
| "grad_norm": 0.5733487606048584, |
| "learning_rate": 6.296358533814065e-06, |
| "loss": 1.5650339126586914, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.418960244648318, |
| "grad_norm": 0.4306733310222626, |
| "learning_rate": 6.281416799501188e-06, |
| "loss": 1.5992372035980225, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.4220183486238533, |
| "grad_norm": 0.407654732465744, |
| "learning_rate": 6.266466176936313e-06, |
| "loss": 1.4283607006072998, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.4250764525993884, |
| "grad_norm": 4.419346332550049, |
| "learning_rate": 6.251506836175807e-06, |
| "loss": 1.5659562349319458, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.4281345565749235, |
| "grad_norm": 0.7012003064155579, |
| "learning_rate": 6.236538947375203e-06, |
| "loss": 1.4677741527557373, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.4311926605504588, |
| "grad_norm": 0.22764644026756287, |
| "learning_rate": 6.221562680787258e-06, |
| "loss": 1.374863624572754, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.4342507645259939, |
| "grad_norm": 0.4946407973766327, |
| "learning_rate": 6.20657820676003e-06, |
| "loss": 1.3795430660247803, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.4373088685015292, |
| "grad_norm": 1.4666649103164673, |
| "learning_rate": 6.191585695734925e-06, |
| "loss": 1.584106683731079, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.4403669724770642, |
| "grad_norm": 0.9116813540458679, |
| "learning_rate": 6.176585318244775e-06, |
| "loss": 1.3207650184631348, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.4434250764525993, |
| "grad_norm": 0.4549460709095001, |
| "learning_rate": 6.161577244911883e-06, |
| "loss": 1.5188086032867432, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.4464831804281346, |
| "grad_norm": 0.6293279528617859, |
| "learning_rate": 6.146561646446088e-06, |
| "loss": 1.40483558177948, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.4495412844036697, |
| "grad_norm": 0.5348030924797058, |
| "learning_rate": 6.131538693642828e-06, |
| "loss": 1.4180057048797607, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.452599388379205, |
| "grad_norm": 0.7010774612426758, |
| "learning_rate": 6.116508557381191e-06, |
| "loss": 1.5555238723754883, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.45565749235474, |
| "grad_norm": 0.3996182382106781, |
| "learning_rate": 6.1014714086219725e-06, |
| "loss": 1.5635944604873657, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.4587155963302751, |
| "grad_norm": 0.3819827139377594, |
| "learning_rate": 6.086427418405735e-06, |
| "loss": 1.3868696689605713, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.4617737003058104, |
| "grad_norm": 0.24838334321975708, |
| "learning_rate": 6.071376757850858e-06, |
| "loss": 1.3217381238937378, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.4648318042813455, |
| "grad_norm": 0.5527139902114868, |
| "learning_rate": 6.0563195981515885e-06, |
| "loss": 1.456415057182312, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.4678899082568808, |
| "grad_norm": 0.2822090983390808, |
| "learning_rate": 6.0412561105761055e-06, |
| "loss": 1.3990404605865479, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.470948012232416, |
| "grad_norm": 0.370832234621048, |
| "learning_rate": 6.026186466464562e-06, |
| "loss": 1.5524400472640991, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.474006116207951, |
| "grad_norm": 0.30970191955566406, |
| "learning_rate": 6.011110837227138e-06, |
| "loss": 1.4143943786621094, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.4770642201834863, |
| "grad_norm": 0.3659932613372803, |
| "learning_rate": 5.996029394342089e-06, |
| "loss": 1.3726913928985596, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.4801223241590213, |
| "grad_norm": 0.40378639101982117, |
| "learning_rate": 5.980942309353803e-06, |
| "loss": 1.3403112888336182, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.4831804281345566, |
| "grad_norm": 0.2668818235397339, |
| "learning_rate": 5.965849753870841e-06, |
| "loss": 1.4581551551818848, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.4862385321100917, |
| "grad_norm": 0.39147576689720154, |
| "learning_rate": 5.950751899563989e-06, |
| "loss": 1.4426075220108032, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.4892966360856268, |
| "grad_norm": 0.4053312838077545, |
| "learning_rate": 5.935648918164308e-06, |
| "loss": 1.429807424545288, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.492354740061162, |
| "grad_norm": 0.2912329435348511, |
| "learning_rate": 5.9205409814611694e-06, |
| "loss": 1.6015820503234863, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.4954128440366974, |
| "grad_norm": 0.39581140875816345, |
| "learning_rate": 5.9054282613003165e-06, |
| "loss": 1.7901129722595215, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.4984709480122325, |
| "grad_norm": 5.4772210121154785, |
| "learning_rate": 5.890310929581899e-06, |
| "loss": 1.665008544921875, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.5015290519877675, |
| "grad_norm": 0.32753488421440125, |
| "learning_rate": 5.875189158258521e-06, |
| "loss": 1.658569574356079, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.5045871559633026, |
| "grad_norm": 0.3322629928588867, |
| "learning_rate": 5.860063119333287e-06, |
| "loss": 1.568853735923767, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.507645259938838, |
| "grad_norm": 0.3625146746635437, |
| "learning_rate": 5.844932984857841e-06, |
| "loss": 1.2555010318756104, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.5107033639143732, |
| "grad_norm": 0.3967174291610718, |
| "learning_rate": 5.829798926930411e-06, |
| "loss": 1.2352030277252197, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.5137614678899083, |
| "grad_norm": 0.92249995470047, |
| "learning_rate": 5.814661117693856e-06, |
| "loss": 1.6529834270477295, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.5168195718654434, |
| "grad_norm": 0.43264713883399963, |
| "learning_rate": 5.799519729333702e-06, |
| "loss": 1.6510822772979736, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.5198776758409784, |
| "grad_norm": 0.48226049542427063, |
| "learning_rate": 5.784374934076188e-06, |
| "loss": 1.7469120025634766, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.5229357798165137, |
| "grad_norm": 0.6006577014923096, |
| "learning_rate": 5.769226904186301e-06, |
| "loss": 1.6751326322555542, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.525993883792049, |
| "grad_norm": 0.417524129152298, |
| "learning_rate": 5.754075811965826e-06, |
| "loss": 1.7241541147232056, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.529051987767584, |
| "grad_norm": 0.4846678674221039, |
| "learning_rate": 5.738921829751374e-06, |
| "loss": 1.5894498825073242, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.5321100917431192, |
| "grad_norm": 0.37620386481285095, |
| "learning_rate": 5.723765129912433e-06, |
| "loss": 1.5567536354064941, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.5351681957186545, |
| "grad_norm": 0.9559251070022583, |
| "learning_rate": 5.708605884849402e-06, |
| "loss": 1.444126844406128, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.5382262996941896, |
| "grad_norm": 0.4608314335346222, |
| "learning_rate": 5.6934442669916315e-06, |
| "loss": 1.7045128345489502, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.5412844036697249, |
| "grad_norm": 0.5580506920814514, |
| "learning_rate": 5.678280448795457e-06, |
| "loss": 1.576319932937622, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.54434250764526, |
| "grad_norm": 0.414983332157135, |
| "learning_rate": 5.663114602742247e-06, |
| "loss": 1.1866123676300049, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.547400611620795, |
| "grad_norm": 0.5494526624679565, |
| "learning_rate": 5.647946901336433e-06, |
| "loss": 1.7420477867126465, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.5504587155963303, |
| "grad_norm": 0.6842697262763977, |
| "learning_rate": 5.632777517103552e-06, |
| "loss": 1.7904109954833984, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.5535168195718656, |
| "grad_norm": 0.43980666995048523, |
| "learning_rate": 5.617606622588282e-06, |
| "loss": 1.862006425857544, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.5565749235474007, |
| "grad_norm": 0.3990402817726135, |
| "learning_rate": 5.602434390352476e-06, |
| "loss": 1.7830100059509277, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.5596330275229358, |
| "grad_norm": 0.4031524360179901, |
| "learning_rate": 5.58726099297321e-06, |
| "loss": 1.7594141960144043, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.5626911314984708, |
| "grad_norm": 0.6580591797828674, |
| "learning_rate": 5.572086603040809e-06, |
| "loss": 1.6219829320907593, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.5657492354740061, |
| "grad_norm": 0.36656439304351807, |
| "learning_rate": 5.556911393156885e-06, |
| "loss": 1.4893901348114014, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.5688073394495414, |
| "grad_norm": 0.6261524558067322, |
| "learning_rate": 5.541735535932383e-06, |
| "loss": 1.058058261871338, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.5718654434250765, |
| "grad_norm": 0.3441345691680908, |
| "learning_rate": 5.526559203985605e-06, |
| "loss": 1.0509142875671387, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.5749235474006116, |
| "grad_norm": 0.2408900260925293, |
| "learning_rate": 5.511382569940258e-06, |
| "loss": 1.2871123552322388, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.5779816513761467, |
| "grad_norm": 0.45723816752433777, |
| "learning_rate": 5.496205806423481e-06, |
| "loss": 1.2235673666000366, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.581039755351682, |
| "grad_norm": 0.3109905421733856, |
| "learning_rate": 5.481029086063887e-06, |
| "loss": 1.177577018737793, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.5840978593272173, |
| "grad_norm": 0.20282985270023346, |
| "learning_rate": 5.4658525814896014e-06, |
| "loss": 1.2040612697601318, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.5871559633027523, |
| "grad_norm": 0.43076759576797485, |
| "learning_rate": 5.45067646532629e-06, |
| "loss": 1.4584531784057617, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.5902140672782874, |
| "grad_norm": 0.472885400056839, |
| "learning_rate": 5.435500910195203e-06, |
| "loss": 1.387641429901123, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.5932721712538225, |
| "grad_norm": 3.1532437801361084, |
| "learning_rate": 5.420326088711209e-06, |
| "loss": 1.221092700958252, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.5963302752293578, |
| "grad_norm": 0.6743189692497253, |
| "learning_rate": 5.405152173480833e-06, |
| "loss": 1.4836219549179077, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.599388379204893, |
| "grad_norm": 0.20277228951454163, |
| "learning_rate": 5.389979337100289e-06, |
| "loss": 1.5031371116638184, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.6024464831804281, |
| "grad_norm": 0.5120447874069214, |
| "learning_rate": 5.374807752153522e-06, |
| "loss": 1.282975673675537, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.6055045871559632, |
| "grad_norm": 0.35753709077835083, |
| "learning_rate": 5.359637591210242e-06, |
| "loss": 1.4665361642837524, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.6085626911314985, |
| "grad_norm": 0.7353309988975525, |
| "learning_rate": 5.344469026823959e-06, |
| "loss": 1.6730611324310303, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.6116207951070336, |
| "grad_norm": 0.4338257610797882, |
| "learning_rate": 5.329302231530029e-06, |
| "loss": 1.186348795890808, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.614678899082569, |
| "grad_norm": 0.42416566610336304, |
| "learning_rate": 5.31413737784368e-06, |
| "loss": 1.4430310726165771, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.617737003058104, |
| "grad_norm": 0.2432592213153839, |
| "learning_rate": 5.298974638258055e-06, |
| "loss": 1.518967866897583, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.620795107033639, |
| "grad_norm": 0.408245712518692, |
| "learning_rate": 5.283814185242252e-06, |
| "loss": 1.426690697669983, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.6238532110091743, |
| "grad_norm": 0.2117079198360443, |
| "learning_rate": 5.2686561912393606e-06, |
| "loss": 1.2693121433258057, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.6269113149847096, |
| "grad_norm": 4.30716609954834, |
| "learning_rate": 5.253500828664501e-06, |
| "loss": 0.9013931155204773, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.6299694189602447, |
| "grad_norm": 0.38770049810409546, |
| "learning_rate": 5.23834826990286e-06, |
| "loss": 1.5694489479064941, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.6330275229357798, |
| "grad_norm": 0.6700468063354492, |
| "learning_rate": 5.223198687307733e-06, |
| "loss": 1.503030776977539, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.6360856269113149, |
| "grad_norm": 0.2767106294631958, |
| "learning_rate": 5.208052253198564e-06, |
| "loss": 1.3917062282562256, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.6391437308868502, |
| "grad_norm": 0.3463125228881836, |
| "learning_rate": 5.192909139858981e-06, |
| "loss": 1.5068938732147217, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.6422018348623855, |
| "grad_norm": 0.3212260603904724, |
| "learning_rate": 5.177769519534846e-06, |
| "loss": 1.4421181678771973, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.6452599388379205, |
| "grad_norm": 0.4484805762767792, |
| "learning_rate": 5.162633564432285e-06, |
| "loss": 1.408212661743164, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.6483180428134556, |
| "grad_norm": 0.4805358350276947, |
| "learning_rate": 5.1475014467157325e-06, |
| "loss": 1.6133791208267212, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.6513761467889907, |
| "grad_norm": 0.5775420665740967, |
| "learning_rate": 5.132373338505978e-06, |
| "loss": 1.2856450080871582, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.654434250764526, |
| "grad_norm": 0.32906994223594666, |
| "learning_rate": 5.117249411878204e-06, |
| "loss": 1.04205322265625, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.6574923547400613, |
| "grad_norm": 0.5074779987335205, |
| "learning_rate": 5.10212983886003e-06, |
| "loss": 1.6698901653289795, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.6605504587155964, |
| "grad_norm": 0.36449626088142395, |
| "learning_rate": 5.087014791429552e-06, |
| "loss": 1.449878215789795, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.6636085626911314, |
| "grad_norm": 1.0477646589279175, |
| "learning_rate": 5.071904441513393e-06, |
| "loss": 1.5865240097045898, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.3797400891780853, |
| "learning_rate": 5.056798960984741e-06, |
| "loss": 1.4271771907806396, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.6697247706422018, |
| "grad_norm": 0.3018883466720581, |
| "learning_rate": 5.041698521661401e-06, |
| "loss": 1.6418373584747314, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.6727828746177371, |
| "grad_norm": 0.5908496379852295, |
| "learning_rate": 5.026603295303833e-06, |
| "loss": 1.5063586235046387, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.6758409785932722, |
| "grad_norm": 0.5799764394760132, |
| "learning_rate": 5.011513453613205e-06, |
| "loss": 1.5312390327453613, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.6788990825688073, |
| "grad_norm": 0.4648537337779999, |
| "learning_rate": 4.996429168229432e-06, |
| "loss": 1.4155495166778564, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.6819571865443423, |
| "grad_norm": 0.3357274830341339, |
| "learning_rate": 4.981350610729234e-06, |
| "loss": 1.07462477684021, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.6850152905198776, |
| "grad_norm": 0.8209952712059021, |
| "learning_rate": 4.966277952624179e-06, |
| "loss": 1.532288670539856, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.688073394495413, |
| "grad_norm": 0.6916195750236511, |
| "learning_rate": 4.951211365358723e-06, |
| "loss": 1.5015881061553955, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.691131498470948, |
| "grad_norm": 0.6677690148353577, |
| "learning_rate": 4.936151020308282e-06, |
| "loss": 1.5166327953338623, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.694189602446483, |
| "grad_norm": 0.7889437675476074, |
| "learning_rate": 4.921097088777261e-06, |
| "loss": 1.5232961177825928, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.6972477064220184, |
| "grad_norm": 0.5421835780143738, |
| "learning_rate": 4.906049741997119e-06, |
| "loss": 1.3370258808135986, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.7003058103975535, |
| "grad_norm": 0.28672778606414795, |
| "learning_rate": 4.8910091511244115e-06, |
| "loss": 1.5552886724472046, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.7033639143730888, |
| "grad_norm": 0.8609727025032043, |
| "learning_rate": 4.875975487238853e-06, |
| "loss": 1.6477062702178955, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.7064220183486238, |
| "grad_norm": 0.46577727794647217, |
| "learning_rate": 4.860948921341366e-06, |
| "loss": 1.3554713726043701, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.709480122324159, |
| "grad_norm": 0.4357546865940094, |
| "learning_rate": 4.845929624352136e-06, |
| "loss": 1.616469383239746, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.7125382262996942, |
| "grad_norm": 0.8016573786735535, |
| "learning_rate": 4.830917767108666e-06, |
| "loss": 1.4049677848815918, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.7155963302752295, |
| "grad_norm": 0.34570103883743286, |
| "learning_rate": 4.8159135203638394e-06, |
| "loss": 1.5350430011749268, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.7186544342507646, |
| "grad_norm": 0.6164813041687012, |
| "learning_rate": 4.800917054783971e-06, |
| "loss": 1.4737257957458496, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.7217125382262997, |
| "grad_norm": 0.30021098256111145, |
| "learning_rate": 4.785928540946869e-06, |
| "loss": 1.59697425365448, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.7247706422018347, |
| "grad_norm": 0.3294142782688141, |
| "learning_rate": 4.770948149339897e-06, |
| "loss": 1.6918811798095703, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.72782874617737, |
| "grad_norm": 0.33221927285194397, |
| "learning_rate": 4.755976050358026e-06, |
| "loss": 1.581977128982544, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.7308868501529053, |
| "grad_norm": 0.27995747327804565, |
| "learning_rate": 4.741012414301907e-06, |
| "loss": 1.42479407787323, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.7339449541284404, |
| "grad_norm": 0.4526294767856598, |
| "learning_rate": 4.726057411375927e-06, |
| "loss": 1.5270183086395264, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.7370030581039755, |
| "grad_norm": 0.6458525657653809, |
| "learning_rate": 4.711111211686279e-06, |
| "loss": 1.5350821018218994, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.7400611620795106, |
| "grad_norm": 0.40516841411590576, |
| "learning_rate": 4.6961739852390175e-06, |
| "loss": 1.5310497283935547, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.7431192660550459, |
| "grad_norm": 1.3104746341705322, |
| "learning_rate": 4.681245901938134e-06, |
| "loss": 1.5385562181472778, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.7461773700305812, |
| "grad_norm": 0.40381914377212524, |
| "learning_rate": 4.666327131583621e-06, |
| "loss": 1.5392662286758423, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.7492354740061162, |
| "grad_norm": 0.8844152688980103, |
| "learning_rate": 4.65141784386954e-06, |
| "loss": 1.333682894706726, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.7522935779816513, |
| "grad_norm": 0.423922061920166, |
| "learning_rate": 4.636518208382091e-06, |
| "loss": 1.4100391864776611, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.7553516819571864, |
| "grad_norm": 0.3589678406715393, |
| "learning_rate": 4.621628394597687e-06, |
| "loss": 1.341862440109253, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.7584097859327217, |
| "grad_norm": 0.6498292088508606, |
| "learning_rate": 4.606748571881018e-06, |
| "loss": 1.4297010898590088, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.761467889908257, |
| "grad_norm": 0.5506405234336853, |
| "learning_rate": 4.59187890948314e-06, |
| "loss": 1.4309487342834473, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.764525993883792, |
| "grad_norm": 0.45955854654312134, |
| "learning_rate": 4.577019576539527e-06, |
| "loss": 1.2851155996322632, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.7675840978593271, |
| "grad_norm": 0.28625011444091797, |
| "learning_rate": 4.562170742068175e-06, |
| "loss": 0.9397743940353394, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.7706422018348624, |
| "grad_norm": 0.22773736715316772, |
| "learning_rate": 4.547332574967653e-06, |
| "loss": 1.237460732460022, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.7737003058103975, |
| "grad_norm": 0.25427719950675964, |
| "learning_rate": 4.5325052440151985e-06, |
| "loss": 1.3028910160064697, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.7767584097859328, |
| "grad_norm": 0.2875189781188965, |
| "learning_rate": 4.517688917864794e-06, |
| "loss": 1.3547457456588745, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.7798165137614679, |
| "grad_norm": 0.21899199485778809, |
| "learning_rate": 4.502883765045244e-06, |
| "loss": 1.36411714553833, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.782874617737003, |
| "grad_norm": 0.21183030307292938, |
| "learning_rate": 4.488089953958264e-06, |
| "loss": 1.4323028326034546, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.7859327217125383, |
| "grad_norm": 0.22526955604553223, |
| "learning_rate": 4.473307652876563e-06, |
| "loss": 1.3429040908813477, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.7889908256880735, |
| "grad_norm": 0.266107439994812, |
| "learning_rate": 4.458537029941926e-06, |
| "loss": 1.3663442134857178, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.7920489296636086, |
| "grad_norm": 0.490496963262558, |
| "learning_rate": 4.4437782531633074e-06, |
| "loss": 1.3354597091674805, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.7951070336391437, |
| "grad_norm": 0.1854841560125351, |
| "learning_rate": 4.429031490414919e-06, |
| "loss": 1.3446393013000488, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.7981651376146788, |
| "grad_norm": 0.1960364729166031, |
| "learning_rate": 4.414296909434311e-06, |
| "loss": 1.3029416799545288, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.801223241590214, |
| "grad_norm": 0.35048866271972656, |
| "learning_rate": 4.399574677820481e-06, |
| "loss": 1.348449945449829, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.8042813455657494, |
| "grad_norm": 0.3793323040008545, |
| "learning_rate": 4.384864963031952e-06, |
| "loss": 1.297593593597412, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.8073394495412844, |
| "grad_norm": 0.14626124501228333, |
| "learning_rate": 4.370167932384873e-06, |
| "loss": 1.1695170402526855, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.8103975535168195, |
| "grad_norm": 0.16865181922912598, |
| "learning_rate": 4.355483753051125e-06, |
| "loss": 1.2123092412948608, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.8134556574923546, |
| "grad_norm": 0.1931789070367813, |
| "learning_rate": 4.340812592056401e-06, |
| "loss": 0.9932126998901367, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.81651376146789, |
| "grad_norm": 0.2547837793827057, |
| "learning_rate": 4.326154616278326e-06, |
| "loss": 1.2431546449661255, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.8195718654434252, |
| "grad_norm": 0.23825769126415253, |
| "learning_rate": 4.311509992444539e-06, |
| "loss": 1.286515712738037, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.8226299694189603, |
| "grad_norm": 0.25244706869125366, |
| "learning_rate": 4.296878887130819e-06, |
| "loss": 1.3000450134277344, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.8256880733944953, |
| "grad_norm": 0.23451480269432068, |
| "learning_rate": 4.282261466759165e-06, |
| "loss": 1.2664532661437988, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.8287461773700304, |
| "grad_norm": 0.2735919952392578, |
| "learning_rate": 4.267657897595929e-06, |
| "loss": 1.288360834121704, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.8318042813455657, |
| "grad_norm": 0.18107269704341888, |
| "learning_rate": 4.253068345749903e-06, |
| "loss": 1.2625651359558105, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.834862385321101, |
| "grad_norm": 0.2293253242969513, |
| "learning_rate": 4.238492977170439e-06, |
| "loss": 1.234043836593628, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.837920489296636, |
| "grad_norm": 0.27160146832466125, |
| "learning_rate": 4.223931957645566e-06, |
| "loss": 1.300539493560791, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.8409785932721712, |
| "grad_norm": 0.25112462043762207, |
| "learning_rate": 4.2093854528000955e-06, |
| "loss": 1.2719401121139526, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.8440366972477065, |
| "grad_norm": 0.33997592329978943, |
| "learning_rate": 4.194853628093742e-06, |
| "loss": 1.2453508377075195, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.8470948012232415, |
| "grad_norm": 0.6576793789863586, |
| "learning_rate": 4.180336648819242e-06, |
| "loss": 1.233917236328125, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.8501529051987768, |
| "grad_norm": 0.26551222801208496, |
| "learning_rate": 4.165834680100469e-06, |
| "loss": 1.2595276832580566, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.853211009174312, |
| "grad_norm": 0.2170596420764923, |
| "learning_rate": 4.151347886890562e-06, |
| "loss": 1.2505378723144531, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.856269113149847, |
| "grad_norm": 0.2974804937839508, |
| "learning_rate": 4.1368764339700404e-06, |
| "loss": 1.2092756032943726, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.8593272171253823, |
| "grad_norm": 0.2567199468612671, |
| "learning_rate": 4.1224204859449425e-06, |
| "loss": 1.2698951959609985, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.8623853211009176, |
| "grad_norm": 0.23152267932891846, |
| "learning_rate": 4.107980207244937e-06, |
| "loss": 1.3027379512786865, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.8654434250764527, |
| "grad_norm": 0.26830926537513733, |
| "learning_rate": 4.093555762121469e-06, |
| "loss": 1.308929443359375, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.8685015290519877, |
| "grad_norm": 0.2566030025482178, |
| "learning_rate": 4.07914731464588e-06, |
| "loss": 1.2964577674865723, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.8715596330275228, |
| "grad_norm": 0.4025701582431793, |
| "learning_rate": 4.064755028707546e-06, |
| "loss": 1.31220543384552, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.8746177370030581, |
| "grad_norm": 0.25386303663253784, |
| "learning_rate": 4.0503790680120136e-06, |
| "loss": 1.299830436706543, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.8776758409785934, |
| "grad_norm": 0.39947405457496643, |
| "learning_rate": 4.036019596079136e-06, |
| "loss": 1.3202039003372192, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.8807339449541285, |
| "grad_norm": 0.23179592192173004, |
| "learning_rate": 4.021676776241218e-06, |
| "loss": 1.2405881881713867, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.8837920489296636, |
| "grad_norm": 0.48796483874320984, |
| "learning_rate": 4.007350771641151e-06, |
| "loss": 1.288329005241394, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.8868501529051986, |
| "grad_norm": 0.26645490527153015, |
| "learning_rate": 3.993041745230562e-06, |
| "loss": 1.2443333864212036, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.889908256880734, |
| "grad_norm": 0.19715459644794464, |
| "learning_rate": 3.978749859767961e-06, |
| "loss": 1.2754254341125488, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.8929663608562692, |
| "grad_norm": 0.2424282431602478, |
| "learning_rate": 3.9644752778168836e-06, |
| "loss": 1.2853577136993408, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.8960244648318043, |
| "grad_norm": 0.22451399266719818, |
| "learning_rate": 3.950218161744049e-06, |
| "loss": 1.308832049369812, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.8990825688073394, |
| "grad_norm": 0.38970160484313965, |
| "learning_rate": 3.935978673717512e-06, |
| "loss": 1.2945680618286133, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.9021406727828745, |
| "grad_norm": 0.22287186980247498, |
| "learning_rate": 3.921756975704809e-06, |
| "loss": 1.2276027202606201, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.9051987767584098, |
| "grad_norm": 0.2538350820541382, |
| "learning_rate": 3.9075532294711326e-06, |
| "loss": 1.2546557188034058, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.908256880733945, |
| "grad_norm": 0.19810384511947632, |
| "learning_rate": 3.893367596577475e-06, |
| "loss": 1.2940235137939453, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.9113149847094801, |
| "grad_norm": 0.20586298406124115, |
| "learning_rate": 3.8792002383788044e-06, |
| "loss": 1.3136601448059082, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.9143730886850152, |
| "grad_norm": 0.2770041227340698, |
| "learning_rate": 3.865051316022215e-06, |
| "loss": 1.2952957153320312, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.9174311926605505, |
| "grad_norm": 0.22728121280670166, |
| "learning_rate": 3.85092099044511e-06, |
| "loss": 1.271630048751831, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.9204892966360856, |
| "grad_norm": 0.1984010934829712, |
| "learning_rate": 3.836809422373354e-06, |
| "loss": 1.2360022068023682, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.9235474006116209, |
| "grad_norm": 0.24555295705795288, |
| "learning_rate": 3.822716772319463e-06, |
| "loss": 1.271683692932129, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.926605504587156, |
| "grad_norm": 0.20771312713623047, |
| "learning_rate": 3.8086432005807616e-06, |
| "loss": 1.2962419986724854, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.929663608562691, |
| "grad_norm": 0.268265962600708, |
| "learning_rate": 3.794588867237574e-06, |
| "loss": 1.2458467483520508, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.9327217125382263, |
| "grad_norm": 0.3802253007888794, |
| "learning_rate": 3.780553932151392e-06, |
| "loss": 1.2733559608459473, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.9357798165137616, |
| "grad_norm": 0.6309070587158203, |
| "learning_rate": 3.766538554963062e-06, |
| "loss": 1.270596981048584, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.9388379204892967, |
| "grad_norm": 0.3053569793701172, |
| "learning_rate": 3.752542895090969e-06, |
| "loss": 1.3194211721420288, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.9418960244648318, |
| "grad_norm": 0.21923166513442993, |
| "learning_rate": 3.7385671117292245e-06, |
| "loss": 1.3323618173599243, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.9449541284403669, |
| "grad_norm": 0.2166883647441864, |
| "learning_rate": 3.72461136384585e-06, |
| "loss": 1.2965784072875977, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.9480122324159022, |
| "grad_norm": 0.2825508117675781, |
| "learning_rate": 3.710675810180977e-06, |
| "loss": 1.3159446716308594, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.9510703363914375, |
| "grad_norm": 0.299638956785202, |
| "learning_rate": 3.696760609245035e-06, |
| "loss": 1.2833199501037598, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.9541284403669725, |
| "grad_norm": 0.2223178744316101, |
| "learning_rate": 3.68286591931695e-06, |
| "loss": 1.22653329372406, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.9571865443425076, |
| "grad_norm": 0.2592408359050751, |
| "learning_rate": 3.668991898442347e-06, |
| "loss": 1.2542335987091064, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.9602446483180427, |
| "grad_norm": 0.2755810618400574, |
| "learning_rate": 3.6551387044317464e-06, |
| "loss": 1.2745262384414673, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.963302752293578, |
| "grad_norm": 0.21057268977165222, |
| "learning_rate": 3.6413064948587773e-06, |
| "loss": 1.2521765232086182, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.9663608562691133, |
| "grad_norm": 0.34427741169929504, |
| "learning_rate": 3.6274954270583797e-06, |
| "loss": 1.263521432876587, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.9694189602446484, |
| "grad_norm": 0.2196524441242218, |
| "learning_rate": 3.6137056581250142e-06, |
| "loss": 1.3154864311218262, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.9724770642201834, |
| "grad_norm": 0.3191309869289398, |
| "learning_rate": 3.599937344910872e-06, |
| "loss": 1.2999801635742188, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.9755351681957185, |
| "grad_norm": 0.22587168216705322, |
| "learning_rate": 3.5861906440241057e-06, |
| "loss": 1.3176116943359375, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.9785932721712538, |
| "grad_norm": 0.2769485414028168, |
| "learning_rate": 3.5724657118270344e-06, |
| "loss": 1.273116111755371, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.981651376146789, |
| "grad_norm": 0.3299882411956787, |
| "learning_rate": 3.558762704434361e-06, |
| "loss": 1.268465280532837, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.9847094801223242, |
| "grad_norm": 0.26859885454177856, |
| "learning_rate": 3.545081777711412e-06, |
| "loss": 1.2919847965240479, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.9877675840978593, |
| "grad_norm": 0.9502137899398804, |
| "learning_rate": 3.5314230872723564e-06, |
| "loss": 1.342604160308838, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.9908256880733946, |
| "grad_norm": 0.2677958011627197, |
| "learning_rate": 3.5177867884784334e-06, |
| "loss": 1.3786706924438477, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.9938837920489296, |
| "grad_norm": 0.40644171833992004, |
| "learning_rate": 3.504173036436186e-06, |
| "loss": 1.7326993942260742, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.996941896024465, |
| "grad_norm": 0.45419755578041077, |
| "learning_rate": 3.4905819859957002e-06, |
| "loss": 1.7214076519012451, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.9430392980575562, |
| "learning_rate": 3.4770137917488454e-06, |
| "loss": 1.8467901945114136, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.003058103975535, |
| "grad_norm": 0.26824504137039185, |
| "learning_rate": 3.463468608027505e-06, |
| "loss": 1.4361066818237305, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.00611620795107, |
| "grad_norm": 0.22578075528144836, |
| "learning_rate": 3.4499465889018337e-06, |
| "loss": 1.394030213356018, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.0091743119266057, |
| "grad_norm": 0.26776137948036194, |
| "learning_rate": 3.4364478881785002e-06, |
| "loss": 1.4127156734466553, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.0122324159021407, |
| "grad_norm": 0.3707635998725891, |
| "learning_rate": 3.4229726593989353e-06, |
| "loss": 1.340601921081543, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.015290519877676, |
| "grad_norm": 0.23890726268291473, |
| "learning_rate": 3.409521055837586e-06, |
| "loss": 1.5300512313842773, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.018348623853211, |
| "grad_norm": 0.21163959801197052, |
| "learning_rate": 3.396093230500176e-06, |
| "loss": 1.4162603616714478, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.021406727828746, |
| "grad_norm": 0.3320009112358093, |
| "learning_rate": 3.3826893361219614e-06, |
| "loss": 1.3640984296798706, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.0244648318042815, |
| "grad_norm": 0.2645728886127472, |
| "learning_rate": 3.3693095251659975e-06, |
| "loss": 1.4446080923080444, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.0275229357798166, |
| "grad_norm": 0.2824868857860565, |
| "learning_rate": 3.3559539498213965e-06, |
| "loss": 1.3105710744857788, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.0305810397553516, |
| "grad_norm": 0.23126038908958435, |
| "learning_rate": 3.342622762001606e-06, |
| "loss": 1.3857829570770264, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.0336391437308867, |
| "grad_norm": 0.3670974671840668, |
| "learning_rate": 3.3293161133426777e-06, |
| "loss": 1.496924638748169, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.036697247706422, |
| "grad_norm": 0.3528394401073456, |
| "learning_rate": 3.3160341552015375e-06, |
| "loss": 1.4135003089904785, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.0397553516819573, |
| "grad_norm": 0.20478151738643646, |
| "learning_rate": 3.3027770386542706e-06, |
| "loss": 1.2156240940093994, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.0428134556574924, |
| "grad_norm": 0.46617865562438965, |
| "learning_rate": 3.289544914494403e-06, |
| "loss": 1.3763898611068726, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.0458715596330275, |
| "grad_norm": 0.3884037733078003, |
| "learning_rate": 3.276337933231179e-06, |
| "loss": 1.622403860092163, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.0489296636085625, |
| "grad_norm": 0.25180479884147644, |
| "learning_rate": 3.2631562450878597e-06, |
| "loss": 1.2860331535339355, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.051987767584098, |
| "grad_norm": 0.3756599426269531, |
| "learning_rate": 3.2500000000000015e-06, |
| "loss": 1.4189289808273315, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.055045871559633, |
| "grad_norm": 0.32630693912506104, |
| "learning_rate": 3.236869347613764e-06, |
| "loss": 1.308931827545166, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.058103975535168, |
| "grad_norm": 0.28512176871299744, |
| "learning_rate": 3.2237644372842016e-06, |
| "loss": 1.2988288402557373, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.0611620795107033, |
| "grad_norm": 0.19952069222927094, |
| "learning_rate": 3.2106854180735625e-06, |
| "loss": 1.3092859983444214, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.0642201834862384, |
| "grad_norm": 0.24031268060207367, |
| "learning_rate": 3.1976324387495948e-06, |
| "loss": 1.3389842510223389, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.067278287461774, |
| "grad_norm": 0.26569297909736633, |
| "learning_rate": 3.1846056477838572e-06, |
| "loss": 1.5241750478744507, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.070336391437309, |
| "grad_norm": 0.5251048803329468, |
| "learning_rate": 3.171605193350028e-06, |
| "loss": 1.542860507965088, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.073394495412844, |
| "grad_norm": 0.34643858671188354, |
| "learning_rate": 3.158631223322216e-06, |
| "loss": 1.3612843751907349, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.076452599388379, |
| "grad_norm": 0.2934923470020294, |
| "learning_rate": 3.145683885273288e-06, |
| "loss": 1.355604648590088, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.079510703363914, |
| "grad_norm": 0.743224024772644, |
| "learning_rate": 3.1327633264731806e-06, |
| "loss": 1.341210126876831, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.0825688073394497, |
| "grad_norm": 0.32269051671028137, |
| "learning_rate": 3.11986969388723e-06, |
| "loss": 1.4118154048919678, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.085626911314985, |
| "grad_norm": 0.29159843921661377, |
| "learning_rate": 3.1070031341744983e-06, |
| "loss": 1.389265775680542, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.08868501529052, |
| "grad_norm": 0.24911250174045563, |
| "learning_rate": 3.094163793686108e-06, |
| "loss": 1.422662377357483, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.091743119266055, |
| "grad_norm": 0.21826767921447754, |
| "learning_rate": 3.0813518184635737e-06, |
| "loss": 1.4053363800048828, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.09480122324159, |
| "grad_norm": 0.3076784610748291, |
| "learning_rate": 3.0685673542371465e-06, |
| "loss": 1.283433198928833, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.0978593272171255, |
| "grad_norm": 0.17591321468353271, |
| "learning_rate": 3.0558105464241466e-06, |
| "loss": 1.237450361251831, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.1009174311926606, |
| "grad_norm": 0.2663421332836151, |
| "learning_rate": 3.0430815401273206e-06, |
| "loss": 1.3944424390792847, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.1039755351681957, |
| "grad_norm": 0.26904943585395813, |
| "learning_rate": 3.030380480133186e-06, |
| "loss": 1.5187671184539795, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.1070336391437308, |
| "grad_norm": 0.6649749279022217, |
| "learning_rate": 3.017707510910378e-06, |
| "loss": 1.3504502773284912, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.1100917431192663, |
| "grad_norm": 0.37516942620277405, |
| "learning_rate": 3.0050627766080188e-06, |
| "loss": 1.5420799255371094, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.1131498470948014, |
| "grad_norm": 0.342439204454422, |
| "learning_rate": 2.9924464210540717e-06, |
| "loss": 1.5547534227371216, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.1162079510703364, |
| "grad_norm": 0.48497647047042847, |
| "learning_rate": 2.979858587753698e-06, |
| "loss": 1.3153679370880127, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.1192660550458715, |
| "grad_norm": 0.39512813091278076, |
| "learning_rate": 2.96729941988764e-06, |
| "loss": 1.2663487195968628, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.1223241590214066, |
| "grad_norm": 0.3283194899559021, |
| "learning_rate": 2.9547690603105774e-06, |
| "loss": 1.4247238636016846, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.1253822629969417, |
| "grad_norm": 0.3506661355495453, |
| "learning_rate": 2.942267651549513e-06, |
| "loss": 1.2393386363983154, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.128440366972477, |
| "grad_norm": 0.3594140112400055, |
| "learning_rate": 2.9297953358021487e-06, |
| "loss": 1.317380666732788, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.1314984709480123, |
| "grad_norm": 0.5971735715866089, |
| "learning_rate": 2.9173522549352608e-06, |
| "loss": 1.2773442268371582, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.1345565749235473, |
| "grad_norm": 0.3666265606880188, |
| "learning_rate": 2.9049385504830987e-06, |
| "loss": 1.34925377368927, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.1376146788990824, |
| "grad_norm": 0.31561410427093506, |
| "learning_rate": 2.892554363645766e-06, |
| "loss": 1.2674505710601807, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.140672782874618, |
| "grad_norm": 0.2038232684135437, |
| "learning_rate": 2.880199835287618e-06, |
| "loss": 1.3169916868209839, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.143730886850153, |
| "grad_norm": 0.25303685665130615, |
| "learning_rate": 2.867875105935658e-06, |
| "loss": 1.4587633609771729, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.146788990825688, |
| "grad_norm": 0.31143543124198914, |
| "learning_rate": 2.8555803157779384e-06, |
| "loss": 1.3396885395050049, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.149847094801223, |
| "grad_norm": 0.2281101942062378, |
| "learning_rate": 2.8433156046619705e-06, |
| "loss": 1.2936108112335205, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.1529051987767582, |
| "grad_norm": 0.3648523688316345, |
| "learning_rate": 2.831081112093129e-06, |
| "loss": 1.5100679397583008, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.1559633027522938, |
| "grad_norm": 0.278677374124527, |
| "learning_rate": 2.8188769772330637e-06, |
| "loss": 1.3869754076004028, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.159021406727829, |
| "grad_norm": 0.21437983214855194, |
| "learning_rate": 2.806703338898123e-06, |
| "loss": 1.3129749298095703, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.162079510703364, |
| "grad_norm": 0.24729043245315552, |
| "learning_rate": 2.794560335557771e-06, |
| "loss": 1.4099204540252686, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.165137614678899, |
| "grad_norm": 0.3120039701461792, |
| "learning_rate": 2.7824481053330154e-06, |
| "loss": 1.3897459506988525, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.168195718654434, |
| "grad_norm": 0.4525415897369385, |
| "learning_rate": 2.770366785994827e-06, |
| "loss": 1.445647954940796, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.1712538226299696, |
| "grad_norm": 0.4266716241836548, |
| "learning_rate": 2.758316514962585e-06, |
| "loss": 1.3233726024627686, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.1743119266055047, |
| "grad_norm": 0.28266647458076477, |
| "learning_rate": 2.7462974293025112e-06, |
| "loss": 1.4238274097442627, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.1773700305810397, |
| "grad_norm": 0.3248072564601898, |
| "learning_rate": 2.7343096657261e-06, |
| "loss": 1.3104677200317383, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.180428134556575, |
| "grad_norm": 0.3584449887275696, |
| "learning_rate": 2.7223533605885784e-06, |
| "loss": 1.6277508735656738, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.18348623853211, |
| "grad_norm": 0.35764527320861816, |
| "learning_rate": 2.710428649887348e-06, |
| "loss": 1.3882687091827393, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.1865443425076454, |
| "grad_norm": 0.24804551899433136, |
| "learning_rate": 2.6985356692604336e-06, |
| "loss": 1.4513651132583618, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.1896024464831805, |
| "grad_norm": 0.2202014923095703, |
| "learning_rate": 2.686674553984951e-06, |
| "loss": 1.4342420101165771, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.1926605504587156, |
| "grad_norm": 0.36250677704811096, |
| "learning_rate": 2.6748454389755576e-06, |
| "loss": 1.394620656967163, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.1957186544342506, |
| "grad_norm": 0.3232296109199524, |
| "learning_rate": 2.6630484587829265e-06, |
| "loss": 1.3978071212768555, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.198776758409786, |
| "grad_norm": 0.4420628547668457, |
| "learning_rate": 2.651283747592211e-06, |
| "loss": 1.4031468629837036, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.2018348623853212, |
| "grad_norm": 0.6229142546653748, |
| "learning_rate": 2.639551439221516e-06, |
| "loss": 1.3914484977722168, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.2048929663608563, |
| "grad_norm": 0.3233772814273834, |
| "learning_rate": 2.627851667120387e-06, |
| "loss": 1.476043701171875, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.2079510703363914, |
| "grad_norm": 0.35107681155204773, |
| "learning_rate": 2.6161845643682763e-06, |
| "loss": 1.407777190208435, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.2110091743119265, |
| "grad_norm": 0.3123028874397278, |
| "learning_rate": 2.6045502636730457e-06, |
| "loss": 1.3102259635925293, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.214067278287462, |
| "grad_norm": 0.2534146308898926, |
| "learning_rate": 2.5929488973694406e-06, |
| "loss": 1.2788276672363281, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.217125382262997, |
| "grad_norm": 0.24462664127349854, |
| "learning_rate": 2.581380597417599e-06, |
| "loss": 1.3362743854522705, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.220183486238532, |
| "grad_norm": 0.2978283166885376, |
| "learning_rate": 2.569845495401542e-06, |
| "loss": 1.2902576923370361, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.223241590214067, |
| "grad_norm": 0.299277126789093, |
| "learning_rate": 2.5583437225276818e-06, |
| "loss": 1.3449206352233887, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.2262996941896023, |
| "grad_norm": 0.36601486802101135, |
| "learning_rate": 2.546875409623324e-06, |
| "loss": 1.3038407564163208, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.229357798165138, |
| "grad_norm": 0.42299339175224304, |
| "learning_rate": 2.5354406871351833e-06, |
| "loss": 1.5554304122924805, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.232415902140673, |
| "grad_norm": 0.32388123869895935, |
| "learning_rate": 2.5240396851279043e-06, |
| "loss": 1.5746049880981445, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.235474006116208, |
| "grad_norm": 0.39095836877822876, |
| "learning_rate": 2.5126725332825675e-06, |
| "loss": 1.6094728708267212, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.238532110091743, |
| "grad_norm": 0.5842258930206299, |
| "learning_rate": 2.501339360895231e-06, |
| "loss": 1.5279463529586792, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.241590214067278, |
| "grad_norm": 0.3429890275001526, |
| "learning_rate": 2.4900402968754504e-06, |
| "loss": 1.5856099128723145, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.2446483180428136, |
| "grad_norm": 0.35519224405288696, |
| "learning_rate": 2.4787754697448153e-06, |
| "loss": 1.4757394790649414, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.2477064220183487, |
| "grad_norm": 0.46203580498695374, |
| "learning_rate": 2.4675450076354822e-06, |
| "loss": 1.584846019744873, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.2507645259938838, |
| "grad_norm": 0.8099899888038635, |
| "learning_rate": 2.4563490382887267e-06, |
| "loss": 1.367172360420227, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.253822629969419, |
| "grad_norm": 0.7287035584449768, |
| "learning_rate": 2.4451876890534847e-06, |
| "loss": 1.492293357849121, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.2568807339449544, |
| "grad_norm": 0.3203519284725189, |
| "learning_rate": 2.4340610868849e-06, |
| "loss": 1.2751667499542236, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.2599388379204894, |
| "grad_norm": 0.6493098139762878, |
| "learning_rate": 2.4229693583428916e-06, |
| "loss": 1.4823472499847412, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.2629969418960245, |
| "grad_norm": 0.4101910889148712, |
| "learning_rate": 2.4119126295906997e-06, |
| "loss": 1.09395170211792, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.2660550458715596, |
| "grad_norm": 0.4682796597480774, |
| "learning_rate": 2.400891026393464e-06, |
| "loss": 1.0601507425308228, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.2691131498470947, |
| "grad_norm": 0.5146844387054443, |
| "learning_rate": 2.3899046741167868e-06, |
| "loss": 1.2724342346191406, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.2721712538226297, |
| "grad_norm": 0.8610156178474426, |
| "learning_rate": 2.3789536977253034e-06, |
| "loss": 1.3352521657943726, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.2752293577981653, |
| "grad_norm": 1.053831696510315, |
| "learning_rate": 2.3680382217812685e-06, |
| "loss": 1.4391016960144043, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.2782874617737003, |
| "grad_norm": 0.6413374543190002, |
| "learning_rate": 2.3571583704431355e-06, |
| "loss": 1.3907897472381592, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.2813455657492354, |
| "grad_norm": 0.30044737458229065, |
| "learning_rate": 2.346314267464145e-06, |
| "loss": 1.1618599891662598, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.2844036697247705, |
| "grad_norm": 0.3427642285823822, |
| "learning_rate": 2.3355060361909134e-06, |
| "loss": 1.134230375289917, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.287461773700306, |
| "grad_norm": 0.28166523575782776, |
| "learning_rate": 2.3247337995620363e-06, |
| "loss": 1.357274055480957, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.290519877675841, |
| "grad_norm": 0.7598418593406677, |
| "learning_rate": 2.313997680106686e-06, |
| "loss": 1.2663555145263672, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.293577981651376, |
| "grad_norm": 1.0048569440841675, |
| "learning_rate": 2.3032977999432205e-06, |
| "loss": 1.2259790897369385, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.2966360856269112, |
| "grad_norm": 0.3067741096019745, |
| "learning_rate": 2.2926342807777886e-06, |
| "loss": 1.435164213180542, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.2996941896024463, |
| "grad_norm": 0.5623937249183655, |
| "learning_rate": 2.2820072439029524e-06, |
| "loss": 1.4023568630218506, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.302752293577982, |
| "grad_norm": 0.3359718918800354, |
| "learning_rate": 2.271416810196308e-06, |
| "loss": 1.1277801990509033, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.305810397553517, |
| "grad_norm": 0.3305533528327942, |
| "learning_rate": 2.2608631001190994e-06, |
| "loss": 1.3414134979248047, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.308868501529052, |
| "grad_norm": 0.28481531143188477, |
| "learning_rate": 2.2503462337148642e-06, |
| "loss": 1.4879052639007568, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.311926605504587, |
| "grad_norm": 0.28595951199531555, |
| "learning_rate": 2.239866330608057e-06, |
| "loss": 1.6209688186645508, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.314984709480122, |
| "grad_norm": 0.29558923840522766, |
| "learning_rate": 2.2294235100026933e-06, |
| "loss": 1.6481235027313232, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.3180428134556577, |
| "grad_norm": 0.5758782029151917, |
| "learning_rate": 2.21901789068099e-06, |
| "loss": 1.7679166793823242, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.3211009174311927, |
| "grad_norm": 0.3111439347267151, |
| "learning_rate": 2.2086495910020192e-06, |
| "loss": 1.3151183128356934, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.324159021406728, |
| "grad_norm": 0.44918501377105713, |
| "learning_rate": 2.1983187289003587e-06, |
| "loss": 1.3933916091918945, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.327217125382263, |
| "grad_norm": 0.3173042833805084, |
| "learning_rate": 2.188025421884754e-06, |
| "loss": 1.240437388420105, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.330275229357798, |
| "grad_norm": 0.2350539118051529, |
| "learning_rate": 2.1777697870367713e-06, |
| "loss": 1.1647779941558838, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.3137843906879425, |
| "learning_rate": 2.1675519410094803e-06, |
| "loss": 1.5445265769958496, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.3363914373088686, |
| "grad_norm": 0.5268841981887817, |
| "learning_rate": 2.157372000026119e-06, |
| "loss": 1.444595217704773, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.3394495412844036, |
| "grad_norm": 0.3506692349910736, |
| "learning_rate": 2.1472300798787746e-06, |
| "loss": 1.6354224681854248, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.3425076452599387, |
| "grad_norm": 0.3233583867549896, |
| "learning_rate": 2.1371262959270594e-06, |
| "loss": 1.1021732091903687, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.3455657492354742, |
| "grad_norm": 0.29296091198921204, |
| "learning_rate": 2.1270607630968104e-06, |
| "loss": 1.3453254699707031, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.3486238532110093, |
| "grad_norm": 0.3317727744579315, |
| "learning_rate": 2.1170335958787736e-06, |
| "loss": 1.607575535774231, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.3516819571865444, |
| "grad_norm": 0.2295382171869278, |
| "learning_rate": 2.1070449083273047e-06, |
| "loss": 1.3497262001037598, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.3547400611620795, |
| "grad_norm": 0.4568946957588196, |
| "learning_rate": 2.0970948140590672e-06, |
| "loss": 1.509822130203247, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.3577981651376145, |
| "grad_norm": 0.34416595101356506, |
| "learning_rate": 2.08718342625175e-06, |
| "loss": 1.385573148727417, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.3608562691131496, |
| "grad_norm": 0.33610644936561584, |
| "learning_rate": 2.077310857642772e-06, |
| "loss": 1.3133833408355713, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.363914373088685, |
| "grad_norm": 0.332163006067276, |
| "learning_rate": 2.067477220527998e-06, |
| "loss": 1.3794035911560059, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.36697247706422, |
| "grad_norm": 0.46091410517692566, |
| "learning_rate": 2.05768262676047e-06, |
| "loss": 1.4221172332763672, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.3700305810397553, |
| "grad_norm": 0.2670794427394867, |
| "learning_rate": 2.0479271877491278e-06, |
| "loss": 1.2908828258514404, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.3730886850152904, |
| "grad_norm": 0.31927385926246643, |
| "learning_rate": 2.038211014457546e-06, |
| "loss": 1.3988337516784668, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.376146788990826, |
| "grad_norm": 0.4126211404800415, |
| "learning_rate": 2.028534217402667e-06, |
| "loss": 1.7016716003417969, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.379204892966361, |
| "grad_norm": 0.6094360947608948, |
| "learning_rate": 2.0188969066535484e-06, |
| "loss": 2.0326876640319824, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.382262996941896, |
| "grad_norm": 0.40967652201652527, |
| "learning_rate": 2.0092991918301106e-06, |
| "loss": 1.3301377296447754, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.385321100917431, |
| "grad_norm": 0.6155174970626831, |
| "learning_rate": 1.9997411821018885e-06, |
| "loss": 1.319265604019165, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.388379204892966, |
| "grad_norm": 0.4441206455230713, |
| "learning_rate": 1.990222986186786e-06, |
| "loss": 1.3922169208526611, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.3914373088685017, |
| "grad_norm": 0.5924298167228699, |
| "learning_rate": 1.980744712349849e-06, |
| "loss": 1.4741730690002441, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.3944954128440368, |
| "grad_norm": 0.42252296209335327, |
| "learning_rate": 1.9713064684020262e-06, |
| "loss": 1.4076108932495117, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.397553516819572, |
| "grad_norm": 0.36031708121299744, |
| "learning_rate": 1.9619083616989457e-06, |
| "loss": 1.278861403465271, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.400611620795107, |
| "grad_norm": 0.24064381420612335, |
| "learning_rate": 1.952550499139689e-06, |
| "loss": 1.19804048538208, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.4036697247706424, |
| "grad_norm": 0.18197159469127655, |
| "learning_rate": 1.9432329871655837e-06, |
| "loss": 1.12447988986969, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.4067278287461775, |
| "grad_norm": 0.30438297986984253, |
| "learning_rate": 1.933955931758988e-06, |
| "loss": 1.2643486261367798, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.4097859327217126, |
| "grad_norm": 0.5426669120788574, |
| "learning_rate": 1.9247194384420855e-06, |
| "loss": 1.504340410232544, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.4128440366972477, |
| "grad_norm": 0.6118716597557068, |
| "learning_rate": 1.915523612275681e-06, |
| "loss": 1.5359920263290405, |
| "step": 1578 |
| }, |
| { |
| "epoch": 2.4159021406727827, |
| "grad_norm": 0.5290548801422119, |
| "learning_rate": 1.9063685578580137e-06, |
| "loss": 1.5219250917434692, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.418960244648318, |
| "grad_norm": 0.348886638879776, |
| "learning_rate": 1.8972543793235626e-06, |
| "loss": 1.5620722770690918, |
| "step": 1582 |
| }, |
| { |
| "epoch": 2.4220183486238533, |
| "grad_norm": 0.4480542838573456, |
| "learning_rate": 1.8881811803418624e-06, |
| "loss": 1.3870704174041748, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.4250764525993884, |
| "grad_norm": 0.6594481468200684, |
| "learning_rate": 1.8791490641163218e-06, |
| "loss": 1.5246330499649048, |
| "step": 1586 |
| }, |
| { |
| "epoch": 2.4281345565749235, |
| "grad_norm": 0.48964548110961914, |
| "learning_rate": 1.870158133383055e-06, |
| "loss": 1.4073295593261719, |
| "step": 1588 |
| }, |
| { |
| "epoch": 2.4311926605504586, |
| "grad_norm": 0.40440455079078674, |
| "learning_rate": 1.8612084904097117e-06, |
| "loss": 1.329315423965454, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.434250764525994, |
| "grad_norm": 0.3714819550514221, |
| "learning_rate": 1.852300236994308e-06, |
| "loss": 1.3444490432739258, |
| "step": 1592 |
| }, |
| { |
| "epoch": 2.437308868501529, |
| "grad_norm": 0.5145377516746521, |
| "learning_rate": 1.8434334744640763e-06, |
| "loss": 1.5467479228973389, |
| "step": 1594 |
| }, |
| { |
| "epoch": 2.4403669724770642, |
| "grad_norm": 0.46002912521362305, |
| "learning_rate": 1.8346083036743104e-06, |
| "loss": 1.289878249168396, |
| "step": 1596 |
| }, |
| { |
| "epoch": 2.4434250764525993, |
| "grad_norm": 0.793483555316925, |
| "learning_rate": 1.8258248250072158e-06, |
| "loss": 1.4660496711730957, |
| "step": 1598 |
| }, |
| { |
| "epoch": 2.4464831804281344, |
| "grad_norm": 0.44911351799964905, |
| "learning_rate": 1.8170831383707683e-06, |
| "loss": 1.3652875423431396, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.44954128440367, |
| "grad_norm": 0.38207677006721497, |
| "learning_rate": 1.8083833431975805e-06, |
| "loss": 1.3762791156768799, |
| "step": 1602 |
| }, |
| { |
| "epoch": 2.452599388379205, |
| "grad_norm": 0.4357513189315796, |
| "learning_rate": 1.7997255384437695e-06, |
| "loss": 1.5232503414154053, |
| "step": 1604 |
| }, |
| { |
| "epoch": 2.45565749235474, |
| "grad_norm": 0.3423779308795929, |
| "learning_rate": 1.7911098225878309e-06, |
| "loss": 1.5271486043930054, |
| "step": 1606 |
| }, |
| { |
| "epoch": 2.458715596330275, |
| "grad_norm": 5.960415363311768, |
| "learning_rate": 1.7825362936295171e-06, |
| "loss": 1.3485842943191528, |
| "step": 1608 |
| }, |
| { |
| "epoch": 2.46177370030581, |
| "grad_norm": 0.36111417412757874, |
| "learning_rate": 1.774005049088725e-06, |
| "loss": 1.2900433540344238, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.4648318042813457, |
| "grad_norm": 0.33147767186164856, |
| "learning_rate": 1.7655161860043873e-06, |
| "loss": 1.4210761785507202, |
| "step": 1612 |
| }, |
| { |
| "epoch": 2.467889908256881, |
| "grad_norm": 0.3786766231060028, |
| "learning_rate": 1.7570698009333664e-06, |
| "loss": 1.370017409324646, |
| "step": 1614 |
| }, |
| { |
| "epoch": 2.470948012232416, |
| "grad_norm": 1.8267617225646973, |
| "learning_rate": 1.7486659899493537e-06, |
| "loss": 1.5153461694717407, |
| "step": 1616 |
| }, |
| { |
| "epoch": 2.474006116207951, |
| "grad_norm": 0.3199278712272644, |
| "learning_rate": 1.740304848641787e-06, |
| "loss": 1.3838684558868408, |
| "step": 1618 |
| }, |
| { |
| "epoch": 2.477064220183486, |
| "grad_norm": 0.3670620322227478, |
| "learning_rate": 1.731986472114751e-06, |
| "loss": 1.33723783493042, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.4801223241590216, |
| "grad_norm": 0.36861374974250793, |
| "learning_rate": 1.7237109549859043e-06, |
| "loss": 1.2932226657867432, |
| "step": 1622 |
| }, |
| { |
| "epoch": 2.4831804281345566, |
| "grad_norm": 0.34438320994377136, |
| "learning_rate": 1.7154783913853968e-06, |
| "loss": 1.42689049243927, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.4862385321100917, |
| "grad_norm": 0.23838122189044952, |
| "learning_rate": 1.7072888749548033e-06, |
| "loss": 1.4100431203842163, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.489296636085627, |
| "grad_norm": 0.46484264731407166, |
| "learning_rate": 1.6991424988460592e-06, |
| "loss": 1.3829045295715332, |
| "step": 1628 |
| }, |
| { |
| "epoch": 2.4923547400611623, |
| "grad_norm": 0.3008574843406677, |
| "learning_rate": 1.6910393557203964e-06, |
| "loss": 1.5693084001541138, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.4954128440366974, |
| "grad_norm": 0.37115153670310974, |
| "learning_rate": 1.6829795377472908e-06, |
| "loss": 1.7590757608413696, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.4984709480122325, |
| "grad_norm": 0.616698682308197, |
| "learning_rate": 1.674963136603417e-06, |
| "loss": 1.6397650241851807, |
| "step": 1634 |
| }, |
| { |
| "epoch": 2.5015290519877675, |
| "grad_norm": 0.384959876537323, |
| "learning_rate": 1.6669902434716046e-06, |
| "loss": 1.6299896240234375, |
| "step": 1636 |
| }, |
| { |
| "epoch": 2.5045871559633026, |
| "grad_norm": 0.8294275403022766, |
| "learning_rate": 1.6590609490397958e-06, |
| "loss": 1.5394856929779053, |
| "step": 1638 |
| }, |
| { |
| "epoch": 2.5076452599388377, |
| "grad_norm": 0.40894415974617004, |
| "learning_rate": 1.6511753435000205e-06, |
| "loss": 1.2182371616363525, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.510703363914373, |
| "grad_norm": 0.45905759930610657, |
| "learning_rate": 1.6433335165473686e-06, |
| "loss": 1.2023439407348633, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.5137614678899083, |
| "grad_norm": 0.38532376289367676, |
| "learning_rate": 1.635535557378968e-06, |
| "loss": 1.6095008850097656, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.5168195718654434, |
| "grad_norm": 1.44415283203125, |
| "learning_rate": 1.6277815546929688e-06, |
| "loss": 1.6082322597503662, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.5198776758409784, |
| "grad_norm": 0.5093996524810791, |
| "learning_rate": 1.6200715966875394e-06, |
| "loss": 1.7141090631484985, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.522935779816514, |
| "grad_norm": 0.5241023898124695, |
| "learning_rate": 1.6124057710598603e-06, |
| "loss": 1.6450610160827637, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.525993883792049, |
| "grad_norm": 0.49204516410827637, |
| "learning_rate": 1.6047841650051272e-06, |
| "loss": 1.6974513530731201, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.529051987767584, |
| "grad_norm": 0.8506813049316406, |
| "learning_rate": 1.5972068652155554e-06, |
| "loss": 1.5313912630081177, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.532110091743119, |
| "grad_norm": 0.33754727244377136, |
| "learning_rate": 1.5896739578794e-06, |
| "loss": 1.5209699869155884, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.5351681957186543, |
| "grad_norm": 0.7774704694747925, |
| "learning_rate": 1.5821855286799742e-06, |
| "loss": 1.4035563468933105, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.5382262996941893, |
| "grad_norm": 0.6433319449424744, |
| "learning_rate": 1.5747416627946673e-06, |
| "loss": 1.665273666381836, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.541284403669725, |
| "grad_norm": 0.6971220970153809, |
| "learning_rate": 1.5673424448939887e-06, |
| "loss": 1.5019344091415405, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.54434250764526, |
| "grad_norm": 0.40314802527427673, |
| "learning_rate": 1.5599879591405917e-06, |
| "loss": 1.1620054244995117, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.547400611620795, |
| "grad_norm": 0.48018017411231995, |
| "learning_rate": 1.552678289188326e-06, |
| "loss": 1.6923828125, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.5504587155963305, |
| "grad_norm": 0.4809359312057495, |
| "learning_rate": 1.545413518181283e-06, |
| "loss": 1.7656713724136353, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.5535168195718656, |
| "grad_norm": 0.40401753783226013, |
| "learning_rate": 1.5381937287528449e-06, |
| "loss": 1.8313161134719849, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.5565749235474007, |
| "grad_norm": 0.4581202268600464, |
| "learning_rate": 1.5310190030247546e-06, |
| "loss": 1.7572789192199707, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.5596330275229358, |
| "grad_norm": 0.9305920600891113, |
| "learning_rate": 1.5238894226061737e-06, |
| "loss": 1.7307026386260986, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.562691131498471, |
| "grad_norm": 0.47380930185317993, |
| "learning_rate": 1.5168050685927566e-06, |
| "loss": 1.5947740077972412, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.565749235474006, |
| "grad_norm": 1.2263463735580444, |
| "learning_rate": 1.5097660215657306e-06, |
| "loss": 1.4555588960647583, |
| "step": 1678 |
| }, |
| { |
| "epoch": 2.5688073394495414, |
| "grad_norm": 0.43118909001350403, |
| "learning_rate": 1.5027723615909745e-06, |
| "loss": 1.0147868394851685, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.5718654434250765, |
| "grad_norm": 0.5391921401023865, |
| "learning_rate": 1.4958241682181137e-06, |
| "loss": 1.0223249197006226, |
| "step": 1682 |
| }, |
| { |
| "epoch": 2.5749235474006116, |
| "grad_norm": 0.2522028386592865, |
| "learning_rate": 1.4889215204796082e-06, |
| "loss": 1.250197172164917, |
| "step": 1684 |
| }, |
| { |
| "epoch": 2.5779816513761467, |
| "grad_norm": 0.29159918427467346, |
| "learning_rate": 1.4820644968898605e-06, |
| "loss": 1.1835776567459106, |
| "step": 1686 |
| }, |
| { |
| "epoch": 2.581039755351682, |
| "grad_norm": 0.2946909964084625, |
| "learning_rate": 1.47525317544432e-06, |
| "loss": 1.1374409198760986, |
| "step": 1688 |
| }, |
| { |
| "epoch": 2.5840978593272173, |
| "grad_norm": 0.19036340713500977, |
| "learning_rate": 1.468487633618594e-06, |
| "loss": 1.1817882061004639, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.5871559633027523, |
| "grad_norm": 1.4873279333114624, |
| "learning_rate": 1.4617679483675673e-06, |
| "loss": 1.4171775579452515, |
| "step": 1692 |
| }, |
| { |
| "epoch": 2.5902140672782874, |
| "grad_norm": 0.32151684165000916, |
| "learning_rate": 1.4550941961245288e-06, |
| "loss": 1.3625459671020508, |
| "step": 1694 |
| }, |
| { |
| "epoch": 2.5932721712538225, |
| "grad_norm": 0.26637983322143555, |
| "learning_rate": 1.4484664528003026e-06, |
| "loss": 1.2058180570602417, |
| "step": 1696 |
| }, |
| { |
| "epoch": 2.5963302752293576, |
| "grad_norm": 0.5087877511978149, |
| "learning_rate": 1.4418847937823784e-06, |
| "loss": 1.425114631652832, |
| "step": 1698 |
| }, |
| { |
| "epoch": 2.599388379204893, |
| "grad_norm": 0.9368872046470642, |
| "learning_rate": 1.4353492939340618e-06, |
| "loss": 1.4749643802642822, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.602446483180428, |
| "grad_norm": 0.48912081122398376, |
| "learning_rate": 1.4288600275936184e-06, |
| "loss": 1.245436668395996, |
| "step": 1702 |
| }, |
| { |
| "epoch": 2.6055045871559632, |
| "grad_norm": 0.4674423635005951, |
| "learning_rate": 1.4224170685734303e-06, |
| "loss": 1.4404422044754028, |
| "step": 1704 |
| }, |
| { |
| "epoch": 2.6085626911314987, |
| "grad_norm": 0.7305318117141724, |
| "learning_rate": 1.416020490159152e-06, |
| "loss": 1.6482999324798584, |
| "step": 1706 |
| }, |
| { |
| "epoch": 2.611620795107034, |
| "grad_norm": 0.5728065371513367, |
| "learning_rate": 1.4096703651088848e-06, |
| "loss": 1.1557910442352295, |
| "step": 1708 |
| }, |
| { |
| "epoch": 2.614678899082569, |
| "grad_norm": 0.6479355096817017, |
| "learning_rate": 1.4033667656523405e-06, |
| "loss": 1.4093899726867676, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.617737003058104, |
| "grad_norm": 1.1274484395980835, |
| "learning_rate": 1.3971097634900262e-06, |
| "loss": 1.4923943281173706, |
| "step": 1712 |
| }, |
| { |
| "epoch": 2.620795107033639, |
| "grad_norm": 0.5374640822410583, |
| "learning_rate": 1.3908994297924275e-06, |
| "loss": 1.3800336122512817, |
| "step": 1714 |
| }, |
| { |
| "epoch": 2.623853211009174, |
| "grad_norm": 0.6038364171981812, |
| "learning_rate": 1.3847358351991945e-06, |
| "loss": 1.2194199562072754, |
| "step": 1716 |
| }, |
| { |
| "epoch": 2.6269113149847096, |
| "grad_norm": 0.7064008712768555, |
| "learning_rate": 1.3786190498183446e-06, |
| "loss": 0.8604775667190552, |
| "step": 1718 |
| }, |
| { |
| "epoch": 2.6299694189602447, |
| "grad_norm": 0.3798482418060303, |
| "learning_rate": 1.3725491432254627e-06, |
| "loss": 1.5459158420562744, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.63302752293578, |
| "grad_norm": 0.47553232312202454, |
| "learning_rate": 1.3665261844629053e-06, |
| "loss": 1.466538429260254, |
| "step": 1722 |
| }, |
| { |
| "epoch": 2.636085626911315, |
| "grad_norm": 0.3397771716117859, |
| "learning_rate": 1.360550242039024e-06, |
| "loss": 1.3562582731246948, |
| "step": 1724 |
| }, |
| { |
| "epoch": 2.6391437308868504, |
| "grad_norm": 0.282279908657074, |
| "learning_rate": 1.354621383927379e-06, |
| "loss": 1.4752657413482666, |
| "step": 1726 |
| }, |
| { |
| "epoch": 2.6422018348623855, |
| "grad_norm": 0.3183048963546753, |
| "learning_rate": 1.3487396775659691e-06, |
| "loss": 1.4154858589172363, |
| "step": 1728 |
| }, |
| { |
| "epoch": 2.6452599388379205, |
| "grad_norm": 0.4210142493247986, |
| "learning_rate": 1.3429051898564623e-06, |
| "loss": 1.3750901222229004, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.6483180428134556, |
| "grad_norm": 0.6870266795158386, |
| "learning_rate": 1.337117987163439e-06, |
| "loss": 1.5814931392669678, |
| "step": 1732 |
| }, |
| { |
| "epoch": 2.6513761467889907, |
| "grad_norm": 0.4824894964694977, |
| "learning_rate": 1.3313781353136329e-06, |
| "loss": 1.2281584739685059, |
| "step": 1734 |
| }, |
| { |
| "epoch": 2.6544342507645258, |
| "grad_norm": 0.2543982267379761, |
| "learning_rate": 1.3256856995951852e-06, |
| "loss": 1.0042641162872314, |
| "step": 1736 |
| }, |
| { |
| "epoch": 2.6574923547400613, |
| "grad_norm": 0.39150846004486084, |
| "learning_rate": 1.3200407447568985e-06, |
| "loss": 1.6282243728637695, |
| "step": 1738 |
| }, |
| { |
| "epoch": 2.6605504587155964, |
| "grad_norm": 0.43744921684265137, |
| "learning_rate": 1.3144433350075045e-06, |
| "loss": 1.419670820236206, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.6636085626911314, |
| "grad_norm": 0.5169599652290344, |
| "learning_rate": 1.3088935340149312e-06, |
| "loss": 1.5492973327636719, |
| "step": 1742 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.3686998188495636, |
| "learning_rate": 1.3033914049055776e-06, |
| "loss": 1.390296459197998, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.669724770642202, |
| "grad_norm": 0.3961811363697052, |
| "learning_rate": 1.2979370102636001e-06, |
| "loss": 1.6185352802276611, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.672782874617737, |
| "grad_norm": 0.4181622266769409, |
| "learning_rate": 1.2925304121301956e-06, |
| "loss": 1.47446608543396, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.675840978593272, |
| "grad_norm": 0.5175849199295044, |
| "learning_rate": 1.2871716720029001e-06, |
| "loss": 1.4941065311431885, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.6788990825688073, |
| "grad_norm": 0.4671924412250519, |
| "learning_rate": 1.2818608508348831e-06, |
| "loss": 1.3738720417022705, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.6819571865443423, |
| "grad_norm": 0.31229135394096375, |
| "learning_rate": 1.2765980090342638e-06, |
| "loss": 1.0343739986419678, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.6850152905198774, |
| "grad_norm": 0.5780667662620544, |
| "learning_rate": 1.2713832064634127e-06, |
| "loss": 1.4987692832946777, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.688073394495413, |
| "grad_norm": 0.29605942964553833, |
| "learning_rate": 1.2662165024382813e-06, |
| "loss": 1.4711230993270874, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.691131498470948, |
| "grad_norm": 0.4572795629501343, |
| "learning_rate": 1.2610979557277186e-06, |
| "loss": 1.4898228645324707, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.694189602446483, |
| "grad_norm": 0.5139583945274353, |
| "learning_rate": 1.2560276245528099e-06, |
| "loss": 1.4924449920654297, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.6972477064220186, |
| "grad_norm": 0.3455151319503784, |
| "learning_rate": 1.251005566586209e-06, |
| "loss": 1.3008229732513428, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.7003058103975537, |
| "grad_norm": 0.5034812092781067, |
| "learning_rate": 1.2460318389514868e-06, |
| "loss": 1.5259795188903809, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.7033639143730888, |
| "grad_norm": 0.55739825963974, |
| "learning_rate": 1.241106498222476e-06, |
| "loss": 1.610971212387085, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.706422018348624, |
| "grad_norm": 0.3922676146030426, |
| "learning_rate": 1.2362296004226327e-06, |
| "loss": 1.3188968896865845, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.709480122324159, |
| "grad_norm": 0.4953126311302185, |
| "learning_rate": 1.2314012010243973e-06, |
| "loss": 1.5828558206558228, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.712538226299694, |
| "grad_norm": 0.6791023015975952, |
| "learning_rate": 1.2266213549485638e-06, |
| "loss": 1.3703022003173828, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.7155963302752295, |
| "grad_norm": 0.37211811542510986, |
| "learning_rate": 1.2218901165636526e-06, |
| "loss": 1.504420280456543, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.7186544342507646, |
| "grad_norm": 0.2997111678123474, |
| "learning_rate": 1.2172075396852972e-06, |
| "loss": 1.442054271697998, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.7217125382262997, |
| "grad_norm": 0.3290131390094757, |
| "learning_rate": 1.212573677575627e-06, |
| "loss": 1.5728079080581665, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.7247706422018347, |
| "grad_norm": 0.3726375102996826, |
| "learning_rate": 1.2079885829426653e-06, |
| "loss": 1.6637623310089111, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.7278287461773703, |
| "grad_norm": 0.7502315640449524, |
| "learning_rate": 1.2034523079397264e-06, |
| "loss": 1.550297737121582, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.7308868501529053, |
| "grad_norm": 0.3677420914173126, |
| "learning_rate": 1.1989649041648244e-06, |
| "loss": 1.3913054466247559, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.7339449541284404, |
| "grad_norm": 0.6194299459457397, |
| "learning_rate": 1.1945264226600878e-06, |
| "loss": 1.49534010887146, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.7370030581039755, |
| "grad_norm": 0.42255425453186035, |
| "learning_rate": 1.1901369139111737e-06, |
| "loss": 1.5017262697219849, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.7400611620795106, |
| "grad_norm": 0.39475998282432556, |
| "learning_rate": 1.1857964278467003e-06, |
| "loss": 1.4985376596450806, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.7431192660550456, |
| "grad_norm": 0.4835125207901001, |
| "learning_rate": 1.1815050138376731e-06, |
| "loss": 1.513980746269226, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.746177370030581, |
| "grad_norm": 0.27400922775268555, |
| "learning_rate": 1.1772627206969286e-06, |
| "loss": 1.5117716789245605, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.7492354740061162, |
| "grad_norm": 0.35452115535736084, |
| "learning_rate": 1.1730695966785726e-06, |
| "loss": 1.3024158477783203, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.7522935779816513, |
| "grad_norm": 0.45254552364349365, |
| "learning_rate": 1.1689256894774384e-06, |
| "loss": 1.3760697841644287, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.7553516819571864, |
| "grad_norm": 0.6041072010993958, |
| "learning_rate": 1.1648310462285386e-06, |
| "loss": 1.298436164855957, |
| "step": 1802 |
| }, |
| { |
| "epoch": 2.758409785932722, |
| "grad_norm": 0.555728554725647, |
| "learning_rate": 1.1607857135065337e-06, |
| "loss": 1.3885629177093506, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.761467889908257, |
| "grad_norm": 0.5937597751617432, |
| "learning_rate": 1.1567897373251967e-06, |
| "loss": 1.3754394054412842, |
| "step": 1806 |
| }, |
| { |
| "epoch": 2.764525993883792, |
| "grad_norm": 0.35898932814598083, |
| "learning_rate": 1.1528431631368957e-06, |
| "loss": 1.2469127178192139, |
| "step": 1808 |
| }, |
| { |
| "epoch": 2.767584097859327, |
| "grad_norm": 0.24282048642635345, |
| "learning_rate": 1.1489460358320728e-06, |
| "loss": 0.9015558958053589, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.770642201834862, |
| "grad_norm": 0.27484798431396484, |
| "learning_rate": 1.1450983997387365e-06, |
| "loss": 1.2076148986816406, |
| "step": 1812 |
| }, |
| { |
| "epoch": 2.7737003058103973, |
| "grad_norm": 0.29970651865005493, |
| "learning_rate": 1.1413002986219528e-06, |
| "loss": 1.2744965553283691, |
| "step": 1814 |
| }, |
| { |
| "epoch": 2.776758409785933, |
| "grad_norm": 0.26047366857528687, |
| "learning_rate": 1.1375517756833534e-06, |
| "loss": 1.3271204233169556, |
| "step": 1816 |
| }, |
| { |
| "epoch": 2.779816513761468, |
| "grad_norm": 0.3544829785823822, |
| "learning_rate": 1.1338528735606391e-06, |
| "loss": 1.3407413959503174, |
| "step": 1818 |
| }, |
| { |
| "epoch": 2.782874617737003, |
| "grad_norm": 0.24868814647197723, |
| "learning_rate": 1.1302036343270996e-06, |
| "loss": 1.4030461311340332, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.7859327217125385, |
| "grad_norm": 0.30862292647361755, |
| "learning_rate": 1.12660409949113e-06, |
| "loss": 1.3144700527191162, |
| "step": 1822 |
| }, |
| { |
| "epoch": 2.7889908256880735, |
| "grad_norm": 0.9225071668624878, |
| "learning_rate": 1.1230543099957608e-06, |
| "loss": 1.338538646697998, |
| "step": 1824 |
| }, |
| { |
| "epoch": 2.7920489296636086, |
| "grad_norm": 0.32354745268821716, |
| "learning_rate": 1.1195543062181954e-06, |
| "loss": 1.310173749923706, |
| "step": 1826 |
| }, |
| { |
| "epoch": 2.7951070336391437, |
| "grad_norm": 0.24064457416534424, |
| "learning_rate": 1.1161041279693445e-06, |
| "loss": 1.3204376697540283, |
| "step": 1828 |
| }, |
| { |
| "epoch": 2.7981651376146788, |
| "grad_norm": 0.23651309311389923, |
| "learning_rate": 1.1127038144933787e-06, |
| "loss": 1.281717300415039, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.801223241590214, |
| "grad_norm": 0.21533581614494324, |
| "learning_rate": 1.1093534044672796e-06, |
| "loss": 1.3252437114715576, |
| "step": 1832 |
| }, |
| { |
| "epoch": 2.8042813455657494, |
| "grad_norm": 0.38182252645492554, |
| "learning_rate": 1.1060529360004003e-06, |
| "loss": 1.27931809425354, |
| "step": 1834 |
| }, |
| { |
| "epoch": 2.8073394495412844, |
| "grad_norm": 0.12391169369220734, |
| "learning_rate": 1.1028024466340305e-06, |
| "loss": 1.1552488803863525, |
| "step": 1836 |
| }, |
| { |
| "epoch": 2.8103975535168195, |
| "grad_norm": 0.17293956875801086, |
| "learning_rate": 1.0996019733409732e-06, |
| "loss": 1.2036254405975342, |
| "step": 1838 |
| }, |
| { |
| "epoch": 2.8134556574923546, |
| "grad_norm": 0.21059419214725494, |
| "learning_rate": 1.096451552525121e-06, |
| "loss": 0.9850409030914307, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.81651376146789, |
| "grad_norm": 0.2714180648326874, |
| "learning_rate": 1.093351220021043e-06, |
| "loss": 1.2215778827667236, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.819571865443425, |
| "grad_norm": 0.22156941890716553, |
| "learning_rate": 1.090301011093575e-06, |
| "loss": 1.2629544734954834, |
| "step": 1844 |
| }, |
| { |
| "epoch": 2.8226299694189603, |
| "grad_norm": 0.20625340938568115, |
| "learning_rate": 1.0873009604374246e-06, |
| "loss": 1.2778034210205078, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.8256880733944953, |
| "grad_norm": 0.29442811012268066, |
| "learning_rate": 1.084351102176769e-06, |
| "loss": 1.2413357496261597, |
| "step": 1848 |
| }, |
| { |
| "epoch": 2.8287461773700304, |
| "grad_norm": 0.18544712662696838, |
| "learning_rate": 1.081451469864872e-06, |
| "loss": 1.2637240886688232, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.8318042813455655, |
| "grad_norm": 0.22874392569065094, |
| "learning_rate": 1.0786020964836991e-06, |
| "loss": 1.2410205602645874, |
| "step": 1852 |
| }, |
| { |
| "epoch": 2.834862385321101, |
| "grad_norm": 0.2457342892885208, |
| "learning_rate": 1.075803014443546e-06, |
| "loss": 1.2094589471817017, |
| "step": 1854 |
| }, |
| { |
| "epoch": 2.837920489296636, |
| "grad_norm": 0.22759026288986206, |
| "learning_rate": 1.0730542555826654e-06, |
| "loss": 1.274350643157959, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.840978593272171, |
| "grad_norm": 0.206235870718956, |
| "learning_rate": 1.07035585116691e-06, |
| "loss": 1.245356559753418, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.8440366972477067, |
| "grad_norm": 0.49194467067718506, |
| "learning_rate": 1.0677078318893716e-06, |
| "loss": 1.2151732444763184, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.8470948012232418, |
| "grad_norm": 0.33920061588287354, |
| "learning_rate": 1.0651102278700364e-06, |
| "loss": 1.2073887586593628, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.850152905198777, |
| "grad_norm": 0.25718092918395996, |
| "learning_rate": 1.062563068655439e-06, |
| "loss": 1.2325494289398193, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.853211009174312, |
| "grad_norm": 0.24365228414535522, |
| "learning_rate": 1.0600663832183293e-06, |
| "loss": 1.2226455211639404, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.856269113149847, |
| "grad_norm": 0.19332216680049896, |
| "learning_rate": 1.0576201999573405e-06, |
| "loss": 1.1831451654434204, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.859327217125382, |
| "grad_norm": 0.25319862365722656, |
| "learning_rate": 1.0552245466966678e-06, |
| "loss": 1.2440452575683594, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.8623853211009176, |
| "grad_norm": 0.27022072672843933, |
| "learning_rate": 1.0528794506857508e-06, |
| "loss": 1.2725245952606201, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.8654434250764527, |
| "grad_norm": 0.3112826943397522, |
| "learning_rate": 1.050584938598963e-06, |
| "loss": 1.282654047012329, |
| "step": 1874 |
| }, |
| { |
| "epoch": 2.8685015290519877, |
| "grad_norm": 0.2421792596578598, |
| "learning_rate": 1.048341036535311e-06, |
| "loss": 1.273242712020874, |
| "step": 1876 |
| }, |
| { |
| "epoch": 2.871559633027523, |
| "grad_norm": 0.23541022837162018, |
| "learning_rate": 1.0461477700181355e-06, |
| "loss": 1.2899906635284424, |
| "step": 1878 |
| }, |
| { |
| "epoch": 2.8746177370030583, |
| "grad_norm": 0.2772025167942047, |
| "learning_rate": 1.044005163994821e-06, |
| "loss": 1.2756202220916748, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.8776758409785934, |
| "grad_norm": 0.47361937165260315, |
| "learning_rate": 1.0419132428365116e-06, |
| "loss": 1.2930552959442139, |
| "step": 1882 |
| }, |
| { |
| "epoch": 2.8807339449541285, |
| "grad_norm": 0.18241485953330994, |
| "learning_rate": 1.0398720303378374e-06, |
| "loss": 1.223031997680664, |
| "step": 1884 |
| }, |
| { |
| "epoch": 2.8837920489296636, |
| "grad_norm": 0.40437427163124084, |
| "learning_rate": 1.0378815497166385e-06, |
| "loss": 1.2670063972473145, |
| "step": 1886 |
| }, |
| { |
| "epoch": 2.8868501529051986, |
| "grad_norm": 0.22389701008796692, |
| "learning_rate": 1.0359418236137047e-06, |
| "loss": 1.2270456552505493, |
| "step": 1888 |
| }, |
| { |
| "epoch": 2.8899082568807337, |
| "grad_norm": 0.29309970140457153, |
| "learning_rate": 1.0340528740925169e-06, |
| "loss": 1.2563271522521973, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.8929663608562692, |
| "grad_norm": 0.24637004733085632, |
| "learning_rate": 1.0322147226389952e-06, |
| "loss": 1.2668583393096924, |
| "step": 1892 |
| }, |
| { |
| "epoch": 2.8960244648318043, |
| "grad_norm": 0.5765001177787781, |
| "learning_rate": 1.0304273901612566e-06, |
| "loss": 1.2873437404632568, |
| "step": 1894 |
| }, |
| { |
| "epoch": 2.8990825688073394, |
| "grad_norm": 0.3287610411643982, |
| "learning_rate": 1.028690896989375e-06, |
| "loss": 1.274024248123169, |
| "step": 1896 |
| }, |
| { |
| "epoch": 2.9021406727828745, |
| "grad_norm": 0.2688363492488861, |
| "learning_rate": 1.027005262875151e-06, |
| "loss": 1.20585036277771, |
| "step": 1898 |
| }, |
| { |
| "epoch": 2.90519877675841, |
| "grad_norm": 0.3984238803386688, |
| "learning_rate": 1.0253705069918865e-06, |
| "loss": 1.2360919713974, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.908256880733945, |
| "grad_norm": 0.27637046575546265, |
| "learning_rate": 1.0237866479341687e-06, |
| "loss": 1.2752952575683594, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.91131498470948, |
| "grad_norm": 0.5071486234664917, |
| "learning_rate": 1.0222537037176572e-06, |
| "loss": 1.2954089641571045, |
| "step": 1904 |
| }, |
| { |
| "epoch": 2.914373088685015, |
| "grad_norm": 0.22012606263160706, |
| "learning_rate": 1.0207716917788768e-06, |
| "loss": 1.2765629291534424, |
| "step": 1906 |
| }, |
| { |
| "epoch": 2.9174311926605503, |
| "grad_norm": 0.20149464905261993, |
| "learning_rate": 1.019340628975023e-06, |
| "loss": 1.2535219192504883, |
| "step": 1908 |
| }, |
| { |
| "epoch": 2.9204892966360854, |
| "grad_norm": 0.227265864610672, |
| "learning_rate": 1.0179605315837695e-06, |
| "loss": 1.2175259590148926, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.923547400611621, |
| "grad_norm": 0.2566111087799072, |
| "learning_rate": 1.0166314153030799e-06, |
| "loss": 1.255599856376648, |
| "step": 1912 |
| }, |
| { |
| "epoch": 2.926605504587156, |
| "grad_norm": 0.38341450691223145, |
| "learning_rate": 1.0153532952510328e-06, |
| "loss": 1.2794301509857178, |
| "step": 1914 |
| }, |
| { |
| "epoch": 2.929663608562691, |
| "grad_norm": 0.28000977635383606, |
| "learning_rate": 1.0141261859656484e-06, |
| "loss": 1.2272768020629883, |
| "step": 1916 |
| }, |
| { |
| "epoch": 2.9327217125382266, |
| "grad_norm": 0.2550158202648163, |
| "learning_rate": 1.0129501014047236e-06, |
| "loss": 1.2561171054840088, |
| "step": 1918 |
| }, |
| { |
| "epoch": 2.9357798165137616, |
| "grad_norm": 0.21566316485404968, |
| "learning_rate": 1.0118250549456717e-06, |
| "loss": 1.2545552253723145, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.9388379204892967, |
| "grad_norm": 0.36798691749572754, |
| "learning_rate": 1.0107510593853716e-06, |
| "loss": 1.3016841411590576, |
| "step": 1922 |
| }, |
| { |
| "epoch": 2.941896024464832, |
| "grad_norm": 0.29115161299705505, |
| "learning_rate": 1.0097281269400234e-06, |
| "loss": 1.3122904300689697, |
| "step": 1924 |
| }, |
| { |
| "epoch": 2.944954128440367, |
| "grad_norm": 0.42286819219589233, |
| "learning_rate": 1.0087562692450062e-06, |
| "loss": 1.2751294374465942, |
| "step": 1926 |
| }, |
| { |
| "epoch": 2.948012232415902, |
| "grad_norm": 0.29917454719543457, |
| "learning_rate": 1.0078354973547484e-06, |
| "loss": 1.2971951961517334, |
| "step": 1928 |
| }, |
| { |
| "epoch": 2.9510703363914375, |
| "grad_norm": 0.28312069177627563, |
| "learning_rate": 1.0069658217426017e-06, |
| "loss": 1.2662827968597412, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.9541284403669725, |
| "grad_norm": 0.2748239040374756, |
| "learning_rate": 1.0061472523007213e-06, |
| "loss": 1.209917664527893, |
| "step": 1932 |
| }, |
| { |
| "epoch": 2.9571865443425076, |
| "grad_norm": 0.36147835850715637, |
| "learning_rate": 1.0053797983399524e-06, |
| "loss": 1.2387361526489258, |
| "step": 1934 |
| }, |
| { |
| "epoch": 2.9602446483180427, |
| "grad_norm": 0.34865546226501465, |
| "learning_rate": 1.004663468589726e-06, |
| "loss": 1.2596259117126465, |
| "step": 1936 |
| }, |
| { |
| "epoch": 2.963302752293578, |
| "grad_norm": 0.23798368871212006, |
| "learning_rate": 1.0039982711979603e-06, |
| "loss": 1.239612340927124, |
| "step": 1938 |
| }, |
| { |
| "epoch": 2.9663608562691133, |
| "grad_norm": 0.31115320324897766, |
| "learning_rate": 1.0033842137309649e-06, |
| "loss": 1.2498747110366821, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.9694189602446484, |
| "grad_norm": 0.37815067172050476, |
| "learning_rate": 1.0028213031733578e-06, |
| "loss": 1.3014090061187744, |
| "step": 1942 |
| }, |
| { |
| "epoch": 2.9724770642201834, |
| "grad_norm": 0.26476937532424927, |
| "learning_rate": 1.0023095459279838e-06, |
| "loss": 1.2854735851287842, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.9755351681957185, |
| "grad_norm": 0.3802984952926636, |
| "learning_rate": 1.0018489478158434e-06, |
| "loss": 1.3032188415527344, |
| "step": 1946 |
| }, |
| { |
| "epoch": 2.9785932721712536, |
| "grad_norm": 0.3544924855232239, |
| "learning_rate": 1.0014395140760255e-06, |
| "loss": 1.2610487937927246, |
| "step": 1948 |
| }, |
| { |
| "epoch": 2.981651376146789, |
| "grad_norm": 0.30221831798553467, |
| "learning_rate": 1.0010812493656488e-06, |
| "loss": 1.2582671642303467, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.984709480122324, |
| "grad_norm": 0.2731051743030548, |
| "learning_rate": 1.000774157759806e-06, |
| "loss": 1.2794151306152344, |
| "step": 1952 |
| }, |
| { |
| "epoch": 2.9877675840978593, |
| "grad_norm": 0.3089560270309448, |
| "learning_rate": 1.0005182427515222e-06, |
| "loss": 1.334507703781128, |
| "step": 1954 |
| }, |
| { |
| "epoch": 2.9908256880733948, |
| "grad_norm": 0.31155917048454285, |
| "learning_rate": 1.0003135072517108e-06, |
| "loss": 1.3732435703277588, |
| "step": 1956 |
| }, |
| { |
| "epoch": 2.99388379204893, |
| "grad_norm": 0.3963629901409149, |
| "learning_rate": 1.000159953589143e-06, |
| "loss": 1.6014021635055542, |
| "step": 1958 |
| }, |
| { |
| "epoch": 2.996941896024465, |
| "grad_norm": 0.8739917278289795, |
| "learning_rate": 1.00005758351042e-06, |
| "loss": 1.5767264366149902, |
| "step": 1960 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.2575660943984985, |
| "learning_rate": 1.0000063981799541e-06, |
| "loss": 1.7074545621871948, |
| "step": 1962 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1962, |
| "total_flos": 2.4882019125669396e+18, |
| "train_loss": 1.4736498374943825, |
| "train_runtime": 8380.6004, |
| "train_samples_per_second": 3.746, |
| "train_steps_per_second": 0.234 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1962, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.4882019125669396e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
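
The file above is a standard Hugging Face `Trainer` state dump. As a minimal sketch (not part of the original file), the snippet below assumes the JSON has been saved as `trainer_state.json` (a hypothetical path) and shows one way to pull the per-step loss out of it for quick inspection; every key it reads (`log_history`, `loss`, `learning_rate`, `global_step`, `train_loss`) appears in the dump itself.

```python
import json

# A minimal sketch, assuming this JSON was saved as "trainer_state.json"
# (hypothetical path): load the Trainer state and inspect the loss curve.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step records carry both "loss" and "learning_rate"; the closing
# summary record ("train_loss", "train_runtime", ...) carries neither,
# so filtering on those keys keeps only the training curve.
records = [r for r in state["log_history"] if "loss" in r and "learning_rate" in r]

print(f'{len(records)} log records over {state["global_step"]} optimizer steps')
print(f'mean train loss reported by the Trainer: '
      f'{state["log_history"][-1]["train_loss"]:.4f}')

# The raw per-step loss is noisy, so smooth it with a trailing moving
# average before reading off a trend.
window = 25
losses = [r["loss"] for r in records]
smoothed = [sum(losses[max(0, i - window + 1):i + 1]) / (i - max(0, i - window + 1) + 1)
            for i in range(len(losses))]
print(f'last smoothed loss: {smoothed[-1]:.4f}')
```

With `logging_steps` set to 2 in this run, the filter yields roughly one record per two optimizer steps, so the smoothed curve covers the full 1962-step schedule.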