| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 1370, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00291970802919708, |
| "grad_norm": 4.875, |
| "learning_rate": 7.246376811594204e-08, |
| "loss": 1.320786714553833, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00583941605839416, |
| "grad_norm": 26.375, |
| "learning_rate": 2.173913043478261e-07, |
| "loss": 2.3353517055511475, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.008759124087591242, |
| "grad_norm": 5.125, |
| "learning_rate": 3.623188405797102e-07, |
| "loss": 1.9446890354156494, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01167883211678832, |
| "grad_norm": 2.234375, |
| "learning_rate": 5.072463768115942e-07, |
| "loss": 1.6843594312667847, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.014598540145985401, |
| "grad_norm": 8.8125, |
| "learning_rate": 6.521739130434783e-07, |
| "loss": 1.8062303066253662, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.017518248175182483, |
| "grad_norm": 5.0, |
| "learning_rate": 7.971014492753623e-07, |
| "loss": 1.9280399084091187, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.020437956204379562, |
| "grad_norm": 3.015625, |
| "learning_rate": 9.420289855072465e-07, |
| "loss": 1.570988655090332, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.02335766423357664, |
| "grad_norm": 11.25, |
| "learning_rate": 1.0869565217391306e-06, |
| "loss": 1.7710015773773193, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.026277372262773723, |
| "grad_norm": 4.53125, |
| "learning_rate": 1.2318840579710147e-06, |
| "loss": 1.9166163206100464, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.029197080291970802, |
| "grad_norm": 23.5, |
| "learning_rate": 1.3768115942028987e-06, |
| "loss": 1.9079008102416992, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.032116788321167884, |
| "grad_norm": 6.15625, |
| "learning_rate": 1.521739130434783e-06, |
| "loss": 1.9891327619552612, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.035036496350364967, |
| "grad_norm": 8.6875, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 1.8731980323791504, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.03795620437956204, |
| "grad_norm": 41.5, |
| "learning_rate": 1.8115942028985508e-06, |
| "loss": 1.996793508529663, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.040875912408759124, |
| "grad_norm": 16.125, |
| "learning_rate": 1.956521739130435e-06, |
| "loss": 2.4439406394958496, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.043795620437956206, |
| "grad_norm": 4.78125, |
| "learning_rate": 2.101449275362319e-06, |
| "loss": 1.4941191673278809, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04671532846715328, |
| "grad_norm": 5.71875, |
| "learning_rate": 2.246376811594203e-06, |
| "loss": 1.9384567737579346, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.049635036496350364, |
| "grad_norm": 3.140625, |
| "learning_rate": 2.391304347826087e-06, |
| "loss": 2.106153964996338, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.052554744525547446, |
| "grad_norm": 25.875, |
| "learning_rate": 2.5362318840579714e-06, |
| "loss": 2.235496997833252, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.05547445255474453, |
| "grad_norm": 6.46875, |
| "learning_rate": 2.6811594202898555e-06, |
| "loss": 2.4106810092926025, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.058394160583941604, |
| "grad_norm": 4.375, |
| "learning_rate": 2.8260869565217393e-06, |
| "loss": 1.6466758251190186, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.061313868613138686, |
| "grad_norm": 95.5, |
| "learning_rate": 2.9710144927536235e-06, |
| "loss": 1.9993230104446411, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.06423357664233577, |
| "grad_norm": 3.953125, |
| "learning_rate": 3.1159420289855073e-06, |
| "loss": 1.7203528881072998, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.06715328467153285, |
| "grad_norm": 13.5625, |
| "learning_rate": 3.2608695652173914e-06, |
| "loss": 2.5018796920776367, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.07007299270072993, |
| "grad_norm": 12.6875, |
| "learning_rate": 3.4057971014492756e-06, |
| "loss": 1.935620903968811, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.072992700729927, |
| "grad_norm": 4.125, |
| "learning_rate": 3.55072463768116e-06, |
| "loss": 1.9458433389663696, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07591240875912408, |
| "grad_norm": 2.171875, |
| "learning_rate": 3.6956521739130436e-06, |
| "loss": 1.321602702140808, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.07883211678832117, |
| "grad_norm": 3.578125, |
| "learning_rate": 3.840579710144928e-06, |
| "loss": 2.0101318359375, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.08175182481751825, |
| "grad_norm": 5.625, |
| "learning_rate": 3.9855072463768115e-06, |
| "loss": 2.0588250160217285, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.08467153284671533, |
| "grad_norm": 5.3125, |
| "learning_rate": 4.130434782608696e-06, |
| "loss": 1.860298752784729, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.08759124087591241, |
| "grad_norm": 5.9375, |
| "learning_rate": 4.27536231884058e-06, |
| "loss": 1.9684100151062012, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0905109489051095, |
| "grad_norm": 9.375, |
| "learning_rate": 4.4202898550724645e-06, |
| "loss": 1.980459213256836, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.09343065693430656, |
| "grad_norm": 4.90625, |
| "learning_rate": 4.565217391304348e-06, |
| "loss": 1.8493075370788574, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.09635036496350365, |
| "grad_norm": 2.609375, |
| "learning_rate": 4.710144927536232e-06, |
| "loss": 1.5537524223327637, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.09927007299270073, |
| "grad_norm": 4.46875, |
| "learning_rate": 4.855072463768117e-06, |
| "loss": 1.8475682735443115, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.10218978102189781, |
| "grad_norm": 3.734375, |
| "learning_rate": 5e-06, |
| "loss": 1.7411353588104248, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.10510948905109489, |
| "grad_norm": 29.875, |
| "learning_rate": 4.999973760423467e-06, |
| "loss": 2.0845284461975098, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.10802919708029197, |
| "grad_norm": 6.21875, |
| "learning_rate": 4.99989504230588e-06, |
| "loss": 1.5018064975738525, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.11094890510948906, |
| "grad_norm": 2.21875, |
| "learning_rate": 4.999763847483267e-06, |
| "loss": 1.464540958404541, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.11386861313868613, |
| "grad_norm": 4.53125, |
| "learning_rate": 4.999580179015625e-06, |
| "loss": 1.8232789039611816, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.11678832116788321, |
| "grad_norm": 1.7578125, |
| "learning_rate": 4.999344041186848e-06, |
| "loss": 1.096325159072876, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.11970802919708029, |
| "grad_norm": 3.328125, |
| "learning_rate": 4.999055439504633e-06, |
| "loss": 1.8037409782409668, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.12262773722627737, |
| "grad_norm": 3.84375, |
| "learning_rate": 4.998714380700345e-06, |
| "loss": 1.5575973987579346, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.12554744525547445, |
| "grad_norm": 4.1875, |
| "learning_rate": 4.998320872728862e-06, |
| "loss": 1.8613684177398682, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.12846715328467154, |
| "grad_norm": 5.15625, |
| "learning_rate": 4.9978749247683895e-06, |
| "loss": 1.732508897781372, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.13138686131386862, |
| "grad_norm": 2.59375, |
| "learning_rate": 4.99737654722025e-06, |
| "loss": 1.3435773849487305, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1343065693430657, |
| "grad_norm": 3.25, |
| "learning_rate": 4.996825751708635e-06, |
| "loss": 1.7478176355361938, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.13722627737226278, |
| "grad_norm": 2.03125, |
| "learning_rate": 4.996222551080337e-06, |
| "loss": 1.4358994960784912, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.14014598540145987, |
| "grad_norm": 5.4375, |
| "learning_rate": 4.9955669594044466e-06, |
| "loss": 1.870757818222046, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.14306569343065692, |
| "grad_norm": 3.671875, |
| "learning_rate": 4.994858991972031e-06, |
| "loss": 1.6408865451812744, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.145985401459854, |
| "grad_norm": 3.375, |
| "learning_rate": 4.994098665295768e-06, |
| "loss": 1.4728097915649414, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.14890510948905109, |
| "grad_norm": 7.4375, |
| "learning_rate": 4.9932859971095705e-06, |
| "loss": 1.7583755254745483, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.15182481751824817, |
| "grad_norm": 3.25, |
| "learning_rate": 4.992421006368166e-06, |
| "loss": 1.6836040019989014, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.15474452554744525, |
| "grad_norm": 26.25, |
| "learning_rate": 4.991503713246659e-06, |
| "loss": 1.9515830278396606, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.15766423357664233, |
| "grad_norm": 62.25, |
| "learning_rate": 4.990534139140055e-06, |
| "loss": 2.0257816314697266, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.16058394160583941, |
| "grad_norm": 2.640625, |
| "learning_rate": 4.989512306662767e-06, |
| "loss": 1.4182727336883545, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1635036496350365, |
| "grad_norm": 6.6875, |
| "learning_rate": 4.988438239648084e-06, |
| "loss": 1.70530366897583, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.16642335766423358, |
| "grad_norm": 3.5625, |
| "learning_rate": 4.98731196314762e-06, |
| "loss": 1.5088133811950684, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.16934306569343066, |
| "grad_norm": 3.078125, |
| "learning_rate": 4.986133503430724e-06, |
| "loss": 1.6265062093734741, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.17226277372262774, |
| "grad_norm": 6.4375, |
| "learning_rate": 4.98490288798387e-06, |
| "loss": 1.402962327003479, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.17518248175182483, |
| "grad_norm": 4.125, |
| "learning_rate": 4.983620145510017e-06, |
| "loss": 1.8057794570922852, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1781021897810219, |
| "grad_norm": 6.875, |
| "learning_rate": 4.982285305927937e-06, |
| "loss": 1.9605462551116943, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.181021897810219, |
| "grad_norm": 3.625, |
| "learning_rate": 4.980898400371521e-06, |
| "loss": 1.8519611358642578, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.18394160583941604, |
| "grad_norm": 10.0625, |
| "learning_rate": 4.9794594611890465e-06, |
| "loss": 1.6692755222320557, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.18686131386861313, |
| "grad_norm": 6.1875, |
| "learning_rate": 4.977968521942429e-06, |
| "loss": 1.8997008800506592, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.1897810218978102, |
| "grad_norm": 1.8515625, |
| "learning_rate": 4.97642561740644e-06, |
| "loss": 1.8168402910232544, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1927007299270073, |
| "grad_norm": 16.375, |
| "learning_rate": 4.974830783567886e-06, |
| "loss": 1.4727129936218262, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.19562043795620437, |
| "grad_norm": 7.71875, |
| "learning_rate": 4.973184057624781e-06, |
| "loss": 1.6138420104980469, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.19854014598540146, |
| "grad_norm": 3.5, |
| "learning_rate": 4.971485477985474e-06, |
| "loss": 1.6893023252487183, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.20145985401459854, |
| "grad_norm": 1.421875, |
| "learning_rate": 4.969735084267752e-06, |
| "loss": 1.3670828342437744, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.20437956204379562, |
| "grad_norm": 8.4375, |
| "learning_rate": 4.967932917297915e-06, |
| "loss": 1.6938685178756714, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2072992700729927, |
| "grad_norm": 4.0625, |
| "learning_rate": 4.966079019109831e-06, |
| "loss": 2.2959558963775635, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.21021897810218979, |
| "grad_norm": 3.328125, |
| "learning_rate": 4.964173432943946e-06, |
| "loss": 1.6218578815460205, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.21313868613138687, |
| "grad_norm": 9.0625, |
| "learning_rate": 4.962216203246281e-06, |
| "loss": 2.592639446258545, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.21605839416058395, |
| "grad_norm": 3.3125, |
| "learning_rate": 4.960207375667396e-06, |
| "loss": 1.5585392713546753, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.21897810218978103, |
| "grad_norm": 3.96875, |
| "learning_rate": 4.958146997061319e-06, |
| "loss": 1.6422696113586426, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.22189781021897811, |
| "grad_norm": 5.59375, |
| "learning_rate": 4.956035115484465e-06, |
| "loss": 1.7883186340332031, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.22481751824817517, |
| "grad_norm": 2.140625, |
| "learning_rate": 4.953871780194501e-06, |
| "loss": 1.657930612564087, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.22773722627737225, |
| "grad_norm": 24.125, |
| "learning_rate": 4.951657041649206e-06, |
| "loss": 1.7987116575241089, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.23065693430656933, |
| "grad_norm": 12.0, |
| "learning_rate": 4.9493909515052944e-06, |
| "loss": 2.016146659851074, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.23357664233576642, |
| "grad_norm": 7.90625, |
| "learning_rate": 4.947073562617206e-06, |
| "loss": 1.3612116575241089, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2364963503649635, |
| "grad_norm": 3.8125, |
| "learning_rate": 4.944704929035877e-06, |
| "loss": 1.7367652654647827, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.23941605839416058, |
| "grad_norm": 2.875, |
| "learning_rate": 4.942285106007477e-06, |
| "loss": 1.3203725814819336, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.24233576642335766, |
| "grad_norm": 11.25, |
| "learning_rate": 4.9398141499721246e-06, |
| "loss": 1.7288057804107666, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.24525547445255474, |
| "grad_norm": 1.5625, |
| "learning_rate": 4.937292118562566e-06, |
| "loss": 1.383696436882019, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.24817518248175183, |
| "grad_norm": 12.5625, |
| "learning_rate": 4.934719070602833e-06, |
| "loss": 1.6433072090148926, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2510948905109489, |
| "grad_norm": 3.109375, |
| "learning_rate": 4.932095066106872e-06, |
| "loss": 1.4025721549987793, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.25401459854014596, |
| "grad_norm": 4.1875, |
| "learning_rate": 4.929420166277141e-06, |
| "loss": 1.6988599300384521, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.2569343065693431, |
| "grad_norm": 3.65625, |
| "learning_rate": 4.926694433503186e-06, |
| "loss": 1.6042873859405518, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.25985401459854013, |
| "grad_norm": 1.6484375, |
| "learning_rate": 4.923917931360185e-06, |
| "loss": 1.2862474918365479, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.26277372262773724, |
| "grad_norm": 6.65625, |
| "learning_rate": 4.9210907246074615e-06, |
| "loss": 1.7310783863067627, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2656934306569343, |
| "grad_norm": 4.5625, |
| "learning_rate": 4.9182128791869796e-06, |
| "loss": 1.5482988357543945, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.2686131386861314, |
| "grad_norm": 1.5078125, |
| "learning_rate": 4.9152844622218e-06, |
| "loss": 1.2439241409301758, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.27153284671532846, |
| "grad_norm": 4.3125, |
| "learning_rate": 4.91230554201452e-06, |
| "loss": 1.5766255855560303, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.27445255474452557, |
| "grad_norm": 3.90625, |
| "learning_rate": 4.9092761880456764e-06, |
| "loss": 1.311848759651184, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.2773722627737226, |
| "grad_norm": 39.75, |
| "learning_rate": 4.906196470972128e-06, |
| "loss": 1.5088813304901123, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.28029197080291973, |
| "grad_norm": 6.40625, |
| "learning_rate": 4.903066462625405e-06, |
| "loss": 1.6081913709640503, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.2832116788321168, |
| "grad_norm": 6.125, |
| "learning_rate": 4.899886236010036e-06, |
| "loss": 1.7471773624420166, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.28613138686131384, |
| "grad_norm": 4.09375, |
| "learning_rate": 4.896655865301842e-06, |
| "loss": 1.6127898693084717, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.28905109489051095, |
| "grad_norm": 3.1875, |
| "learning_rate": 4.893375425846209e-06, |
| "loss": 1.6075236797332764, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.291970802919708, |
| "grad_norm": 3.53125, |
| "learning_rate": 4.890044994156331e-06, |
| "loss": 1.712640643119812, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2948905109489051, |
| "grad_norm": 3.84375, |
| "learning_rate": 4.886664647911422e-06, |
| "loss": 1.5669183731079102, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.29781021897810217, |
| "grad_norm": 5.6875, |
| "learning_rate": 4.883234465954909e-06, |
| "loss": 1.7576971054077148, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.3007299270072993, |
| "grad_norm": 2.515625, |
| "learning_rate": 4.879754528292588e-06, |
| "loss": 1.5543663501739502, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.30364963503649633, |
| "grad_norm": 2.921875, |
| "learning_rate": 4.876224916090762e-06, |
| "loss": 1.9160549640655518, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.30656934306569344, |
| "grad_norm": 4.34375, |
| "learning_rate": 4.872645711674348e-06, |
| "loss": 1.646159291267395, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3094890510948905, |
| "grad_norm": 1.625, |
| "learning_rate": 4.8690169985249516e-06, |
| "loss": 1.1048507690429688, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.3124087591240876, |
| "grad_norm": 1.5625, |
| "learning_rate": 4.865338861278925e-06, |
| "loss": 1.0736052989959717, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.31532846715328466, |
| "grad_norm": 3.59375, |
| "learning_rate": 4.8616113857253925e-06, |
| "loss": 1.2035229206085205, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3182481751824818, |
| "grad_norm": 23.625, |
| "learning_rate": 4.857834658804247e-06, |
| "loss": 1.137906789779663, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.32116788321167883, |
| "grad_norm": 4.5625, |
| "learning_rate": 4.8540087686041234e-06, |
| "loss": 1.7008376121520996, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.32408759124087594, |
| "grad_norm": 8.75, |
| "learning_rate": 4.850133804360346e-06, |
| "loss": 1.6337850093841553, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.327007299270073, |
| "grad_norm": 3.984375, |
| "learning_rate": 4.8462098564528455e-06, |
| "loss": 1.1808865070343018, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.32992700729927005, |
| "grad_norm": 3.59375, |
| "learning_rate": 4.842237016404048e-06, |
| "loss": 1.5622849464416504, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.33284671532846716, |
| "grad_norm": 1.1875, |
| "learning_rate": 4.838215376876744e-06, |
| "loss": 1.1768817901611328, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.3357664233576642, |
| "grad_norm": 6.0, |
| "learning_rate": 4.834145031671931e-06, |
| "loss": 1.3726277351379395, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3386861313868613, |
| "grad_norm": 28.375, |
| "learning_rate": 4.830026075726615e-06, |
| "loss": 1.1469438076019287, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.3416058394160584, |
| "grad_norm": 3.421875, |
| "learning_rate": 4.8258586051116045e-06, |
| "loss": 1.5012977123260498, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.3445255474452555, |
| "grad_norm": 12.9375, |
| "learning_rate": 4.821642717029269e-06, |
| "loss": 1.6817822456359863, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.34744525547445254, |
| "grad_norm": 5.0625, |
| "learning_rate": 4.8173785098112675e-06, |
| "loss": 1.525681495666504, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.35036496350364965, |
| "grad_norm": 15.4375, |
| "learning_rate": 4.81306608291626e-06, |
| "loss": 2.0758631229400635, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3532846715328467, |
| "grad_norm": 3.25, |
| "learning_rate": 4.808705536927586e-06, |
| "loss": 1.4310352802276611, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.3562043795620438, |
| "grad_norm": 3.28125, |
| "learning_rate": 4.804296973550915e-06, |
| "loss": 1.6908133029937744, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.35912408759124087, |
| "grad_norm": 3.15625, |
| "learning_rate": 4.799840495611879e-06, |
| "loss": 1.2480230331420898, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.362043795620438, |
| "grad_norm": 2.75, |
| "learning_rate": 4.795336207053674e-06, |
| "loss": 1.5943894386291504, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.36496350364963503, |
| "grad_norm": 3.953125, |
| "learning_rate": 4.790784212934631e-06, |
| "loss": 1.1932544708251953, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3678832116788321, |
| "grad_norm": 5.53125, |
| "learning_rate": 4.786184619425773e-06, |
| "loss": 1.4538475275039673, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.3708029197080292, |
| "grad_norm": 6.1875, |
| "learning_rate": 4.781537533808331e-06, |
| "loss": 1.7138783931732178, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.37372262773722625, |
| "grad_norm": 1.609375, |
| "learning_rate": 4.7768430644712435e-06, |
| "loss": 1.37872314453125, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.37664233576642336, |
| "grad_norm": 6.25, |
| "learning_rate": 4.772101320908636e-06, |
| "loss": 1.4937684535980225, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.3795620437956204, |
| "grad_norm": 8.8125, |
| "learning_rate": 4.767312413717256e-06, |
| "loss": 1.4460338354110718, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.38248175182481753, |
| "grad_norm": 4.28125, |
| "learning_rate": 4.7624764545939015e-06, |
| "loss": 1.4206737279891968, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.3854014598540146, |
| "grad_norm": 2.671875, |
| "learning_rate": 4.757593556332811e-06, |
| "loss": 1.3555597066879272, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.3883211678832117, |
| "grad_norm": 3.1875, |
| "learning_rate": 4.752663832823038e-06, |
| "loss": 1.6055470705032349, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.39124087591240875, |
| "grad_norm": 4.09375, |
| "learning_rate": 4.747687399045787e-06, |
| "loss": 1.3127577304840088, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.39416058394160586, |
| "grad_norm": 5.40625, |
| "learning_rate": 4.7426643710717386e-06, |
| "loss": 1.6612601280212402, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.3970802919708029, |
| "grad_norm": 5.34375, |
| "learning_rate": 4.737594866058339e-06, |
| "loss": 1.2799599170684814, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 6.71875, |
| "learning_rate": 4.7324790022470675e-06, |
| "loss": 1.9163275957107544, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.4029197080291971, |
| "grad_norm": 2.328125, |
| "learning_rate": 4.727316898960681e-06, |
| "loss": 1.4439561367034912, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.4058394160583942, |
| "grad_norm": 10.6875, |
| "learning_rate": 4.722108676600427e-06, |
| "loss": 1.2920876741409302, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.40875912408759124, |
| "grad_norm": 3.671875, |
| "learning_rate": 4.7168544566432365e-06, |
| "loss": 1.691207766532898, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4116788321167883, |
| "grad_norm": 3.21875, |
| "learning_rate": 4.711554361638896e-06, |
| "loss": 1.527019739151001, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.4145985401459854, |
| "grad_norm": 3.1875, |
| "learning_rate": 4.70620851520718e-06, |
| "loss": 1.4309567213058472, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.41751824817518246, |
| "grad_norm": 2.390625, |
| "learning_rate": 4.7008170420349746e-06, |
| "loss": 1.2672343254089355, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.42043795620437957, |
| "grad_norm": 1.765625, |
| "learning_rate": 4.695380067873368e-06, |
| "loss": 1.3927721977233887, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4233576642335766, |
| "grad_norm": 2.75, |
| "learning_rate": 4.689897719534715e-06, |
| "loss": 1.5347919464111328, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.42627737226277373, |
| "grad_norm": 4.5625, |
| "learning_rate": 4.68437012488968e-06, |
| "loss": 1.2839910984039307, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.4291970802919708, |
| "grad_norm": 48.25, |
| "learning_rate": 4.678797412864258e-06, |
| "loss": 1.3073639869689941, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.4321167883211679, |
| "grad_norm": 4.1875, |
| "learning_rate": 4.673179713436762e-06, |
| "loss": 1.5608128309249878, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.43503649635036495, |
| "grad_norm": 2.875, |
| "learning_rate": 4.667517157634797e-06, |
| "loss": 1.6924610137939453, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.43795620437956206, |
| "grad_norm": 3.515625, |
| "learning_rate": 4.6618098775322e-06, |
| "loss": 1.218139886856079, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4408759124087591, |
| "grad_norm": 5.34375, |
| "learning_rate": 4.656058006245959e-06, |
| "loss": 1.4968738555908203, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.44379562043795623, |
| "grad_norm": 6.59375, |
| "learning_rate": 4.650261677933111e-06, |
| "loss": 1.522092580795288, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.4467153284671533, |
| "grad_norm": 3.109375, |
| "learning_rate": 4.644421027787614e-06, |
| "loss": 1.15757155418396, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.44963503649635034, |
| "grad_norm": 2.5, |
| "learning_rate": 4.638536192037186e-06, |
| "loss": 1.0606379508972168, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.45255474452554745, |
| "grad_norm": 10.375, |
| "learning_rate": 4.63260730794014e-06, |
| "loss": 1.674492597579956, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4554744525547445, |
| "grad_norm": 3.421875, |
| "learning_rate": 4.62663451378217e-06, |
| "loss": 1.4489834308624268, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.4583941605839416, |
| "grad_norm": 1.6640625, |
| "learning_rate": 4.620617948873133e-06, |
| "loss": 1.4036529064178467, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.46131386861313867, |
| "grad_norm": 4.21875, |
| "learning_rate": 4.6145577535438004e-06, |
| "loss": 1.482384204864502, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.4642335766423358, |
| "grad_norm": 2.8125, |
| "learning_rate": 4.608454069142578e-06, |
| "loss": 1.4590518474578857, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.46715328467153283, |
| "grad_norm": 4.53125, |
| "learning_rate": 4.602307038032216e-06, |
| "loss": 1.7169837951660156, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.47007299270072994, |
| "grad_norm": 4.75, |
| "learning_rate": 4.596116803586487e-06, |
| "loss": 1.5060232877731323, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.472992700729927, |
| "grad_norm": 2.828125, |
| "learning_rate": 4.5898835101868415e-06, |
| "loss": 1.4886112213134766, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.4759124087591241, |
| "grad_norm": 1.7265625, |
| "learning_rate": 4.583607303219037e-06, |
| "loss": 1.4076815843582153, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.47883211678832116, |
| "grad_norm": 10.4375, |
| "learning_rate": 4.577288329069753e-06, |
| "loss": 1.5618150234222412, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.48175182481751827, |
| "grad_norm": 4.75, |
| "learning_rate": 4.570926735123171e-06, |
| "loss": 1.274332046508789, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4846715328467153, |
| "grad_norm": 4.4375, |
| "learning_rate": 4.564522669757543e-06, |
| "loss": 1.4747687578201294, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.48759124087591244, |
| "grad_norm": 6.40625, |
| "learning_rate": 4.558076282341723e-06, |
| "loss": 1.653844952583313, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.4905109489051095, |
| "grad_norm": 39.5, |
| "learning_rate": 4.551587723231692e-06, |
| "loss": 1.0735116004943848, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.49343065693430654, |
| "grad_norm": 36.0, |
| "learning_rate": 4.545057143767042e-06, |
| "loss": 1.6714699268341064, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.49635036496350365, |
| "grad_norm": 4.15625, |
| "learning_rate": 4.538484696267453e-06, |
| "loss": 1.4629170894622803, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.4992700729927007, |
| "grad_norm": 10.3125, |
| "learning_rate": 4.5318705340291394e-06, |
| "loss": 1.5702762603759766, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.5021897810218978, |
| "grad_norm": 4.96875, |
| "learning_rate": 4.525214811321269e-06, |
| "loss": 1.5001425743103027, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.5051094890510949, |
| "grad_norm": 5.0625, |
| "learning_rate": 4.518517683382373e-06, |
| "loss": 1.4789342880249023, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.5080291970802919, |
| "grad_norm": 4.15625, |
| "learning_rate": 4.511779306416716e-06, |
| "loss": 1.4476077556610107, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.5109489051094891, |
| "grad_norm": 1.703125, |
| "learning_rate": 4.504999837590665e-06, |
| "loss": 1.1996196508407593, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5138686131386861, |
| "grad_norm": 4.1875, |
| "learning_rate": 4.49817943502901e-06, |
| "loss": 1.532009482383728, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5167883211678832, |
| "grad_norm": 1.65625, |
| "learning_rate": 4.4913182578112815e-06, |
| "loss": 1.2889015674591064, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.5197080291970803, |
| "grad_norm": 1.640625, |
| "learning_rate": 4.484416465968049e-06, |
| "loss": 1.3533192873001099, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.5226277372262774, |
| "grad_norm": 4.3125, |
| "learning_rate": 4.477474220477172e-06, |
| "loss": 1.4686871767044067, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.5255474452554745, |
| "grad_norm": 3.78125, |
| "learning_rate": 4.470491683260056e-06, |
| "loss": 1.4659610986709595, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5284671532846715, |
| "grad_norm": 2.46875, |
| "learning_rate": 4.463469017177876e-06, |
| "loss": 1.487034797668457, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.5313868613138686, |
| "grad_norm": 3.3125, |
| "learning_rate": 4.456406386027772e-06, |
| "loss": 1.1844420433044434, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.5343065693430656, |
| "grad_norm": 7.34375, |
| "learning_rate": 4.4493039545390345e-06, |
| "loss": 1.5557405948638916, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.5372262773722628, |
| "grad_norm": 2.984375, |
| "learning_rate": 4.442161888369258e-06, |
| "loss": 1.3480842113494873, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.5401459854014599, |
| "grad_norm": 2.90625, |
| "learning_rate": 4.43498035410048e-06, |
| "loss": 1.2928515672683716, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5430656934306569, |
| "grad_norm": 7.3125, |
| "learning_rate": 4.427759519235294e-06, |
| "loss": 1.7453609704971313, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.545985401459854, |
| "grad_norm": 2.640625, |
| "learning_rate": 4.420499552192944e-06, |
| "loss": 1.4482967853546143, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.5489051094890511, |
| "grad_norm": 2.0, |
| "learning_rate": 4.413200622305395e-06, |
| "loss": 1.6135839223861694, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.5518248175182482, |
| "grad_norm": 13.9375, |
| "learning_rate": 4.405862899813384e-06, |
| "loss": 1.570212483406067, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.5547445255474452, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.398486555862451e-06, |
| "loss": 1.298504114151001, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5576642335766423, |
| "grad_norm": 7.8125, |
| "learning_rate": 4.391071762498941e-06, |
| "loss": 1.4520879983901978, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.5605839416058395, |
| "grad_norm": 14.8125, |
| "learning_rate": 4.383618692666002e-06, |
| "loss": 1.3408211469650269, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.5635036496350365, |
| "grad_norm": 3.375, |
| "learning_rate": 4.376127520199541e-06, |
| "loss": 1.4031929969787598, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.5664233576642336, |
| "grad_norm": 4.03125, |
| "learning_rate": 4.3685984198241735e-06, |
| "loss": 1.5412940979003906, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.5693430656934306, |
| "grad_norm": 6.78125, |
| "learning_rate": 4.361031567149149e-06, |
| "loss": 1.3730320930480957, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5722627737226277, |
| "grad_norm": 7.28125, |
| "learning_rate": 4.353427138664254e-06, |
| "loss": 1.3442788124084473, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.5751824817518248, |
| "grad_norm": 6.90625, |
| "learning_rate": 4.345785311735698e-06, |
| "loss": 1.4140475988388062, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.5781021897810219, |
| "grad_norm": 6.25, |
| "learning_rate": 4.3381062646019676e-06, |
| "loss": 1.5376839637756348, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.581021897810219, |
| "grad_norm": 4.25, |
| "learning_rate": 4.330390176369685e-06, |
| "loss": 1.5938429832458496, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.583941605839416, |
| "grad_norm": 1.546875, |
| "learning_rate": 4.322637227009414e-06, |
| "loss": 1.1486091613769531, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5868613138686132, |
| "grad_norm": 3.578125, |
| "learning_rate": 4.314847597351475e-06, |
| "loss": 1.452984094619751, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.5897810218978102, |
| "grad_norm": 3.953125, |
| "learning_rate": 4.3070214690817195e-06, |
| "loss": 1.4647376537322998, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.5927007299270073, |
| "grad_norm": 2.203125, |
| "learning_rate": 4.299159024737295e-06, |
| "loss": 1.2110595703125, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.5956204379562043, |
| "grad_norm": 4.1875, |
| "learning_rate": 4.291260447702389e-06, |
| "loss": 1.3485263586044312, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.5985401459854015, |
| "grad_norm": 5.25, |
| "learning_rate": 4.283325922203949e-06, |
| "loss": 1.3334099054336548, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6014598540145986, |
| "grad_norm": 2.0625, |
| "learning_rate": 4.2753556333073875e-06, |
| "loss": 1.2992541790008545, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6043795620437956, |
| "grad_norm": 8.3125, |
| "learning_rate": 4.267349766912266e-06, |
| "loss": 1.3331689834594727, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.6072992700729927, |
| "grad_norm": 3.71875, |
| "learning_rate": 4.259308509747955e-06, |
| "loss": 1.4391039609909058, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.6102189781021898, |
| "grad_norm": 9.6875, |
| "learning_rate": 4.251232049369287e-06, |
| "loss": 1.145450472831726, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.6131386861313869, |
| "grad_norm": 10.875, |
| "learning_rate": 4.243120574152169e-06, |
| "loss": 1.5916063785552979, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6160583941605839, |
| "grad_norm": 4.75, |
| "learning_rate": 4.234974273289204e-06, |
| "loss": 1.619133710861206, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.618978102189781, |
| "grad_norm": 4.375, |
| "learning_rate": 4.226793336785265e-06, |
| "loss": 1.4133093357086182, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.621897810218978, |
| "grad_norm": 6.03125, |
| "learning_rate": 4.218577955453074e-06, |
| "loss": 1.253399133682251, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.6248175182481752, |
| "grad_norm": 4.6875, |
| "learning_rate": 4.210328320908744e-06, |
| "loss": 1.4635814428329468, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.6277372262773723, |
| "grad_norm": 2.875, |
| "learning_rate": 4.20204462556731e-06, |
| "loss": 1.3652441501617432, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6306569343065693, |
| "grad_norm": 8.9375, |
| "learning_rate": 4.193727062638247e-06, |
| "loss": 1.5560953617095947, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.6335766423357664, |
| "grad_norm": 3.53125, |
| "learning_rate": 4.18537582612096e-06, |
| "loss": 1.4227533340454102, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.6364963503649635, |
| "grad_norm": 3.265625, |
| "learning_rate": 4.176991110800256e-06, |
| "loss": 1.2683900594711304, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6394160583941606, |
| "grad_norm": 14.1875, |
| "learning_rate": 4.168573112241805e-06, |
| "loss": 1.2102452516555786, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.6423357664233577, |
| "grad_norm": 4.84375, |
| "learning_rate": 4.16012202678758e-06, |
| "loss": 1.2587625980377197, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6452554744525547, |
| "grad_norm": 5.46875, |
| "learning_rate": 4.1516380515512705e-06, |
| "loss": 1.410897970199585, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.6481751824817519, |
| "grad_norm": 1.78125, |
| "learning_rate": 4.143121384413695e-06, |
| "loss": 1.4373693466186523, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.6510948905109489, |
| "grad_norm": 2.78125, |
| "learning_rate": 4.134572224018176e-06, |
| "loss": 1.4430195093154907, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.654014598540146, |
| "grad_norm": 7.90625, |
| "learning_rate": 4.125990769765911e-06, |
| "loss": 1.4238855838775635, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.656934306569343, |
| "grad_norm": 2.25, |
| "learning_rate": 4.117377221811324e-06, |
| "loss": 1.4734668731689453, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6598540145985401, |
| "grad_norm": 2.734375, |
| "learning_rate": 4.108731781057393e-06, |
| "loss": 1.5210154056549072, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.6627737226277373, |
| "grad_norm": 1.25, |
| "learning_rate": 4.100054649150967e-06, |
| "loss": 1.237725019454956, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.6656934306569343, |
| "grad_norm": 3.953125, |
| "learning_rate": 4.091346028478059e-06, |
| "loss": 1.4640438556671143, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.6686131386861314, |
| "grad_norm": 9.0, |
| "learning_rate": 4.0826061221591326e-06, |
| "loss": 1.105014681816101, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.6715328467153284, |
| "grad_norm": 42.25, |
| "learning_rate": 4.073835134044356e-06, |
| "loss": 1.4338090419769287, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6744525547445256, |
| "grad_norm": 5.90625, |
| "learning_rate": 4.065033268708854e-06, |
| "loss": 1.3917622566223145, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.6773722627737226, |
| "grad_norm": 3.359375, |
| "learning_rate": 4.056200731447929e-06, |
| "loss": 1.0591514110565186, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.6802919708029197, |
| "grad_norm": 4.625, |
| "learning_rate": 4.0473377282722845e-06, |
| "loss": 1.4084625244140625, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.6832116788321168, |
| "grad_norm": 3.734375, |
| "learning_rate": 4.038444465903208e-06, |
| "loss": 1.4596691131591797, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.6861313868613139, |
| "grad_norm": 11.125, |
| "learning_rate": 4.029521151767757e-06, |
| "loss": 1.2422056198120117, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.689051094890511, |
| "grad_norm": 4.4375, |
| "learning_rate": 4.0205679939939164e-06, |
| "loss": 1.33591628074646, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.691970802919708, |
| "grad_norm": 2.21875, |
| "learning_rate": 4.011585201405747e-06, |
| "loss": 1.2504942417144775, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.6948905109489051, |
| "grad_norm": 3.6875, |
| "learning_rate": 4.002572983518515e-06, |
| "loss": 1.2631410360336304, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.6978102189781021, |
| "grad_norm": 5.8125, |
| "learning_rate": 3.993531550533804e-06, |
| "loss": 1.3914625644683838, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.7007299270072993, |
| "grad_norm": 20.0, |
| "learning_rate": 3.98446111333461e-06, |
| "loss": 1.288975715637207, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7036496350364964, |
| "grad_norm": 3.234375, |
| "learning_rate": 3.9753618834804295e-06, |
| "loss": 1.4152731895446777, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.7065693430656934, |
| "grad_norm": 5.71875, |
| "learning_rate": 3.966234073202316e-06, |
| "loss": 1.316530466079712, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.7094890510948905, |
| "grad_norm": 56.5, |
| "learning_rate": 3.957077895397941e-06, |
| "loss": 1.3749709129333496, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.7124087591240876, |
| "grad_norm": 1.734375, |
| "learning_rate": 3.947893563626615e-06, |
| "loss": 1.2120707035064697, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.7153284671532847, |
| "grad_norm": 3.546875, |
| "learning_rate": 3.93868129210432e-06, |
| "loss": 1.4016718864440918, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7182481751824817, |
| "grad_norm": 8.8125, |
| "learning_rate": 3.929441295698702e-06, |
| "loss": 1.154693841934204, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.7211678832116788, |
| "grad_norm": 3.640625, |
| "learning_rate": 3.920173789924065e-06, |
| "loss": 1.334530234336853, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.724087591240876, |
| "grad_norm": 1.921875, |
| "learning_rate": 3.910878990936346e-06, |
| "loss": 1.3103371858596802, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.727007299270073, |
| "grad_norm": 2.84375, |
| "learning_rate": 3.901557115528069e-06, |
| "loss": 1.244321584701538, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.7299270072992701, |
| "grad_norm": 4.40625, |
| "learning_rate": 3.892208381123289e-06, |
| "loss": 1.4268873929977417, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7328467153284671, |
| "grad_norm": 1.4765625, |
| "learning_rate": 3.8828330057725225e-06, |
| "loss": 1.3552806377410889, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.7357664233576642, |
| "grad_norm": 3.65625, |
| "learning_rate": 3.873431208147664e-06, |
| "loss": 1.6077991724014282, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.7386861313868613, |
| "grad_norm": 2.21875, |
| "learning_rate": 3.864003207536879e-06, |
| "loss": 1.2244906425476074, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.7416058394160584, |
| "grad_norm": 2.265625, |
| "learning_rate": 3.854549223839497e-06, |
| "loss": 1.0374276638031006, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.7445255474452555, |
| "grad_norm": 6.96875, |
| "learning_rate": 3.845069477560876e-06, |
| "loss": 1.547581434249878, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7474452554744525, |
| "grad_norm": 2.203125, |
| "learning_rate": 3.835564189807263e-06, |
| "loss": 1.225568175315857, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.7503649635036497, |
| "grad_norm": 4.09375, |
| "learning_rate": 3.826033582280635e-06, |
| "loss": 1.2825735807418823, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.7532846715328467, |
| "grad_norm": 2.96875, |
| "learning_rate": 3.816477877273533e-06, |
| "loss": 1.430619716644287, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.7562043795620438, |
| "grad_norm": 10.9375, |
| "learning_rate": 3.8068972976638703e-06, |
| "loss": 1.489488124847412, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.7591240875912408, |
| "grad_norm": 4.3125, |
| "learning_rate": 3.797292066909734e-06, |
| "loss": 0.8555082082748413, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.762043795620438, |
| "grad_norm": 3.703125, |
| "learning_rate": 3.787662409044184e-06, |
| "loss": 1.3753139972686768, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.7649635036496351, |
| "grad_norm": 8.0, |
| "learning_rate": 3.7780085486700126e-06, |
| "loss": 1.6844412088394165, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.7678832116788321, |
| "grad_norm": 5.25, |
| "learning_rate": 3.768330710954517e-06, |
| "loss": 1.592594027519226, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.7708029197080292, |
| "grad_norm": 1.5, |
| "learning_rate": 3.7586291216242433e-06, |
| "loss": 1.2550559043884277, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.7737226277372263, |
| "grad_norm": 3.953125, |
| "learning_rate": 3.748904006959719e-06, |
| "loss": 1.1512435674667358, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7766423357664234, |
| "grad_norm": 10.375, |
| "learning_rate": 3.739155593790182e-06, |
| "loss": 1.5256032943725586, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.7795620437956204, |
| "grad_norm": 10.75, |
| "learning_rate": 3.729384109488282e-06, |
| "loss": 1.6810424327850342, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.7824817518248175, |
| "grad_norm": 3.734375, |
| "learning_rate": 3.719589781964787e-06, |
| "loss": 1.4392688274383545, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.7854014598540145, |
| "grad_norm": 4.125, |
| "learning_rate": 3.7097728396632555e-06, |
| "loss": 1.4172781705856323, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.7883211678832117, |
| "grad_norm": 4.125, |
| "learning_rate": 3.6999335115547185e-06, |
| "loss": 1.401853322982788, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7912408759124088, |
| "grad_norm": 6.375, |
| "learning_rate": 3.690072027132335e-06, |
| "loss": 1.534106731414795, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.7941605839416058, |
| "grad_norm": 5.0, |
| "learning_rate": 3.680188616406037e-06, |
| "loss": 1.629064679145813, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.7970802919708029, |
| "grad_norm": 3.5625, |
| "learning_rate": 3.6702835098971706e-06, |
| "loss": 1.5794017314910889, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 7.90625, |
| "learning_rate": 3.6603569386331122e-06, |
| "loss": 1.556319236755371, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.8029197080291971, |
| "grad_norm": 5.125, |
| "learning_rate": 3.6504091341418853e-06, |
| "loss": 1.5984359979629517, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8058394160583942, |
| "grad_norm": 4.5, |
| "learning_rate": 3.640440328446759e-06, |
| "loss": 1.5283421277999878, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.8087591240875912, |
| "grad_norm": 6.75, |
| "learning_rate": 3.6304507540608357e-06, |
| "loss": 1.383811116218567, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.8116788321167884, |
| "grad_norm": 3.640625, |
| "learning_rate": 3.620440643981629e-06, |
| "loss": 1.3146003484725952, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.8145985401459854, |
| "grad_norm": 4.125, |
| "learning_rate": 3.6104102316856255e-06, |
| "loss": 1.4131672382354736, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.8175182481751825, |
| "grad_norm": 13.25, |
| "learning_rate": 3.600359751122845e-06, |
| "loss": 1.549619197845459, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8204379562043795, |
| "grad_norm": 2.796875, |
| "learning_rate": 3.590289436711379e-06, |
| "loss": 1.5269279479980469, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.8233576642335766, |
| "grad_norm": 3.046875, |
| "learning_rate": 3.5801995233319265e-06, |
| "loss": 1.3862372636795044, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.8262773722627738, |
| "grad_norm": 2.484375, |
| "learning_rate": 3.5700902463223137e-06, |
| "loss": 1.2330877780914307, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.8291970802919708, |
| "grad_norm": 7.125, |
| "learning_rate": 3.559961841472005e-06, |
| "loss": 1.4884552955627441, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.8321167883211679, |
| "grad_norm": 3.28125, |
| "learning_rate": 3.5498145450166057e-06, |
| "loss": 1.3787778615951538, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8350364963503649, |
| "grad_norm": 3.609375, |
| "learning_rate": 3.5396485936323456e-06, |
| "loss": 1.3882396221160889, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.8379562043795621, |
| "grad_norm": 3.15625, |
| "learning_rate": 3.529464224430568e-06, |
| "loss": 1.3656411170959473, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.8408759124087591, |
| "grad_norm": 5.65625, |
| "learning_rate": 3.5192616749521942e-06, |
| "loss": 1.5140806436538696, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.8437956204379562, |
| "grad_norm": 4.5, |
| "learning_rate": 3.5090411831621803e-06, |
| "loss": 1.5188113451004028, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.8467153284671532, |
| "grad_norm": 2.671875, |
| "learning_rate": 3.498802987443974e-06, |
| "loss": 1.3665883541107178, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.8496350364963504, |
| "grad_norm": 5.25, |
| "learning_rate": 3.4885473265939464e-06, |
| "loss": 1.383296012878418, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.8525547445255475, |
| "grad_norm": 2.71875, |
| "learning_rate": 3.478274439815831e-06, |
| "loss": 1.2266430854797363, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.8554744525547445, |
| "grad_norm": 3.9375, |
| "learning_rate": 3.467984566715137e-06, |
| "loss": 1.5247292518615723, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.8583941605839416, |
| "grad_norm": 4.125, |
| "learning_rate": 3.4576779472935644e-06, |
| "loss": 1.4203873872756958, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.8613138686131386, |
| "grad_norm": 2.46875, |
| "learning_rate": 3.447354821943407e-06, |
| "loss": 1.222019076347351, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.8642335766423358, |
| "grad_norm": 4.8125, |
| "learning_rate": 3.4370154314419395e-06, |
| "loss": 1.2593979835510254, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.8671532846715329, |
| "grad_norm": 3.21875, |
| "learning_rate": 3.4266600169458135e-06, |
| "loss": 1.22776460647583, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.8700729927007299, |
| "grad_norm": 2.703125, |
| "learning_rate": 3.4162888199854182e-06, |
| "loss": 1.2717225551605225, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.872992700729927, |
| "grad_norm": 1.2890625, |
| "learning_rate": 3.405902082459259e-06, |
| "loss": 1.0713449716567993, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.8759124087591241, |
| "grad_norm": 3.453125, |
| "learning_rate": 3.3955000466283073e-06, |
| "loss": 1.2096487283706665, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.8788321167883212, |
| "grad_norm": 2.03125, |
| "learning_rate": 3.385082955110355e-06, |
| "loss": 1.2699155807495117, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.8817518248175182, |
| "grad_norm": 2.328125, |
| "learning_rate": 3.3746510508743533e-06, |
| "loss": 1.3786303997039795, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.8846715328467153, |
| "grad_norm": 5.53125, |
| "learning_rate": 3.3642045772347453e-06, |
| "loss": 1.3685808181762695, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.8875912408759125, |
| "grad_norm": 9.0625, |
| "learning_rate": 3.353743777845795e-06, |
| "loss": 1.178727626800537, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.8905109489051095, |
| "grad_norm": 4.1875, |
| "learning_rate": 3.343268896695897e-06, |
| "loss": 1.383094310760498, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.8934306569343066, |
| "grad_norm": 3.359375, |
| "learning_rate": 3.3327801781018925e-06, |
| "loss": 1.4056508541107178, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.8963503649635036, |
| "grad_norm": 4.65625, |
| "learning_rate": 3.322277866703367e-06, |
| "loss": 1.5974513292312622, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.8992700729927007, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.3117622074569476e-06, |
| "loss": 1.1610685586929321, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.9021897810218978, |
| "grad_norm": 10.75, |
| "learning_rate": 3.3012334456305846e-06, |
| "loss": 0.901719331741333, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.9051094890510949, |
| "grad_norm": 8.3125, |
| "learning_rate": 3.2906918267978355e-06, |
| "loss": 1.2409268617630005, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.908029197080292, |
| "grad_norm": 3.453125, |
| "learning_rate": 3.2801375968321355e-06, |
| "loss": 1.4349682331085205, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.910948905109489, |
| "grad_norm": 6.875, |
| "learning_rate": 3.269571001901061e-06, |
| "loss": 1.3277549743652344, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.9138686131386862, |
| "grad_norm": 5.1875, |
| "learning_rate": 3.2589922884605924e-06, |
| "loss": 1.3614181280136108, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.9167883211678832, |
| "grad_norm": 9.125, |
| "learning_rate": 3.2484017032493615e-06, |
| "loss": 1.705947756767273, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.9197080291970803, |
| "grad_norm": 4.0, |
| "learning_rate": 3.237799493282897e-06, |
| "loss": 1.3996449708938599, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.9226277372262773, |
| "grad_norm": 2.75, |
| "learning_rate": 3.2271859058478666e-06, |
| "loss": 1.4013357162475586, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.9255474452554745, |
| "grad_norm": 3.46875, |
| "learning_rate": 3.2165611884963055e-06, |
| "loss": 1.2193137407302856, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.9284671532846716, |
| "grad_norm": 2.421875, |
| "learning_rate": 3.2059255890398445e-06, |
| "loss": 0.9855245351791382, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.9313868613138686, |
| "grad_norm": 3.59375, |
| "learning_rate": 3.1952793555439276e-06, |
| "loss": 1.4272806644439697, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.9343065693430657, |
| "grad_norm": 3.421875, |
| "learning_rate": 3.18462273632203e-06, |
| "loss": 1.1866121292114258, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.9372262773722628, |
| "grad_norm": 6.84375, |
| "learning_rate": 3.173955979929863e-06, |
| "loss": 1.385930061340332, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.9401459854014599, |
| "grad_norm": 1.8125, |
| "learning_rate": 3.163279335159578e-06, |
| "loss": 1.283376932144165, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.9430656934306569, |
| "grad_norm": 5.0625, |
| "learning_rate": 3.152593051033966e-06, |
| "loss": 1.368044376373291, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.945985401459854, |
| "grad_norm": 14.0625, |
| "learning_rate": 3.1418973768006424e-06, |
| "loss": 0.6849503517150879, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.948905109489051, |
| "grad_norm": 2.140625, |
| "learning_rate": 3.1311925619262417e-06, |
| "loss": 1.3481240272521973, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.9518248175182482, |
| "grad_norm": 3.234375, |
| "learning_rate": 3.1204788560905935e-06, |
| "loss": 1.390141248703003, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.9547445255474453, |
| "grad_norm": 8.8125, |
| "learning_rate": 3.1097565091809033e-06, |
| "loss": 1.3187050819396973, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.9576642335766423, |
| "grad_norm": 12.125, |
| "learning_rate": 3.0990257712859184e-06, |
| "loss": 1.3746651411056519, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.9605839416058394, |
| "grad_norm": 7.09375, |
| "learning_rate": 3.0882868926901e-06, |
| "loss": 1.2352771759033203, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.9635036496350365, |
| "grad_norm": 3.46875, |
| "learning_rate": 3.077540123867783e-06, |
| "loss": 1.328325629234314, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.9664233576642336, |
| "grad_norm": 3.46875, |
| "learning_rate": 3.066785715477334e-06, |
| "loss": 1.2275207042694092, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.9693430656934306, |
| "grad_norm": 2.4375, |
| "learning_rate": 3.056023918355307e-06, |
| "loss": 1.335202693939209, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.9722627737226277, |
| "grad_norm": 6.5, |
| "learning_rate": 3.0452549835105895e-06, |
| "loss": 1.4829626083374023, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.9751824817518249, |
| "grad_norm": 34.0, |
| "learning_rate": 3.03447916211855e-06, |
| "loss": 1.5850169658660889, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.9781021897810219, |
| "grad_norm": 6.5, |
| "learning_rate": 3.0236967055151804e-06, |
| "loss": 1.671141266822815, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.981021897810219, |
| "grad_norm": 23.125, |
| "learning_rate": 3.0129078651912317e-06, |
| "loss": 1.300727128982544, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.983941605839416, |
| "grad_norm": 8.875, |
| "learning_rate": 3.00211289278635e-06, |
| "loss": 1.4001004695892334, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.9868613138686131, |
| "grad_norm": 8.875, |
| "learning_rate": 2.991312040083206e-06, |
| "loss": 0.47176289558410645, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.9897810218978103, |
| "grad_norm": 2.875, |
| "learning_rate": 2.9805055590016225e-06, |
| "loss": 1.2891722917556763, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.9927007299270073, |
| "grad_norm": 4.1875, |
| "learning_rate": 2.9696937015926995e-06, |
| "loss": 1.365147352218628, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.9956204379562044, |
| "grad_norm": 1.8828125, |
| "learning_rate": 2.9588767200329348e-06, |
| "loss": 1.2809860706329346, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.9985401459854014, |
| "grad_norm": 8.25, |
| "learning_rate": 2.9480548666183427e-06, |
| "loss": 1.6904196739196777, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.0014598540145985, |
| "grad_norm": 2.21875, |
| "learning_rate": 2.9372283937585675e-06, |
| "loss": 1.3279258012771606, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.0043795620437956, |
| "grad_norm": 4.34375, |
| "learning_rate": 2.926397553970999e-06, |
| "loss": 1.277381181716919, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.0072992700729928, |
| "grad_norm": 5.84375, |
| "learning_rate": 2.915562599874882e-06, |
| "loss": 1.500443935394287, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.0102189781021897, |
| "grad_norm": 9.875, |
| "learning_rate": 2.904723784185422e-06, |
| "loss": 1.2994956970214844, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.013138686131387, |
| "grad_norm": 10.6875, |
| "learning_rate": 2.893881359707894e-06, |
| "loss": 1.227457046508789, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.0160583941605839, |
| "grad_norm": 2.984375, |
| "learning_rate": 2.883035579331744e-06, |
| "loss": 1.2923262119293213, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.018978102189781, |
| "grad_norm": 4.0, |
| "learning_rate": 2.8721866960246912e-06, |
| "loss": 1.445424199104309, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.0218978102189782, |
| "grad_norm": 2.1875, |
| "learning_rate": 2.861334962826828e-06, |
| "loss": 1.1312172412872314, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.0248175182481751, |
| "grad_norm": 3.734375, |
| "learning_rate": 2.8504806328447177e-06, |
| "loss": 1.4891958236694336, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.0277372262773723, |
| "grad_norm": 3.734375, |
| "learning_rate": 2.8396239592454914e-06, |
| "loss": 1.4066648483276367, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.0306569343065692, |
| "grad_norm": 4.21875, |
| "learning_rate": 2.828765195250942e-06, |
| "loss": 1.4027667045593262, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.0335766423357664, |
| "grad_norm": 3.828125, |
| "learning_rate": 2.8179045941316214e-06, |
| "loss": 1.3984425067901611, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.0364963503649636, |
| "grad_norm": 37.25, |
| "learning_rate": 2.8070424092009264e-06, |
| "loss": 1.5881340503692627, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.0394160583941605, |
| "grad_norm": 5.21875, |
| "learning_rate": 2.7961788938091994e-06, |
| "loss": 1.3652167320251465, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.0423357664233577, |
| "grad_norm": 9.0, |
| "learning_rate": 2.785314301337811e-06, |
| "loss": 1.4395644664764404, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.0452554744525548, |
| "grad_norm": 4.125, |
| "learning_rate": 2.7744488851932568e-06, |
| "loss": 1.3807083368301392, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.0481751824817518, |
| "grad_norm": 16.625, |
| "learning_rate": 2.76358289880124e-06, |
| "loss": 1.2562787532806396, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.051094890510949, |
| "grad_norm": 4.03125, |
| "learning_rate": 2.752716595600768e-06, |
| "loss": 1.2394318580627441, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.054014598540146, |
| "grad_norm": 8.625, |
| "learning_rate": 2.7418502290382352e-06, |
| "loss": 1.1047321557998657, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.056934306569343, |
| "grad_norm": 4.46875, |
| "learning_rate": 2.7309840525615146e-06, |
| "loss": 1.5514793395996094, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.0598540145985402, |
| "grad_norm": 3.234375, |
| "learning_rate": 2.720118319614047e-06, |
| "loss": 1.2009215354919434, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.0627737226277372, |
| "grad_norm": 2.65625, |
| "learning_rate": 2.709253283628924e-06, |
| "loss": 1.2573150396347046, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.0656934306569343, |
| "grad_norm": 8.9375, |
| "learning_rate": 2.698389198022987e-06, |
| "loss": 1.624213457107544, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.0686131386861315, |
| "grad_norm": 5.375, |
| "learning_rate": 2.6875263161909054e-06, |
| "loss": 1.3574187755584717, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.0715328467153284, |
| "grad_norm": 7.4375, |
| "learning_rate": 2.676664891499275e-06, |
| "loss": 1.2222844362258911, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.0744525547445256, |
| "grad_norm": 1.640625, |
| "learning_rate": 2.6658051772807046e-06, |
| "loss": 1.2617628574371338, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.0773722627737226, |
| "grad_norm": 8.0, |
| "learning_rate": 2.6549474268279074e-06, |
| "loss": 1.3748055696487427, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.0802919708029197, |
| "grad_norm": 8.5625, |
| "learning_rate": 2.644091893387793e-06, |
| "loss": 1.4741809368133545, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.0832116788321169, |
| "grad_norm": 7.1875, |
| "learning_rate": 2.6332388301555615e-06, |
| "loss": 1.3683550357818604, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.0861313868613138, |
| "grad_norm": 23.125, |
| "learning_rate": 2.622388490268799e-06, |
| "loss": 1.4302444458007812, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.089051094890511, |
| "grad_norm": 2.875, |
| "learning_rate": 2.6115411268015716e-06, |
| "loss": 1.3794375658035278, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.091970802919708, |
| "grad_norm": 3.5, |
| "learning_rate": 2.6006969927585214e-06, |
| "loss": 1.6521217823028564, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.094890510948905, |
| "grad_norm": 4.09375, |
| "learning_rate": 2.589856341068969e-06, |
| "loss": 1.380043625831604, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.0978102189781023, |
| "grad_norm": 2.84375, |
| "learning_rate": 2.5790194245810125e-06, |
| "loss": 1.2655432224273682, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.1007299270072992, |
| "grad_norm": 8.6875, |
| "learning_rate": 2.568186496055628e-06, |
| "loss": 1.4429633617401123, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.1036496350364964, |
| "grad_norm": 2.34375, |
| "learning_rate": 2.5573578081607793e-06, |
| "loss": 1.1212751865386963, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.1065693430656935, |
| "grad_norm": 2.71875, |
| "learning_rate": 2.546533613465518e-06, |
| "loss": 0.9118128418922424, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.1094890510948905, |
| "grad_norm": 2.9375, |
| "learning_rate": 2.5357141644340966e-06, |
| "loss": 1.3533203601837158, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.1124087591240877, |
| "grad_norm": 5.625, |
| "learning_rate": 2.5248997134200833e-06, |
| "loss": 1.2528855800628662, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.1153284671532846, |
| "grad_norm": 2.5, |
| "learning_rate": 2.5140905126604677e-06, |
| "loss": 1.244079351425171, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.1182481751824818, |
| "grad_norm": 5.71875, |
| "learning_rate": 2.503286814269783e-06, |
| "loss": 1.3053560256958008, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.121167883211679, |
| "grad_norm": 1.5546875, |
| "learning_rate": 2.4924888702342266e-06, |
| "loss": 1.2007651329040527, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.1240875912408759, |
| "grad_norm": 5.5625, |
| "learning_rate": 2.481696932405779e-06, |
| "loss": 1.3610585927963257, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.127007299270073, |
| "grad_norm": 2.59375, |
| "learning_rate": 2.4709112524963326e-06, |
| "loss": 1.3990166187286377, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.12992700729927, |
| "grad_norm": 3.484375, |
| "learning_rate": 2.4601320820718196e-06, |
| "loss": 1.3095015287399292, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.1328467153284671, |
| "grad_norm": 2.84375, |
| "learning_rate": 2.4493596725463435e-06, |
| "loss": 1.2231605052947998, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.1357664233576643, |
| "grad_norm": 5.875, |
| "learning_rate": 2.438594275176318e-06, |
| "loss": 1.3952467441558838, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.1386861313868613, |
| "grad_norm": 5.09375, |
| "learning_rate": 2.4278361410546027e-06, |
| "loss": 1.2288057804107666, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.1416058394160584, |
| "grad_norm": 7.15625, |
| "learning_rate": 2.41708552110465e-06, |
| "loss": 1.46846342086792, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.1445255474452556, |
| "grad_norm": 3.421875, |
| "learning_rate": 2.4063426660746517e-06, |
| "loss": 1.3782763481140137, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.1474452554744525, |
| "grad_norm": 9.375, |
| "learning_rate": 2.3956078265316883e-06, |
| "loss": 1.2458666563034058, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.1503649635036497, |
| "grad_norm": 3.59375, |
| "learning_rate": 2.3848812528558887e-06, |
| "loss": 1.2981244325637817, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.1532846715328466, |
| "grad_norm": 5.96875, |
| "learning_rate": 2.374163195234586e-06, |
| "loss": 1.3579144477844238, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.1562043795620438, |
| "grad_norm": 1.4765625, |
| "learning_rate": 2.3634539036564853e-06, |
| "loss": 1.2424495220184326, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.159124087591241, |
| "grad_norm": 3.78125, |
| "learning_rate": 2.352753627905833e-06, |
| "loss": 1.6642348766326904, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.162043795620438, |
| "grad_norm": 5.90625, |
| "learning_rate": 2.3420626175565877e-06, |
| "loss": 1.1931509971618652, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.164963503649635, |
| "grad_norm": 3.75, |
| "learning_rate": 2.331381121966603e-06, |
| "loss": 1.3377602100372314, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.167883211678832, |
| "grad_norm": 3.640625, |
| "learning_rate": 2.3207093902718066e-06, |
| "loss": 1.2145559787750244, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.1708029197080292, |
| "grad_norm": 2.078125, |
| "learning_rate": 2.3100476713803967e-06, |
| "loss": 1.1511560678482056, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.1737226277372264, |
| "grad_norm": 4.75, |
| "learning_rate": 2.2993962139670292e-06, |
| "loss": 1.5985954999923706, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.1766423357664233, |
| "grad_norm": 6.71875, |
| "learning_rate": 2.288755266467022e-06, |
| "loss": 1.4606941938400269, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.1795620437956205, |
| "grad_norm": 7.75, |
| "learning_rate": 2.2781250770705575e-06, |
| "loss": 1.5486199855804443, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.1824817518248176, |
| "grad_norm": 4.1875, |
| "learning_rate": 2.267505893716898e-06, |
| "loss": 1.3502545356750488, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.1854014598540146, |
| "grad_norm": 7.40625, |
| "learning_rate": 2.2568979640885964e-06, |
| "loss": 1.5650737285614014, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.1883211678832117, |
| "grad_norm": 7.96875, |
| "learning_rate": 2.246301535605726e-06, |
| "loss": 1.6433610916137695, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.1912408759124087, |
| "grad_norm": 3.78125, |
| "learning_rate": 2.2357168554201066e-06, |
| "loss": 1.0836632251739502, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.1941605839416058, |
| "grad_norm": 3.796875, |
| "learning_rate": 2.225144170409537e-06, |
| "loss": 1.1502854824066162, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.197080291970803, |
| "grad_norm": 3.015625, |
| "learning_rate": 2.2145837271720433e-06, |
| "loss": 1.6808114051818848, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 3.296875, |
| "learning_rate": 2.204035772020121e-06, |
| "loss": 1.3705600500106812, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.2029197080291971, |
| "grad_norm": 2.78125, |
| "learning_rate": 2.1935005509749933e-06, |
| "loss": 1.1946570873260498, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.205839416058394, |
| "grad_norm": 17.75, |
| "learning_rate": 2.182978309760874e-06, |
| "loss": 1.5363470315933228, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.2087591240875912, |
| "grad_norm": 3.78125, |
| "learning_rate": 2.1724692937992313e-06, |
| "loss": 1.4042502641677856, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.2116788321167884, |
| "grad_norm": 17.25, |
| "learning_rate": 2.16197374820307e-06, |
| "loss": 1.2589643001556396, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.2145985401459853, |
| "grad_norm": 3.359375, |
| "learning_rate": 2.1514919177712085e-06, |
| "loss": 1.6056280136108398, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.2175182481751825, |
| "grad_norm": 4.3125, |
| "learning_rate": 2.141024046982573e-06, |
| "loss": 1.3564906120300293, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.2204379562043797, |
| "grad_norm": 11.625, |
| "learning_rate": 2.1305703799904947e-06, |
| "loss": 0.9380712509155273, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.2233576642335766, |
| "grad_norm": 8.75, |
| "learning_rate": 2.120131160617013e-06, |
| "loss": 1.0530650615692139, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.2262773722627738, |
| "grad_norm": 8.4375, |
| "learning_rate": 2.1097066323471897e-06, |
| "loss": 0.7292347550392151, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.2291970802919707, |
| "grad_norm": 8.125, |
| "learning_rate": 2.0992970383234336e-06, |
| "loss": 0.9691898226737976, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.2321167883211679, |
| "grad_norm": 1.796875, |
| "learning_rate": 2.088902621339823e-06, |
| "loss": 1.152883768081665, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.235036496350365, |
| "grad_norm": 6.3125, |
| "learning_rate": 2.078523623836446e-06, |
| "loss": 1.4850080013275146, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.237956204379562, |
| "grad_norm": 7.3125, |
| "learning_rate": 2.0681602878937472e-06, |
| "loss": 1.3769371509552002, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.2408759124087592, |
| "grad_norm": 3.53125, |
| "learning_rate": 2.057812855226879e-06, |
| "loss": 1.103143334388733, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.243795620437956, |
| "grad_norm": 3.578125, |
| "learning_rate": 2.0474815671800644e-06, |
| "loss": 1.4019992351531982, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.2467153284671533, |
| "grad_norm": 5.40625, |
| "learning_rate": 2.0371666647209694e-06, |
| "loss": 1.1963081359863281, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.2496350364963504, |
| "grad_norm": 3.0625, |
| "learning_rate": 2.0268683884350803e-06, |
| "loss": 1.1888788938522339, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.2525547445255474, |
| "grad_norm": 13.6875, |
| "learning_rate": 2.0165869785200938e-06, |
| "loss": 1.2623980045318604, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.2554744525547445, |
| "grad_norm": 6.4375, |
| "learning_rate": 2.0063226747803143e-06, |
| "loss": 1.2596468925476074, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.2583941605839417, |
| "grad_norm": 3.859375, |
| "learning_rate": 1.9960757166210596e-06, |
| "loss": 1.333680272102356, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.2613138686131387, |
| "grad_norm": 3.71875, |
| "learning_rate": 1.9858463430430807e-06, |
| "loss": 1.1413600444793701, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.2642335766423358, |
| "grad_norm": 5.5625, |
| "learning_rate": 1.9756347926369813e-06, |
| "loss": 1.3728548288345337, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.2671532846715328, |
| "grad_norm": 4.15625, |
| "learning_rate": 1.9654413035776585e-06, |
| "loss": 1.449355125427246, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.27007299270073, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.9552661136187444e-06, |
| "loss": 1.1183695793151855, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.2729927007299269, |
| "grad_norm": 4.40625, |
| "learning_rate": 1.945109460087061e-06, |
| "loss": 1.1493186950683594, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.275912408759124, |
| "grad_norm": 2.640625, |
| "learning_rate": 1.934971579877088e-06, |
| "loss": 1.3397104740142822, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.2788321167883212, |
| "grad_norm": 4.3125, |
| "learning_rate": 1.9248527094454316e-06, |
| "loss": 1.3082889318466187, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.2817518248175181, |
| "grad_norm": 8.4375, |
| "learning_rate": 1.9147530848053152e-06, |
| "loss": 1.563565731048584, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.2846715328467153, |
| "grad_norm": 8.25, |
| "learning_rate": 1.9046729415210686e-06, |
| "loss": 1.4606716632843018, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.2875912408759125, |
| "grad_norm": 4.65625, |
| "learning_rate": 1.8946125147026427e-06, |
| "loss": 1.3690614700317383, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.2905109489051094, |
| "grad_norm": 7.8125, |
| "learning_rate": 1.8845720390001154e-06, |
| "loss": 1.6756688356399536, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.2934306569343066, |
| "grad_norm": 3.21875, |
| "learning_rate": 1.874551748598226e-06, |
| "loss": 1.2701613903045654, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.2963503649635038, |
| "grad_norm": 4.78125, |
| "learning_rate": 1.8645518772109077e-06, |
| "loss": 1.5865097045898438, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.2992700729927007, |
| "grad_norm": 3.921875, |
| "learning_rate": 1.8545726580758428e-06, |
| "loss": 1.401726484298706, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.3021897810218979, |
| "grad_norm": 7.78125, |
| "learning_rate": 1.8446143239490168e-06, |
| "loss": 1.6153247356414795, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.305109489051095, |
| "grad_norm": 6.125, |
| "learning_rate": 1.8346771070992914e-06, |
| "loss": 1.4763232469558716, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.308029197080292, |
| "grad_norm": 1.984375, |
| "learning_rate": 1.82476123930299e-06, |
| "loss": 1.2044928073883057, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.310948905109489, |
| "grad_norm": 1.4296875, |
| "learning_rate": 1.8148669518384862e-06, |
| "loss": 1.0226365327835083, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.313868613138686, |
| "grad_norm": 2.1875, |
| "learning_rate": 1.804994475480815e-06, |
| "loss": 1.0369101762771606, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.3167883211678832, |
| "grad_norm": 1.6875, |
| "learning_rate": 1.7951440404962856e-06, |
| "loss": 1.1433358192443848, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.3197080291970802, |
| "grad_norm": 5.3125, |
| "learning_rate": 1.7853158766371143e-06, |
| "loss": 1.1160844564437866, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.3226277372262774, |
| "grad_norm": 10.1875, |
| "learning_rate": 1.7755102131360639e-06, |
| "loss": 1.3365674018859863, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.3255474452554745, |
| "grad_norm": 2.21875, |
| "learning_rate": 1.7657272787010967e-06, |
| "loss": 1.3394170999526978, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.3284671532846715, |
| "grad_norm": 14.0625, |
| "learning_rate": 1.7559673015100405e-06, |
| "loss": 1.2542470693588257, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.3313868613138686, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.7462305092052676e-06, |
| "loss": 1.2083182334899902, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.3343065693430658, |
| "grad_norm": 2.234375, |
| "learning_rate": 1.7365171288883841e-06, |
| "loss": 1.0745160579681396, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.3372262773722627, |
| "grad_norm": 5.5, |
| "learning_rate": 1.7268273871149335e-06, |
| "loss": 1.4868173599243164, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.34014598540146, |
| "grad_norm": 5.96875, |
| "learning_rate": 1.7171615098891117e-06, |
| "loss": 0.7804101705551147, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.343065693430657, |
| "grad_norm": 3.65625, |
| "learning_rate": 1.7075197226584969e-06, |
| "loss": 1.3761916160583496, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.345985401459854, |
| "grad_norm": 2.640625, |
| "learning_rate": 1.6979022503087905e-06, |
| "loss": 1.413581132888794, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.348905109489051, |
| "grad_norm": 8.125, |
| "learning_rate": 1.688309317158572e-06, |
| "loss": 1.6476316452026367, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.3518248175182481, |
| "grad_norm": 5.4375, |
| "learning_rate": 1.6787411469540677e-06, |
| "loss": 1.5541059970855713, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.3547445255474453, |
| "grad_norm": 6.125, |
| "learning_rate": 1.6691979628639281e-06, |
| "loss": 1.5634403228759766, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.3576642335766422, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.6596799874740294e-06, |
| "loss": 1.2540359497070312, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.3605839416058394, |
| "grad_norm": 5.59375, |
| "learning_rate": 1.6501874427822767e-06, |
| "loss": 1.4849543571472168, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.3635036496350366, |
| "grad_norm": 6.40625, |
| "learning_rate": 1.6407205501934285e-06, |
| "loss": 1.141026496887207, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.3664233576642335, |
| "grad_norm": 2.375, |
| "learning_rate": 1.6312795305139328e-06, |
| "loss": 0.9827671647071838, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.3693430656934307, |
| "grad_norm": 5.5, |
| "learning_rate": 1.6218646039467725e-06, |
| "loss": 1.4801573753356934, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.3722627737226278, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.6124759900863365e-06, |
| "loss": 1.6479110717773438, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.3751824817518248, |
| "grad_norm": 7.25, |
| "learning_rate": 1.6031139079132933e-06, |
| "loss": 1.2483787536621094, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.378102189781022, |
| "grad_norm": 1.453125, |
| "learning_rate": 1.593778575789484e-06, |
| "loss": 1.2027292251586914, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.3810218978102191, |
| "grad_norm": 3.859375, |
| "learning_rate": 1.5844702114528315e-06, |
| "loss": 1.5109983682632446, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.383941605839416, |
| "grad_norm": 5.34375, |
| "learning_rate": 1.5751890320122568e-06, |
| "loss": 1.3143746852874756, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.3868613138686132, |
| "grad_norm": 8.25, |
| "learning_rate": 1.5659352539426215e-06, |
| "loss": 1.2749611139297485, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.3897810218978102, |
| "grad_norm": 2.125, |
| "learning_rate": 1.5567090930796746e-06, |
| "loss": 1.244338035583496, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.3927007299270073, |
| "grad_norm": 4.3125, |
| "learning_rate": 1.5475107646150203e-06, |
| "loss": 1.3380858898162842, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.3956204379562043, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.5383404830910981e-06, |
| "loss": 1.4054020643234253, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.3985401459854014, |
| "grad_norm": 10.5625, |
| "learning_rate": 1.529198462396175e-06, |
| "loss": 1.4239089488983154, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.4014598540145986, |
| "grad_norm": 9.25, |
| "learning_rate": 1.5200849157593666e-06, |
| "loss": 1.610469102859497, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.4043795620437955, |
| "grad_norm": 1.71875, |
| "learning_rate": 1.5110000557456542e-06, |
| "loss": 1.1694961786270142, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.4072992700729927, |
| "grad_norm": 5.625, |
| "learning_rate": 1.5019440942509312e-06, |
| "loss": 1.5139713287353516, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.4102189781021899, |
| "grad_norm": 3.953125, |
| "learning_rate": 1.4929172424970576e-06, |
| "loss": 1.376784324645996, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.4131386861313868, |
| "grad_norm": 2.34375, |
| "learning_rate": 1.483919711026939e-06, |
| "loss": 1.3103041648864746, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.416058394160584, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.4749517096996116e-06, |
| "loss": 1.2476757764816284, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.4189781021897812, |
| "grad_norm": 3.703125, |
| "learning_rate": 1.4660134476853485e-06, |
| "loss": 1.3406193256378174, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.421897810218978, |
| "grad_norm": 4.375, |
| "learning_rate": 1.4571051334607813e-06, |
| "loss": 1.2700021266937256, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.4248175182481753, |
| "grad_norm": 2.90625, |
| "learning_rate": 1.4482269748040358e-06, |
| "loss": 1.2266380786895752, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.4277372262773722, |
| "grad_norm": 3.3125, |
| "learning_rate": 1.4393791787898896e-06, |
| "loss": 1.189935564994812, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.4306569343065694, |
| "grad_norm": 4.8125, |
| "learning_rate": 1.430561951784938e-06, |
| "loss": 1.4163111448287964, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.4335766423357663, |
| "grad_norm": 7.125, |
| "learning_rate": 1.4217754994427844e-06, |
| "loss": 1.6390494108200073, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.4364963503649635, |
| "grad_norm": 1.6640625, |
| "learning_rate": 1.4130200266992408e-06, |
| "loss": 1.1357786655426025, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.4394160583941606, |
| "grad_norm": 3.5625, |
| "learning_rate": 1.4042957377675484e-06, |
| "loss": 1.2841823101043701, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.4423357664233576, |
| "grad_norm": 7.34375, |
| "learning_rate": 1.395602836133616e-06, |
| "loss": 1.3807730674743652, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.4452554744525548, |
| "grad_norm": 1.421875, |
| "learning_rate": 1.386941524551273e-06, |
| "loss": 1.135375738143921, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.448175182481752, |
| "grad_norm": 2.875, |
| "learning_rate": 1.37831200503754e-06, |
| "loss": 1.1764510869979858, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.4510948905109489, |
| "grad_norm": 8.9375, |
| "learning_rate": 1.3697144788679174e-06, |
| "loss": 1.2467272281646729, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.454014598540146, |
| "grad_norm": 5.90625, |
| "learning_rate": 1.3611491465716898e-06, |
| "loss": 1.4708714485168457, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.4569343065693432, |
| "grad_norm": 3.71875, |
| "learning_rate": 1.3526162079272495e-06, |
| "loss": 1.402409553527832, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.4598540145985401, |
| "grad_norm": 4.59375, |
| "learning_rate": 1.34411586195744e-06, |
| "loss": 1.2477829456329346, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.4627737226277373, |
| "grad_norm": 4.0625, |
| "learning_rate": 1.3356483069249088e-06, |
| "loss": 1.3877084255218506, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.4656934306569342, |
| "grad_norm": 7.875, |
| "learning_rate": 1.3272137403274844e-06, |
| "loss": 1.555393934249878, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.4686131386861314, |
| "grad_norm": 3.671875, |
| "learning_rate": 1.318812358893572e-06, |
| "loss": 1.3621551990509033, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.4715328467153284, |
| "grad_norm": 4.59375, |
| "learning_rate": 1.3104443585775642e-06, |
| "loss": 1.3545817136764526, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.4744525547445255, |
| "grad_norm": 3.9375, |
| "learning_rate": 1.3021099345552695e-06, |
| "loss": 1.4017988443374634, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.4773722627737227, |
| "grad_norm": 6.21875, |
| "learning_rate": 1.2938092812193615e-06, |
| "loss": 1.3940372467041016, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.4802919708029196, |
| "grad_norm": 3.1875, |
| "learning_rate": 1.285542592174842e-06, |
| "loss": 1.1765646934509277, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.4832116788321168, |
| "grad_norm": 6.0, |
| "learning_rate": 1.277310060234529e-06, |
| "loss": 1.385852336883545, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.486131386861314, |
| "grad_norm": 4.8125, |
| "learning_rate": 1.2691118774145577e-06, |
| "loss": 1.395111322402954, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.489051094890511, |
| "grad_norm": 1.640625, |
| "learning_rate": 1.2609482349299021e-06, |
| "loss": 1.325355052947998, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.491970802919708, |
| "grad_norm": 3.515625, |
| "learning_rate": 1.2528193231899156e-06, |
| "loss": 1.2050141096115112, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.4948905109489052, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.2447253317938871e-06, |
| "loss": 1.6511290073394775, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.4978102189781022, |
| "grad_norm": 3.609375, |
| "learning_rate": 1.236666449526623e-06, |
| "loss": 1.28155517578125, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.5007299270072991, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.2286428643540418e-06, |
| "loss": 1.4207556247711182, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.5036496350364965, |
| "grad_norm": 3.359375, |
| "learning_rate": 1.22065476341879e-06, |
| "loss": 1.3519251346588135, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.5065693430656935, |
| "grad_norm": 5.84375, |
| "learning_rate": 1.2127023330358777e-06, |
| "loss": 1.396289587020874, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.5094890510948904, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.204785758688331e-06, |
| "loss": 1.3400771617889404, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.5124087591240876, |
| "grad_norm": 31.25, |
| "learning_rate": 1.1969052250228683e-06, |
| "loss": 1.1934255361557007, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.5153284671532847, |
| "grad_norm": 4.90625, |
| "learning_rate": 1.1890609158455949e-06, |
| "loss": 1.4513096809387207, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.5182481751824817, |
| "grad_norm": 2.625, |
| "learning_rate": 1.181253014117711e-06, |
| "loss": 1.1264418363571167, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.5211678832116788, |
| "grad_norm": 1.65625, |
| "learning_rate": 1.1734817019512465e-06, |
| "loss": 1.1497807502746582, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.524087591240876, |
| "grad_norm": 7.8125, |
| "learning_rate": 1.1657471606048157e-06, |
| "loss": 1.6058242321014404, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.527007299270073, |
| "grad_norm": 22.25, |
| "learning_rate": 1.1580495704793874e-06, |
| "loss": 1.4766197204589844, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.5299270072992701, |
| "grad_norm": 3.75, |
| "learning_rate": 1.1503891111140767e-06, |
| "loss": 1.2432148456573486, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.5328467153284673, |
| "grad_norm": 28.25, |
| "learning_rate": 1.1427659611819604e-06, |
| "loss": 1.1451390981674194, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.5357664233576642, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.1351802984859045e-06, |
| "loss": 1.3471091985702515, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.5386861313868612, |
| "grad_norm": 1.640625, |
| "learning_rate": 1.127632299954423e-06, |
| "loss": 1.1958954334259033, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.5416058394160586, |
| "grad_norm": 10.8125, |
| "learning_rate": 1.1201221416375456e-06, |
| "loss": 1.3556766510009766, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.5445255474452555, |
| "grad_norm": 4.75, |
| "learning_rate": 1.1126499987027172e-06, |
| "loss": 1.6111273765563965, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.5474452554744524, |
| "grad_norm": 12.5, |
| "learning_rate": 1.1052160454307085e-06, |
| "loss": 1.5189365148544312, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.5503649635036496, |
| "grad_norm": 3.96875, |
| "learning_rate": 1.0978204552115493e-06, |
| "loss": 1.3763346672058105, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.5532846715328468, |
| "grad_norm": 4.375, |
| "learning_rate": 1.0904634005404902e-06, |
| "loss": 1.450345754623413, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.5562043795620437, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.0831450530139747e-06, |
| "loss": 1.2109770774841309, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.5591240875912409, |
| "grad_norm": 7.0, |
| "learning_rate": 1.0758655833256381e-06, |
| "loss": 1.2681195735931396, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.562043795620438, |
| "grad_norm": 2.640625, |
| "learning_rate": 1.0686251612623277e-06, |
| "loss": 1.2694846391677856, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.564963503649635, |
| "grad_norm": 8.6875, |
| "learning_rate": 1.0614239557001389e-06, |
| "loss": 1.5101749897003174, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.5678832116788322, |
| "grad_norm": 3.171875, |
| "learning_rate": 1.0542621346004806e-06, |
| "loss": 1.313795566558838, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.5708029197080293, |
| "grad_norm": 9.0, |
| "learning_rate": 1.047139865006155e-06, |
| "loss": 1.1664808988571167, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.5737226277372263, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.0400573130374641e-06, |
| "loss": 1.203639030456543, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.5766423357664232, |
| "grad_norm": 3.265625, |
| "learning_rate": 1.0330146438883304e-06, |
| "loss": 1.5285131931304932, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.5795620437956206, |
| "grad_norm": 6.5625, |
| "learning_rate": 1.0260120218224485e-06, |
| "loss": 1.516188144683838, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.5824817518248175, |
| "grad_norm": 6.9375, |
| "learning_rate": 1.019049610169452e-06, |
| "loss": 1.3165411949157715, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.5854014598540145, |
| "grad_norm": 4.6875, |
| "learning_rate": 1.012127571321104e-06, |
| "loss": 1.1730577945709229, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.5883211678832116, |
| "grad_norm": 4.46875, |
| "learning_rate": 1.0052460667275102e-06, |
| "loss": 1.3837532997131348, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.5912408759124088, |
| "grad_norm": 4.71875, |
| "learning_rate": 9.984052568933507e-07, |
| "loss": 1.342604398727417, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.5941605839416058, |
| "grad_norm": 1.8046875, |
| "learning_rate": 9.916053013741396e-07, |
| "loss": 1.0345500707626343, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.597080291970803, |
| "grad_norm": 3.578125, |
| "learning_rate": 9.848463587725024e-07, |
| "loss": 1.3031237125396729, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 2.6875, |
| "learning_rate": 9.78128586734476e-07, |
| "loss": 1.4126646518707275, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.602919708029197, |
| "grad_norm": 2.796875, |
| "learning_rate": 9.714521419458333e-07, |
| "loss": 1.2036532163619995, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.6058394160583942, |
| "grad_norm": 5.34375, |
| "learning_rate": 9.648171801284254e-07, |
| "loss": 1.3445477485656738, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.6087591240875914, |
| "grad_norm": 6.875, |
| "learning_rate": 9.582238560365534e-07, |
| "loss": 1.4824466705322266, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.6116788321167883, |
| "grad_norm": 2.171875, |
| "learning_rate": 9.516723234533573e-07, |
| "loss": 0.6945338845252991, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.6145985401459853, |
| "grad_norm": 4.375, |
| "learning_rate": 9.451627351872289e-07, |
| "loss": 1.691240906715393, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.6175182481751826, |
| "grad_norm": 5.0625, |
| "learning_rate": 9.386952430682478e-07, |
| "loss": 1.6143536567687988, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.6204379562043796, |
| "grad_norm": 4.90625, |
| "learning_rate": 9.322699979446395e-07, |
| "loss": 1.0810116529464722, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.6233576642335765, |
| "grad_norm": 3.953125, |
| "learning_rate": 9.25887149679259e-07, |
| "loss": 1.3443822860717773, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.6262773722627737, |
| "grad_norm": 2.5, |
| "learning_rate": 9.19546847146093e-07, |
| "loss": 1.392272710800171, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.6291970802919709, |
| "grad_norm": 2.890625, |
| "learning_rate": 9.132492382267895e-07, |
| "loss": 1.2860863208770752, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.6321167883211678, |
| "grad_norm": 6.03125, |
| "learning_rate": 9.069944698072071e-07, |
| "loss": 1.4681463241577148, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.635036496350365, |
| "grad_norm": 1.828125, |
| "learning_rate": 9.0078268777399e-07, |
| "loss": 1.1984715461730957, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.6379562043795621, |
| "grad_norm": 3.328125, |
| "learning_rate": 8.946140370111651e-07, |
| "loss": 1.3620171546936035, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.640875912408759, |
| "grad_norm": 2.5625, |
| "learning_rate": 8.884886613967625e-07, |
| "loss": 1.0197124481201172, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.6437956204379562, |
| "grad_norm": 2.96875, |
| "learning_rate": 8.824067037994597e-07, |
| "loss": 1.2507963180541992, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.6467153284671534, |
| "grad_norm": 7.59375, |
| "learning_rate": 8.763683060752492e-07, |
| "loss": 1.5034403800964355, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.6496350364963503, |
| "grad_norm": 3.703125, |
| "learning_rate": 8.703736090641302e-07, |
| "loss": 1.250478744506836, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.6525547445255473, |
| "grad_norm": 2.921875, |
| "learning_rate": 8.644227525868238e-07, |
| "loss": 1.2682870626449585, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.6554744525547447, |
| "grad_norm": 8.5, |
| "learning_rate": 8.585158754415114e-07, |
| "loss": 1.5448431968688965, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.6583941605839416, |
| "grad_norm": 5.65625, |
| "learning_rate": 8.52653115400598e-07, |
| "loss": 1.3879718780517578, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.6613138686131386, |
| "grad_norm": 3.3125, |
| "learning_rate": 8.468346092074961e-07, |
| "loss": 1.3755671977996826, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.6642335766423357, |
| "grad_norm": 2.75, |
| "learning_rate": 8.410604925734411e-07, |
| "loss": 1.1513915061950684, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.667153284671533, |
| "grad_norm": 14.1875, |
| "learning_rate": 8.35330900174322e-07, |
| "loss": 1.5474663972854614, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.6700729927007298, |
| "grad_norm": 3.515625, |
| "learning_rate": 8.296459656475413e-07, |
| "loss": 0.8504141569137573, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.672992700729927, |
| "grad_norm": 5.78125, |
| "learning_rate": 8.240058215888998e-07, |
| "loss": 1.3289515972137451, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.6759124087591242, |
| "grad_norm": 6.9375, |
| "learning_rate": 8.184105995494998e-07, |
| "loss": 0.9470740556716919, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.6788321167883211, |
| "grad_norm": 3.359375, |
| "learning_rate": 8.128604300326812e-07, |
| "loss": 1.352350115776062, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.6817518248175183, |
| "grad_norm": 4.78125, |
| "learning_rate": 8.073554424909755e-07, |
| "loss": 1.3660526275634766, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.6846715328467154, |
| "grad_norm": 2.46875, |
| "learning_rate": 8.01895765323087e-07, |
| "loss": 1.2722463607788086, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.6875912408759124, |
| "grad_norm": 6.21875, |
| "learning_rate": 7.964815258708971e-07, |
| "loss": 1.13301420211792, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.6905109489051093, |
| "grad_norm": 2.03125, |
| "learning_rate": 7.911128504164947e-07, |
| "loss": 1.3945411443710327, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.6934306569343067, |
| "grad_norm": 1.7421875, |
| "learning_rate": 7.857898641792322e-07, |
| "loss": 1.1629891395568848, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.6963503649635037, |
| "grad_norm": 2.09375, |
| "learning_rate": 7.805126913128018e-07, |
| "loss": 1.1993281841278076, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.6992700729927006, |
| "grad_norm": 3.0625, |
| "learning_rate": 7.752814549023437e-07, |
| "loss": 1.4611374139785767, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.7021897810218978, |
| "grad_norm": 4.625, |
| "learning_rate": 7.700962769615704e-07, |
| "loss": 1.1919968128204346, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.705109489051095, |
| "grad_norm": 2.515625, |
| "learning_rate": 7.649572784299255e-07, |
| "loss": 1.2250781059265137, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.7080291970802919, |
| "grad_norm": 8.1875, |
| "learning_rate": 7.598645791697601e-07, |
| "loss": 1.3479260206222534, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.710948905109489, |
| "grad_norm": 4.25, |
| "learning_rate": 7.548182979635389e-07, |
| "loss": 1.3197946548461914, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.7138686131386862, |
| "grad_norm": 8.6875, |
| "learning_rate": 7.49818552511068e-07, |
| "loss": 1.1691796779632568, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.7167883211678832, |
| "grad_norm": 3.203125, |
| "learning_rate": 7.448654594267496e-07, |
| "loss": 1.2978925704956055, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.7197080291970803, |
| "grad_norm": 2.96875, |
| "learning_rate": 7.399591342368644e-07, |
| "loss": 1.174210786819458, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.7226277372262775, |
| "grad_norm": 4.625, |
| "learning_rate": 7.350996913768743e-07, |
| "loss": 1.2740840911865234, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.7255474452554744, |
| "grad_norm": 8.0625, |
| "learning_rate": 7.302872441887562e-07, |
| "loss": 1.1019668579101562, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.7284671532846714, |
| "grad_norm": 2.84375, |
| "learning_rate": 7.255219049183552e-07, |
| "loss": 1.3885023593902588, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.7313868613138688, |
| "grad_norm": 5.625, |
| "learning_rate": 7.208037847127683e-07, |
| "loss": 1.5192725658416748, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.7343065693430657, |
| "grad_norm": 6.625, |
| "learning_rate": 7.161329936177522e-07, |
| "loss": 1.3260494470596313, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.7372262773722627, |
| "grad_norm": 3.375, |
| "learning_rate": 7.115096405751567e-07, |
| "loss": 1.3762927055358887, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.7401459854014598, |
| "grad_norm": 1.8515625, |
| "learning_rate": 7.069338334203818e-07, |
| "loss": 1.0026099681854248, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.743065693430657, |
| "grad_norm": 1.1015625, |
| "learning_rate": 7.024056788798658e-07, |
| "loss": 1.1264629364013672, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.745985401459854, |
| "grad_norm": 16.75, |
| "learning_rate": 6.979252825685927e-07, |
| "loss": 1.5443601608276367, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.748905109489051, |
| "grad_norm": 1.8671875, |
| "learning_rate": 6.934927489876312e-07, |
| "loss": 1.0794442892074585, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.7518248175182483, |
| "grad_norm": 6.90625, |
| "learning_rate": 6.891081815216958e-07, |
| "loss": 1.348907470703125, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.7547445255474452, |
| "grad_norm": 3.140625, |
| "learning_rate": 6.847716824367369e-07, |
| "loss": 1.3414909839630127, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.7576642335766424, |
| "grad_norm": 4.59375, |
| "learning_rate": 6.804833528775531e-07, |
| "loss": 1.4073083400726318, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.7605839416058395, |
| "grad_norm": 3.671875, |
| "learning_rate": 6.762432928654358e-07, |
| "loss": 0.8366962671279907, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.7635036496350365, |
| "grad_norm": 5.53125, |
| "learning_rate": 6.720516012958325e-07, |
| "loss": 1.3547214269638062, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.7664233576642334, |
| "grad_norm": 5.21875, |
| "learning_rate": 6.679083759360433e-07, |
| "loss": 1.6114599704742432, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.7693430656934308, |
| "grad_norm": 4.5, |
| "learning_rate": 6.638137134229375e-07, |
| "loss": 1.5248315334320068, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.7722627737226277, |
| "grad_norm": 3.6875, |
| "learning_rate": 6.597677092607025e-07, |
| "loss": 1.093032956123352, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.7751824817518247, |
| "grad_norm": 4.5, |
| "learning_rate": 6.557704578186146e-07, |
| "loss": 1.408461093902588, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.7781021897810219, |
| "grad_norm": 9.9375, |
| "learning_rate": 6.518220523288382e-07, |
| "loss": 1.3268358707427979, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.781021897810219, |
| "grad_norm": 4.75, |
| "learning_rate": 6.479225848842523e-07, |
| "loss": 1.544386386871338, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.783941605839416, |
| "grad_norm": 5.9375, |
| "learning_rate": 6.440721464362998e-07, |
| "loss": 1.4272065162658691, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.7868613138686131, |
| "grad_norm": 3.515625, |
| "learning_rate": 6.402708267928694e-07, |
| "loss": 1.3150466680526733, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.7897810218978103, |
| "grad_norm": 5.0, |
| "learning_rate": 6.365187146161991e-07, |
| "loss": 1.2979998588562012, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.7927007299270072, |
| "grad_norm": 4.75, |
| "learning_rate": 6.32815897420809e-07, |
| "loss": 1.6841963529586792, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.7956204379562044, |
| "grad_norm": 5.0, |
| "learning_rate": 6.29162461571459e-07, |
| "loss": 1.6227900981903076, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.7985401459854016, |
| "grad_norm": 11.6875, |
| "learning_rate": 6.25558492281135e-07, |
| "loss": 1.4919426441192627, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.8014598540145985, |
| "grad_norm": 4.8125, |
| "learning_rate": 6.220040736090617e-07, |
| "loss": 1.3797836303710938, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.8043795620437955, |
| "grad_norm": 4.09375, |
| "learning_rate": 6.18499288458743e-07, |
| "loss": 1.6902371644973755, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.8072992700729928, |
| "grad_norm": 2.453125, |
| "learning_rate": 6.150442185760258e-07, |
| "loss": 1.2298048734664917, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.8102189781021898, |
| "grad_norm": 4.53125, |
| "learning_rate": 6.116389445471948e-07, |
| "loss": 1.3514063358306885, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.8131386861313867, |
| "grad_norm": 3.828125, |
| "learning_rate": 6.082835457970935e-07, |
| "loss": 1.3649213314056396, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.816058394160584, |
| "grad_norm": 4.15625, |
| "learning_rate": 6.0497810058727e-07, |
| "loss": 1.3873786926269531, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.818978102189781, |
| "grad_norm": 5.21875, |
| "learning_rate": 6.017226860141535e-07, |
| "loss": 1.6073391437530518, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.821897810218978, |
| "grad_norm": 2.90625, |
| "learning_rate": 5.985173780072558e-07, |
| "loss": 1.333566427230835, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.8248175182481752, |
| "grad_norm": 3.0625, |
| "learning_rate": 5.953622513273977e-07, |
| "loss": 1.3585089445114136, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.8277372262773723, |
| "grad_norm": 3.953125, |
| "learning_rate": 5.92257379564969e-07, |
| "loss": 1.195847749710083, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.8306569343065693, |
| "grad_norm": 4.84375, |
| "learning_rate": 5.892028351382101e-07, |
| "loss": 1.4418195486068726, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.8335766423357664, |
| "grad_norm": 4.09375, |
| "learning_rate": 5.861986892915227e-07, |
| "loss": 1.384018063545227, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.8364963503649636, |
| "grad_norm": 9.4375, |
| "learning_rate": 5.832450120938093e-07, |
| "loss": 1.3380024433135986, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.8394160583941606, |
| "grad_norm": 6.46875, |
| "learning_rate": 5.803418724368373e-07, |
| "loss": 1.3088436126708984, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.8423357664233575, |
| "grad_norm": 9.9375, |
| "learning_rate": 5.774893380336338e-07, |
| "loss": 1.5858633518218994, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.845255474452555, |
| "grad_norm": 6.375, |
| "learning_rate": 5.746874754169053e-07, |
| "loss": 1.5293078422546387, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.8481751824817518, |
| "grad_norm": 2.921875, |
| "learning_rate": 5.719363499374861e-07, |
| "loss": 1.1518256664276123, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.8510948905109488, |
| "grad_norm": 7.6875, |
| "learning_rate": 5.692360257628144e-07, |
| "loss": 1.3224802017211914, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.854014598540146, |
| "grad_norm": 4.28125, |
| "learning_rate": 5.665865658754341e-07, |
| "loss": 1.2233679294586182, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.856934306569343, |
| "grad_norm": 6.34375, |
| "learning_rate": 5.639880320715284e-07, |
| "loss": 1.4993672370910645, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.85985401459854, |
| "grad_norm": 3.703125, |
| "learning_rate": 5.614404849594762e-07, |
| "loss": 1.3802194595336914, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.8627737226277372, |
| "grad_norm": 2.5625, |
| "learning_rate": 5.589439839584404e-07, |
| "loss": 1.0489559173583984, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.8656934306569344, |
| "grad_norm": 1.40625, |
| "learning_rate": 5.564985872969791e-07, |
| "loss": 1.2326107025146484, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.8686131386861313, |
| "grad_norm": 5.4375, |
| "learning_rate": 5.541043520116912e-07, |
| "loss": 1.1945993900299072, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.8715328467153285, |
| "grad_norm": 2.625, |
| "learning_rate": 5.517613339458832e-07, |
| "loss": 1.2813007831573486, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.8744525547445257, |
| "grad_norm": 4.46875, |
| "learning_rate": 5.494695877482676e-07, |
| "loss": 1.1684314012527466, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.8773722627737226, |
| "grad_norm": 3.71875, |
| "learning_rate": 5.472291668716893e-07, |
| "loss": 1.222388505935669, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.8802919708029195, |
| "grad_norm": 2.984375, |
| "learning_rate": 5.450401235718762e-07, |
| "loss": 1.2156729698181152, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.883211678832117, |
| "grad_norm": 5.96875, |
| "learning_rate": 5.42902508906224e-07, |
| "loss": 1.311574935913086, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.8861313868613139, |
| "grad_norm": 7.96875, |
| "learning_rate": 5.408163727326021e-07, |
| "loss": 1.34036123752594, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.8890510948905108, |
| "grad_norm": 3.640625, |
| "learning_rate": 5.387817637081928e-07, |
| "loss": 1.1132798194885254, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.891970802919708, |
| "grad_norm": 3.359375, |
| "learning_rate": 5.367987292883554e-07, |
| "loss": 1.3646128177642822, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.8948905109489051, |
| "grad_norm": 5.1875, |
| "learning_rate": 5.348673157255195e-07, |
| "loss": 1.4554338455200195, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.897810218978102, |
| "grad_norm": 3.96875, |
| "learning_rate": 5.329875680681065e-07, |
| "loss": 1.4109296798706055, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.9007299270072993, |
| "grad_norm": 4.875, |
| "learning_rate": 5.311595301594783e-07, |
| "loss": 1.1961219310760498, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.9036496350364964, |
| "grad_norm": 2.921875, |
| "learning_rate": 5.293832446369158e-07, |
| "loss": 0.6657427549362183, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.9065693430656934, |
| "grad_norm": 10.4375, |
| "learning_rate": 5.276587529306236e-07, |
| "loss": 1.397131323814392, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.9094890510948905, |
| "grad_norm": 6.5, |
| "learning_rate": 5.25986095262763e-07, |
| "loss": 1.323398470878601, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.9124087591240877, |
| "grad_norm": 3.203125, |
| "learning_rate": 5.243653106465157e-07, |
| "loss": 1.3060777187347412, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.9153284671532846, |
| "grad_norm": 5.71875, |
| "learning_rate": 5.227964368851721e-07, |
| "loss": 1.5433318614959717, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.9182481751824818, |
| "grad_norm": 3.359375, |
| "learning_rate": 5.212795105712508e-07, |
| "loss": 1.4788509607315063, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.921167883211679, |
| "grad_norm": 4.8125, |
| "learning_rate": 5.198145670856438e-07, |
| "loss": 1.3976120948791504, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.924087591240876, |
| "grad_norm": 2.0625, |
| "learning_rate": 5.184016405967931e-07, |
| "loss": 1.1872693300247192, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.9270072992700729, |
| "grad_norm": 2.296875, |
| "learning_rate": 5.170407640598921e-07, |
| "loss": 1.1601970195770264, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.92992700729927, |
| "grad_norm": 3.5625, |
| "learning_rate": 5.157319692161178e-07, |
| "loss": 1.205195426940918, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.9328467153284672, |
| "grad_norm": 3.734375, |
| "learning_rate": 5.144752865918901e-07, |
| "loss": 1.1591906547546387, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.9357664233576641, |
| "grad_norm": 3.421875, |
| "learning_rate": 5.132707454981602e-07, |
| "loss": 1.3498120307922363, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.9386861313868613, |
| "grad_norm": 3.796875, |
| "learning_rate": 5.121183740297261e-07, |
| "loss": 1.3916034698486328, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.9416058394160585, |
| "grad_norm": 17.375, |
| "learning_rate": 5.110181990645788e-07, |
| "loss": 1.2117153406143188, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.9445255474452554, |
| "grad_norm": 1.734375, |
| "learning_rate": 5.099702462632737e-07, |
| "loss": 1.19834566116333, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.9474452554744526, |
| "grad_norm": 10.0625, |
| "learning_rate": 5.089745400683333e-07, |
| "loss": 0.8368179798126221, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.9503649635036497, |
| "grad_norm": 5.625, |
| "learning_rate": 5.080311037036767e-07, |
| "loss": 1.314239263534546, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.9532846715328467, |
| "grad_norm": 1.65625, |
| "learning_rate": 5.071399591740777e-07, |
| "loss": 1.216627597808838, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.9562043795620438, |
| "grad_norm": 6.375, |
| "learning_rate": 5.063011272646521e-07, |
| "loss": 1.2274556159973145, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.959124087591241, |
| "grad_norm": 2.546875, |
| "learning_rate": 5.055146275403725e-07, |
| "loss": 1.4812201261520386, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.962043795620438, |
| "grad_norm": 5.71875, |
| "learning_rate": 5.047804783456117e-07, |
| "loss": 1.215821623802185, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.964963503649635, |
| "grad_norm": 4.71875, |
| "learning_rate": 5.040986968037157e-07, |
| "loss": 1.318119764328003, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.967883211678832, |
| "grad_norm": 2.953125, |
| "learning_rate": 5.034692988166033e-07, |
| "loss": 1.2136964797973633, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.9708029197080292, |
| "grad_norm": 4.125, |
| "learning_rate": 5.028922990643963e-07, |
| "loss": 1.3341786861419678, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.9737226277372262, |
| "grad_norm": 3.75, |
| "learning_rate": 5.023677110050759e-07, |
| "loss": 1.4188188314437866, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.9766423357664233, |
| "grad_norm": 3.421875, |
| "learning_rate": 5.018955468741701e-07, |
| "loss": 1.608628511428833, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.9795620437956205, |
| "grad_norm": 3.359375, |
| "learning_rate": 5.014758176844665e-07, |
| "loss": 1.5936325788497925, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.9824817518248175, |
| "grad_norm": 2.796875, |
| "learning_rate": 5.011085332257579e-07, |
| "loss": 1.178612232208252, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.9854014598540146, |
| "grad_norm": 7.1875, |
| "learning_rate": 5.007937020646117e-07, |
| "loss": 1.1231637001037598, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.9883211678832118, |
| "grad_norm": 1.90625, |
| "learning_rate": 5.005313315441716e-07, |
| "loss": 0.6363063454627991, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.9912408759124087, |
| "grad_norm": 5.5, |
| "learning_rate": 5.003214277839851e-07, |
| "loss": 1.3855026960372925, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.994160583941606, |
| "grad_norm": 5.6875, |
| "learning_rate": 5.00163995679862e-07, |
| "loss": 1.346792459487915, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.997080291970803, |
| "grad_norm": 8.1875, |
| "learning_rate": 5.000590389037593e-07, |
| "loss": 1.3148702383041382, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 4.0625, |
| "learning_rate": 5.00006559903696e-07, |
| "loss": 1.6425683498382568, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 1370, |
| "total_flos": 1.984544544032555e+18, |
| "train_loss": 1.409229011779284, |
| "train_runtime": 8212.4061, |
| "train_samples_per_second": 2.669, |
| "train_steps_per_second": 0.167 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1370, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.984544544032555e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |