| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 19.54397394136808, |
| "eval_steps": 500, |
| "global_step": 6000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03257328990228013, |
| "grad_norm": 0.646706223487854, |
| "learning_rate": 9e-07, |
| "loss": 1.1814, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06514657980456026, |
| "grad_norm": 0.4603618383407593, |
| "learning_rate": 1.9e-06, |
| "loss": 1.1738, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09771986970684039, |
| "grad_norm": 0.4963187277317047, |
| "learning_rate": 2.9e-06, |
| "loss": 1.1665, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.13029315960912052, |
| "grad_norm": 0.4428938329219818, |
| "learning_rate": 3.9e-06, |
| "loss": 1.1512, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.16286644951140064, |
| "grad_norm": 0.36607691645622253, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 1.1333, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.19543973941368079, |
| "grad_norm": 0.30768465995788574, |
| "learning_rate": 5.9e-06, |
| "loss": 1.1202, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2280130293159609, |
| "grad_norm": 0.23890309035778046, |
| "learning_rate": 6.900000000000001e-06, |
| "loss": 1.1081, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.26058631921824105, |
| "grad_norm": 0.25988394021987915, |
| "learning_rate": 7.9e-06, |
| "loss": 1.0964, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2931596091205212, |
| "grad_norm": 0.2948506772518158, |
| "learning_rate": 8.9e-06, |
| "loss": 1.0804, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3257328990228013, |
| "grad_norm": 0.29228347539901733, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 1.0646, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3583061889250814, |
| "grad_norm": 0.24975909292697906, |
| "learning_rate": 1.09e-05, |
| "loss": 1.0531, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.39087947882736157, |
| "grad_norm": 0.30400097370147705, |
| "learning_rate": 1.19e-05, |
| "loss": 1.0387, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4234527687296417, |
| "grad_norm": 0.2392139881849289, |
| "learning_rate": 1.29e-05, |
| "loss": 1.0307, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4560260586319218, |
| "grad_norm": 0.2712036371231079, |
| "learning_rate": 1.3900000000000002e-05, |
| "loss": 1.0198, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.48859934853420195, |
| "grad_norm": 0.23245830833911896, |
| "learning_rate": 1.49e-05, |
| "loss": 1.0154, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5211726384364821, |
| "grad_norm": 0.2727172374725342, |
| "learning_rate": 1.59e-05, |
| "loss": 1.0088, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5537459283387622, |
| "grad_norm": 0.30211466550827026, |
| "learning_rate": 1.69e-05, |
| "loss": 0.9998, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5863192182410424, |
| "grad_norm": 0.36023062467575073, |
| "learning_rate": 1.79e-05, |
| "loss": 0.9862, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6188925081433225, |
| "grad_norm": 0.6321305632591248, |
| "learning_rate": 1.8900000000000002e-05, |
| "loss": 0.9607, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6514657980456026, |
| "grad_norm": 0.7605375647544861, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 0.9141, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6840390879478827, |
| "grad_norm": 1.6783995628356934, |
| "learning_rate": 2.09e-05, |
| "loss": 0.8218, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7166123778501629, |
| "grad_norm": 1.3917943239212036, |
| "learning_rate": 2.19e-05, |
| "loss": 0.726, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.749185667752443, |
| "grad_norm": 1.6959741115570068, |
| "learning_rate": 2.29e-05, |
| "loss": 0.6331, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7817589576547231, |
| "grad_norm": 1.6433072090148926, |
| "learning_rate": 2.39e-05, |
| "loss": 0.5575, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8143322475570033, |
| "grad_norm": 2.0065226554870605, |
| "learning_rate": 2.4900000000000002e-05, |
| "loss": 0.4904, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8469055374592834, |
| "grad_norm": 1.7093048095703125, |
| "learning_rate": 2.5900000000000003e-05, |
| "loss": 0.4331, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8794788273615635, |
| "grad_norm": 2.0112454891204834, |
| "learning_rate": 2.6900000000000003e-05, |
| "loss": 0.385, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9120521172638436, |
| "grad_norm": 2.178093910217285, |
| "learning_rate": 2.7900000000000004e-05, |
| "loss": 0.3391, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.9446254071661238, |
| "grad_norm": 2.12821888923645, |
| "learning_rate": 2.8899999999999998e-05, |
| "loss": 0.3148, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9771986970684039, |
| "grad_norm": 2.353641986846924, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 0.2889, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.009771986970684, |
| "grad_norm": 2.5022237300872803, |
| "learning_rate": 3.09e-05, |
| "loss": 0.2709, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.0423452768729642, |
| "grad_norm": 2.0082218647003174, |
| "learning_rate": 3.19e-05, |
| "loss": 0.2614, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.0749185667752443, |
| "grad_norm": 2.1790828704833984, |
| "learning_rate": 3.29e-05, |
| "loss": 0.2539, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.1074918566775245, |
| "grad_norm": 2.4001519680023193, |
| "learning_rate": 3.3900000000000004e-05, |
| "loss": 0.2386, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.1400651465798046, |
| "grad_norm": 2.5760984420776367, |
| "learning_rate": 3.49e-05, |
| "loss": 0.2327, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.1726384364820848, |
| "grad_norm": 2.5301952362060547, |
| "learning_rate": 3.59e-05, |
| "loss": 0.2292, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.205211726384365, |
| "grad_norm": 2.474808692932129, |
| "learning_rate": 3.69e-05, |
| "loss": 0.2293, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.237785016286645, |
| "grad_norm": 2.524728298187256, |
| "learning_rate": 3.79e-05, |
| "loss": 0.2253, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.2703583061889252, |
| "grad_norm": 2.349524974822998, |
| "learning_rate": 3.8900000000000004e-05, |
| "loss": 0.2259, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.3029315960912053, |
| "grad_norm": 1.8449956178665161, |
| "learning_rate": 3.99e-05, |
| "loss": 0.2201, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.3355048859934853, |
| "grad_norm": 2.82806134223938, |
| "learning_rate": 4.09e-05, |
| "loss": 0.2162, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.3680781758957654, |
| "grad_norm": 2.695847988128662, |
| "learning_rate": 4.19e-05, |
| "loss": 0.2181, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.4006514657980456, |
| "grad_norm": 2.953904628753662, |
| "learning_rate": 4.29e-05, |
| "loss": 0.2181, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.4332247557003257, |
| "grad_norm": 2.7019174098968506, |
| "learning_rate": 4.39e-05, |
| "loss": 0.2166, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.4657980456026058, |
| "grad_norm": 2.1432275772094727, |
| "learning_rate": 4.49e-05, |
| "loss": 0.2164, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.498371335504886, |
| "grad_norm": 3.0038726329803467, |
| "learning_rate": 4.5900000000000004e-05, |
| "loss": 0.2136, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.5309446254071661, |
| "grad_norm": 3.1258187294006348, |
| "learning_rate": 4.69e-05, |
| "loss": 0.2132, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.5635179153094463, |
| "grad_norm": 2.207747459411621, |
| "learning_rate": 4.79e-05, |
| "loss": 0.2112, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.5960912052117264, |
| "grad_norm": 2.581730604171753, |
| "learning_rate": 4.89e-05, |
| "loss": 0.2099, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.6286644951140063, |
| "grad_norm": 3.099320411682129, |
| "learning_rate": 4.99e-05, |
| "loss": 0.2097, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.6612377850162865, |
| "grad_norm": 2.6373846530914307, |
| "learning_rate": 5.0900000000000004e-05, |
| "loss": 0.2079, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.6938110749185666, |
| "grad_norm": 2.9202771186828613, |
| "learning_rate": 5.19e-05, |
| "loss": 0.2043, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.7263843648208468, |
| "grad_norm": 2.989102363586426, |
| "learning_rate": 5.2900000000000005e-05, |
| "loss": 0.2033, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.758957654723127, |
| "grad_norm": 2.375894784927368, |
| "learning_rate": 5.390000000000001e-05, |
| "loss": 0.2059, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.791530944625407, |
| "grad_norm": 2.6863505840301514, |
| "learning_rate": 5.4900000000000006e-05, |
| "loss": 0.2032, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.8241042345276872, |
| "grad_norm": 2.326268434524536, |
| "learning_rate": 5.590000000000001e-05, |
| "loss": 0.2012, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.8566775244299674, |
| "grad_norm": 2.779634952545166, |
| "learning_rate": 5.69e-05, |
| "loss": 0.2027, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.8892508143322475, |
| "grad_norm": 2.6916491985321045, |
| "learning_rate": 5.79e-05, |
| "loss": 0.1983, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.9218241042345277, |
| "grad_norm": 2.6536519527435303, |
| "learning_rate": 5.89e-05, |
| "loss": 0.2007, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.9543973941368078, |
| "grad_norm": 2.6414382457733154, |
| "learning_rate": 5.99e-05, |
| "loss": 0.1943, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.986970684039088, |
| "grad_norm": 3.05230712890625, |
| "learning_rate": 6.09e-05, |
| "loss": 0.1898, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.019543973941368, |
| "grad_norm": 3.180234909057617, |
| "learning_rate": 6.19e-05, |
| "loss": 0.1873, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.0521172638436482, |
| "grad_norm": 2.8422200679779053, |
| "learning_rate": 6.29e-05, |
| "loss": 0.1811, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.0846905537459284, |
| "grad_norm": 3.6337952613830566, |
| "learning_rate": 6.390000000000001e-05, |
| "loss": 0.182, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.1172638436482085, |
| "grad_norm": 5.311783313751221, |
| "learning_rate": 6.49e-05, |
| "loss": 0.1789, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.1498371335504887, |
| "grad_norm": 2.879094123840332, |
| "learning_rate": 6.59e-05, |
| "loss": 0.1757, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.182410423452769, |
| "grad_norm": 2.7306885719299316, |
| "learning_rate": 6.690000000000001e-05, |
| "loss": 0.1756, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.214983713355049, |
| "grad_norm": 2.2274434566497803, |
| "learning_rate": 6.790000000000001e-05, |
| "loss": 0.1696, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.247557003257329, |
| "grad_norm": 2.76662540435791, |
| "learning_rate": 6.89e-05, |
| "loss": 0.1633, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.2801302931596092, |
| "grad_norm": 2.6583666801452637, |
| "learning_rate": 6.99e-05, |
| "loss": 0.1603, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.3127035830618894, |
| "grad_norm": 2.7752110958099365, |
| "learning_rate": 7.09e-05, |
| "loss": 0.1555, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.3452768729641695, |
| "grad_norm": 2.919973611831665, |
| "learning_rate": 7.19e-05, |
| "loss": 0.1509, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.3778501628664497, |
| "grad_norm": 2.369131565093994, |
| "learning_rate": 7.29e-05, |
| "loss": 0.1453, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.41042345276873, |
| "grad_norm": 6.763181686401367, |
| "learning_rate": 7.390000000000001e-05, |
| "loss": 0.1469, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.44299674267101, |
| "grad_norm": 5.001865386962891, |
| "learning_rate": 7.49e-05, |
| "loss": 0.1568, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.47557003257329, |
| "grad_norm": 3.4782662391662598, |
| "learning_rate": 7.59e-05, |
| "loss": 0.1372, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.5081433224755703, |
| "grad_norm": 5.221865177154541, |
| "learning_rate": 7.69e-05, |
| "loss": 0.1373, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.5407166123778504, |
| "grad_norm": 3.3979134559631348, |
| "learning_rate": 7.790000000000001e-05, |
| "loss": 0.131, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.5732899022801305, |
| "grad_norm": 3.932870626449585, |
| "learning_rate": 7.890000000000001e-05, |
| "loss": 0.1176, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.6058631921824107, |
| "grad_norm": 4.280229091644287, |
| "learning_rate": 7.99e-05, |
| "loss": 0.1133, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.6384364820846904, |
| "grad_norm": 2.9509708881378174, |
| "learning_rate": 8.090000000000001e-05, |
| "loss": 0.1046, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.6710097719869705, |
| "grad_norm": 2.813812255859375, |
| "learning_rate": 8.19e-05, |
| "loss": 0.0959, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.7035830618892507, |
| "grad_norm": 3.498904228210449, |
| "learning_rate": 8.29e-05, |
| "loss": 0.0842, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.736156351791531, |
| "grad_norm": 2.793267250061035, |
| "learning_rate": 8.39e-05, |
| "loss": 0.0821, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.768729641693811, |
| "grad_norm": 3.1398348808288574, |
| "learning_rate": 8.49e-05, |
| "loss": 0.0783, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.801302931596091, |
| "grad_norm": 3.613935947418213, |
| "learning_rate": 8.59e-05, |
| "loss": 0.0679, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.8338762214983713, |
| "grad_norm": 4.147997856140137, |
| "learning_rate": 8.69e-05, |
| "loss": 0.0601, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.8664495114006514, |
| "grad_norm": 4.906704425811768, |
| "learning_rate": 8.790000000000001e-05, |
| "loss": 0.0589, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.8990228013029316, |
| "grad_norm": 4.920569896697998, |
| "learning_rate": 8.89e-05, |
| "loss": 0.0602, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.9315960912052117, |
| "grad_norm": 3.0054733753204346, |
| "learning_rate": 8.99e-05, |
| "loss": 0.0567, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.964169381107492, |
| "grad_norm": 3.0714895725250244, |
| "learning_rate": 9.090000000000001e-05, |
| "loss": 0.0478, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.996742671009772, |
| "grad_norm": 2.0885047912597656, |
| "learning_rate": 9.190000000000001e-05, |
| "loss": 0.0417, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.029315960912052, |
| "grad_norm": 3.2938196659088135, |
| "learning_rate": 9.290000000000001e-05, |
| "loss": 0.0504, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.0618892508143323, |
| "grad_norm": 3.084390640258789, |
| "learning_rate": 9.39e-05, |
| "loss": 0.0482, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.0944625407166124, |
| "grad_norm": 3.5595154762268066, |
| "learning_rate": 9.49e-05, |
| "loss": 0.0464, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.1270358306188926, |
| "grad_norm": 3.5651934146881104, |
| "learning_rate": 9.59e-05, |
| "loss": 0.0461, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.1596091205211727, |
| "grad_norm": 2.7966909408569336, |
| "learning_rate": 9.69e-05, |
| "loss": 0.0455, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.192182410423453, |
| "grad_norm": 2.4500396251678467, |
| "learning_rate": 9.790000000000001e-05, |
| "loss": 0.0395, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.224755700325733, |
| "grad_norm": 3.2736151218414307, |
| "learning_rate": 9.89e-05, |
| "loss": 0.0403, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.257328990228013, |
| "grad_norm": 2.5784664154052734, |
| "learning_rate": 9.99e-05, |
| "loss": 0.0382, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.2899022801302933, |
| "grad_norm": 2.018731117248535, |
| "learning_rate": 9.999994463727085e-05, |
| "loss": 0.0376, |
| "step": 1010 |
| }, |
| { |
| "epoch": 3.3224755700325734, |
| "grad_norm": 2.5587472915649414, |
| "learning_rate": 9.999975326009292e-05, |
| "loss": 0.0385, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.3550488599348536, |
| "grad_norm": 2.2966997623443604, |
| "learning_rate": 9.999942518549879e-05, |
| "loss": 0.0364, |
| "step": 1030 |
| }, |
| { |
| "epoch": 3.3876221498371337, |
| "grad_norm": 2.336106061935425, |
| "learning_rate": 9.999896041438544e-05, |
| "loss": 0.036, |
| "step": 1040 |
| }, |
| { |
| "epoch": 3.420195439739414, |
| "grad_norm": 3.2040939331054688, |
| "learning_rate": 9.999835894802353e-05, |
| "loss": 0.0393, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.4527687296416936, |
| "grad_norm": 2.786553382873535, |
| "learning_rate": 9.999762078805743e-05, |
| "loss": 0.0345, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.4853420195439737, |
| "grad_norm": 1.6308751106262207, |
| "learning_rate": 9.999674593650526e-05, |
| "loss": 0.037, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.517915309446254, |
| "grad_norm": 1.8265235424041748, |
| "learning_rate": 9.99957343957588e-05, |
| "loss": 0.0344, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.550488599348534, |
| "grad_norm": 2.4011759757995605, |
| "learning_rate": 9.99945861685836e-05, |
| "loss": 0.0304, |
| "step": 1090 |
| }, |
| { |
| "epoch": 3.583061889250814, |
| "grad_norm": 2.0200209617614746, |
| "learning_rate": 9.999330125811884e-05, |
| "loss": 0.0328, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.6156351791530943, |
| "grad_norm": 2.044693946838379, |
| "learning_rate": 9.999187966787744e-05, |
| "loss": 0.0355, |
| "step": 1110 |
| }, |
| { |
| "epoch": 3.6482084690553744, |
| "grad_norm": 2.1637792587280273, |
| "learning_rate": 9.999032140174595e-05, |
| "loss": 0.0342, |
| "step": 1120 |
| }, |
| { |
| "epoch": 3.6807817589576546, |
| "grad_norm": 1.9781519174575806, |
| "learning_rate": 9.998862646398464e-05, |
| "loss": 0.0349, |
| "step": 1130 |
| }, |
| { |
| "epoch": 3.7133550488599347, |
| "grad_norm": 2.601254940032959, |
| "learning_rate": 9.998679485922739e-05, |
| "loss": 0.0311, |
| "step": 1140 |
| }, |
| { |
| "epoch": 3.745928338762215, |
| "grad_norm": 1.977401614189148, |
| "learning_rate": 9.998482659248174e-05, |
| "loss": 0.0317, |
| "step": 1150 |
| }, |
| { |
| "epoch": 3.778501628664495, |
| "grad_norm": 1.9435837268829346, |
| "learning_rate": 9.998272166912883e-05, |
| "loss": 0.0325, |
| "step": 1160 |
| }, |
| { |
| "epoch": 3.811074918566775, |
| "grad_norm": 2.054821014404297, |
| "learning_rate": 9.998048009492347e-05, |
| "loss": 0.0352, |
| "step": 1170 |
| }, |
| { |
| "epoch": 3.8436482084690553, |
| "grad_norm": 1.950892448425293, |
| "learning_rate": 9.997810187599403e-05, |
| "loss": 0.0325, |
| "step": 1180 |
| }, |
| { |
| "epoch": 3.8762214983713354, |
| "grad_norm": 2.4616150856018066, |
| "learning_rate": 9.997558701884249e-05, |
| "loss": 0.0316, |
| "step": 1190 |
| }, |
| { |
| "epoch": 3.9087947882736156, |
| "grad_norm": 1.8379050493240356, |
| "learning_rate": 9.997293553034433e-05, |
| "loss": 0.0307, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.9413680781758957, |
| "grad_norm": 1.9157487154006958, |
| "learning_rate": 9.997014741774866e-05, |
| "loss": 0.0319, |
| "step": 1210 |
| }, |
| { |
| "epoch": 3.973941368078176, |
| "grad_norm": 1.8840038776397705, |
| "learning_rate": 9.996722268867803e-05, |
| "loss": 0.0315, |
| "step": 1220 |
| }, |
| { |
| "epoch": 4.006514657980456, |
| "grad_norm": 1.825744867324829, |
| "learning_rate": 9.996416135112858e-05, |
| "loss": 0.0297, |
| "step": 1230 |
| }, |
| { |
| "epoch": 4.039087947882736, |
| "grad_norm": 1.5622966289520264, |
| "learning_rate": 9.996096341346988e-05, |
| "loss": 0.0291, |
| "step": 1240 |
| }, |
| { |
| "epoch": 4.071661237785016, |
| "grad_norm": 1.652907371520996, |
| "learning_rate": 9.995762888444495e-05, |
| "loss": 0.0298, |
| "step": 1250 |
| }, |
| { |
| "epoch": 4.1042345276872965, |
| "grad_norm": 2.1362810134887695, |
| "learning_rate": 9.995415777317027e-05, |
| "loss": 0.0304, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.136807817589577, |
| "grad_norm": 1.7935142517089844, |
| "learning_rate": 9.995055008913574e-05, |
| "loss": 0.028, |
| "step": 1270 |
| }, |
| { |
| "epoch": 4.169381107491857, |
| "grad_norm": 2.094823122024536, |
| "learning_rate": 9.994680584220463e-05, |
| "loss": 0.0294, |
| "step": 1280 |
| }, |
| { |
| "epoch": 4.201954397394137, |
| "grad_norm": 1.5280042886734009, |
| "learning_rate": 9.994292504261355e-05, |
| "loss": 0.0289, |
| "step": 1290 |
| }, |
| { |
| "epoch": 4.234527687296417, |
| "grad_norm": 1.7017203569412231, |
| "learning_rate": 9.993890770097247e-05, |
| "loss": 0.0267, |
| "step": 1300 |
| }, |
| { |
| "epoch": 4.267100977198697, |
| "grad_norm": 1.8569612503051758, |
| "learning_rate": 9.993475382826467e-05, |
| "loss": 0.0282, |
| "step": 1310 |
| }, |
| { |
| "epoch": 4.299674267100977, |
| "grad_norm": 1.792932152748108, |
| "learning_rate": 9.993046343584664e-05, |
| "loss": 0.0278, |
| "step": 1320 |
| }, |
| { |
| "epoch": 4.3322475570032575, |
| "grad_norm": 1.6156980991363525, |
| "learning_rate": 9.992603653544816e-05, |
| "loss": 0.0276, |
| "step": 1330 |
| }, |
| { |
| "epoch": 4.364820846905538, |
| "grad_norm": 1.6737850904464722, |
| "learning_rate": 9.992147313917222e-05, |
| "loss": 0.0276, |
| "step": 1340 |
| }, |
| { |
| "epoch": 4.397394136807818, |
| "grad_norm": 1.6165825128555298, |
| "learning_rate": 9.991677325949497e-05, |
| "loss": 0.0277, |
| "step": 1350 |
| }, |
| { |
| "epoch": 4.429967426710098, |
| "grad_norm": 1.5214238166809082, |
| "learning_rate": 9.991193690926568e-05, |
| "loss": 0.0258, |
| "step": 1360 |
| }, |
| { |
| "epoch": 4.462540716612378, |
| "grad_norm": 1.648809790611267, |
| "learning_rate": 9.990696410170678e-05, |
| "loss": 0.0274, |
| "step": 1370 |
| }, |
| { |
| "epoch": 4.495114006514658, |
| "grad_norm": 1.5280410051345825, |
| "learning_rate": 9.990185485041371e-05, |
| "loss": 0.0247, |
| "step": 1380 |
| }, |
| { |
| "epoch": 4.527687296416938, |
| "grad_norm": 1.4952828884124756, |
| "learning_rate": 9.989660916935498e-05, |
| "loss": 0.0251, |
| "step": 1390 |
| }, |
| { |
| "epoch": 4.5602605863192185, |
| "grad_norm": 1.3932770490646362, |
| "learning_rate": 9.989122707287208e-05, |
| "loss": 0.0256, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.592833876221499, |
| "grad_norm": 1.6718034744262695, |
| "learning_rate": 9.988570857567945e-05, |
| "loss": 0.0272, |
| "step": 1410 |
| }, |
| { |
| "epoch": 4.625407166123779, |
| "grad_norm": 1.5743407011032104, |
| "learning_rate": 9.988005369286446e-05, |
| "loss": 0.0261, |
| "step": 1420 |
| }, |
| { |
| "epoch": 4.657980456026059, |
| "grad_norm": 1.6346157789230347, |
| "learning_rate": 9.987426243988734e-05, |
| "loss": 0.0276, |
| "step": 1430 |
| }, |
| { |
| "epoch": 4.690553745928339, |
| "grad_norm": 1.754012942314148, |
| "learning_rate": 9.986833483258114e-05, |
| "loss": 0.026, |
| "step": 1440 |
| }, |
| { |
| "epoch": 4.723127035830619, |
| "grad_norm": 1.5971155166625977, |
| "learning_rate": 9.986227088715173e-05, |
| "loss": 0.0258, |
| "step": 1450 |
| }, |
| { |
| "epoch": 4.755700325732899, |
| "grad_norm": 1.5895884037017822, |
| "learning_rate": 9.98560706201777e-05, |
| "loss": 0.0255, |
| "step": 1460 |
| }, |
| { |
| "epoch": 4.7882736156351795, |
| "grad_norm": 1.799141764640808, |
| "learning_rate": 9.984973404861036e-05, |
| "loss": 0.028, |
| "step": 1470 |
| }, |
| { |
| "epoch": 4.82084690553746, |
| "grad_norm": 1.5767462253570557, |
| "learning_rate": 9.984326118977361e-05, |
| "loss": 0.0277, |
| "step": 1480 |
| }, |
| { |
| "epoch": 4.85342019543974, |
| "grad_norm": 1.547109842300415, |
| "learning_rate": 9.983665206136406e-05, |
| "loss": 0.0239, |
| "step": 1490 |
| }, |
| { |
| "epoch": 4.88599348534202, |
| "grad_norm": 1.6011266708374023, |
| "learning_rate": 9.982990668145075e-05, |
| "loss": 0.023, |
| "step": 1500 |
| }, |
| { |
| "epoch": 4.918566775244299, |
| "grad_norm": 1.5277559757232666, |
| "learning_rate": 9.982302506847534e-05, |
| "loss": 0.0232, |
| "step": 1510 |
| }, |
| { |
| "epoch": 4.95114006514658, |
| "grad_norm": 1.3489938974380493, |
| "learning_rate": 9.981600724125189e-05, |
| "loss": 0.0231, |
| "step": 1520 |
| }, |
| { |
| "epoch": 4.9837133550488595, |
| "grad_norm": 1.2097440958023071, |
| "learning_rate": 9.980885321896685e-05, |
| "loss": 0.0226, |
| "step": 1530 |
| }, |
| { |
| "epoch": 5.01628664495114, |
| "grad_norm": 1.5271075963974, |
| "learning_rate": 9.980156302117905e-05, |
| "loss": 0.0217, |
| "step": 1540 |
| }, |
| { |
| "epoch": 5.04885993485342, |
| "grad_norm": 1.4403456449508667, |
| "learning_rate": 9.979413666781963e-05, |
| "loss": 0.0259, |
| "step": 1550 |
| }, |
| { |
| "epoch": 5.0814332247557, |
| "grad_norm": 1.3812097311019897, |
| "learning_rate": 9.978657417919193e-05, |
| "loss": 0.023, |
| "step": 1560 |
| }, |
| { |
| "epoch": 5.11400651465798, |
| "grad_norm": 1.5333428382873535, |
| "learning_rate": 9.977887557597153e-05, |
| "loss": 0.0242, |
| "step": 1570 |
| }, |
| { |
| "epoch": 5.14657980456026, |
| "grad_norm": 1.1747639179229736, |
| "learning_rate": 9.97710408792061e-05, |
| "loss": 0.0232, |
| "step": 1580 |
| }, |
| { |
| "epoch": 5.17915309446254, |
| "grad_norm": 1.4856892824172974, |
| "learning_rate": 9.976307011031542e-05, |
| "loss": 0.0207, |
| "step": 1590 |
| }, |
| { |
| "epoch": 5.2117263843648205, |
| "grad_norm": 1.1282317638397217, |
| "learning_rate": 9.975496329109126e-05, |
| "loss": 0.0233, |
| "step": 1600 |
| }, |
| { |
| "epoch": 5.244299674267101, |
| "grad_norm": 1.4065935611724854, |
| "learning_rate": 9.974672044369732e-05, |
| "loss": 0.0218, |
| "step": 1610 |
| }, |
| { |
| "epoch": 5.276872964169381, |
| "grad_norm": 1.1849043369293213, |
| "learning_rate": 9.97383415906693e-05, |
| "loss": 0.0221, |
| "step": 1620 |
| }, |
| { |
| "epoch": 5.309446254071661, |
| "grad_norm": 1.325810194015503, |
| "learning_rate": 9.97298267549146e-05, |
| "loss": 0.0213, |
| "step": 1630 |
| }, |
| { |
| "epoch": 5.342019543973941, |
| "grad_norm": 1.113312840461731, |
| "learning_rate": 9.972117595971249e-05, |
| "loss": 0.0228, |
| "step": 1640 |
| }, |
| { |
| "epoch": 5.374592833876221, |
| "grad_norm": 1.230494499206543, |
| "learning_rate": 9.971238922871391e-05, |
| "loss": 0.0253, |
| "step": 1650 |
| }, |
| { |
| "epoch": 5.407166123778501, |
| "grad_norm": 1.4597903490066528, |
| "learning_rate": 9.970346658594142e-05, |
| "loss": 0.0224, |
| "step": 1660 |
| }, |
| { |
| "epoch": 5.4397394136807815, |
| "grad_norm": 1.2914201021194458, |
| "learning_rate": 9.969440805578923e-05, |
| "loss": 0.0224, |
| "step": 1670 |
| }, |
| { |
| "epoch": 5.472312703583062, |
| "grad_norm": 1.2426233291625977, |
| "learning_rate": 9.968521366302298e-05, |
| "loss": 0.0233, |
| "step": 1680 |
| }, |
| { |
| "epoch": 5.504885993485342, |
| "grad_norm": 1.2101874351501465, |
| "learning_rate": 9.967588343277981e-05, |
| "loss": 0.0233, |
| "step": 1690 |
| }, |
| { |
| "epoch": 5.537459283387622, |
| "grad_norm": 1.576130747795105, |
| "learning_rate": 9.966641739056818e-05, |
| "loss": 0.0204, |
| "step": 1700 |
| }, |
| { |
| "epoch": 5.570032573289902, |
| "grad_norm": 1.4965754747390747, |
| "learning_rate": 9.965681556226793e-05, |
| "loss": 0.021, |
| "step": 1710 |
| }, |
| { |
| "epoch": 5.602605863192182, |
| "grad_norm": 1.2106349468231201, |
| "learning_rate": 9.964707797413006e-05, |
| "loss": 0.0217, |
| "step": 1720 |
| }, |
| { |
| "epoch": 5.635179153094462, |
| "grad_norm": 1.2536280155181885, |
| "learning_rate": 9.963720465277679e-05, |
| "loss": 0.0229, |
| "step": 1730 |
| }, |
| { |
| "epoch": 5.6677524429967425, |
| "grad_norm": 1.2137837409973145, |
| "learning_rate": 9.96271956252014e-05, |
| "loss": 0.0227, |
| "step": 1740 |
| }, |
| { |
| "epoch": 5.700325732899023, |
| "grad_norm": 1.4352983236312866, |
| "learning_rate": 9.961705091876816e-05, |
| "loss": 0.0217, |
| "step": 1750 |
| }, |
| { |
| "epoch": 5.732899022801303, |
| "grad_norm": 1.0887129306793213, |
| "learning_rate": 9.960677056121235e-05, |
| "loss": 0.02, |
| "step": 1760 |
| }, |
| { |
| "epoch": 5.765472312703583, |
| "grad_norm": 1.2834426164627075, |
| "learning_rate": 9.959635458064005e-05, |
| "loss": 0.0232, |
| "step": 1770 |
| }, |
| { |
| "epoch": 5.798045602605863, |
| "grad_norm": 1.3449937105178833, |
| "learning_rate": 9.958580300552815e-05, |
| "loss": 0.0205, |
| "step": 1780 |
| }, |
| { |
| "epoch": 5.830618892508143, |
| "grad_norm": 1.375016689300537, |
| "learning_rate": 9.957511586472426e-05, |
| "loss": 0.0231, |
| "step": 1790 |
| }, |
| { |
| "epoch": 5.863192182410423, |
| "grad_norm": 1.2283987998962402, |
| "learning_rate": 9.956429318744662e-05, |
| "loss": 0.0229, |
| "step": 1800 |
| }, |
| { |
| "epoch": 5.8957654723127035, |
| "grad_norm": 1.226679801940918, |
| "learning_rate": 9.955333500328404e-05, |
| "loss": 0.0201, |
| "step": 1810 |
| }, |
| { |
| "epoch": 5.928338762214984, |
| "grad_norm": 1.6469484567642212, |
| "learning_rate": 9.95422413421957e-05, |
| "loss": 0.0211, |
| "step": 1820 |
| }, |
| { |
| "epoch": 5.960912052117264, |
| "grad_norm": 1.1678240299224854, |
| "learning_rate": 9.953101223451133e-05, |
| "loss": 0.0214, |
| "step": 1830 |
| }, |
| { |
| "epoch": 5.993485342019544, |
| "grad_norm": 1.2131609916687012, |
| "learning_rate": 9.951964771093085e-05, |
| "loss": 0.0202, |
| "step": 1840 |
| }, |
| { |
| "epoch": 6.026058631921824, |
| "grad_norm": 1.3158583641052246, |
| "learning_rate": 9.950814780252442e-05, |
| "loss": 0.0204, |
| "step": 1850 |
| }, |
| { |
| "epoch": 6.058631921824104, |
| "grad_norm": 1.2186533212661743, |
| "learning_rate": 9.949651254073236e-05, |
| "loss": 0.0192, |
| "step": 1860 |
| }, |
| { |
| "epoch": 6.091205211726384, |
| "grad_norm": 1.2744988203048706, |
| "learning_rate": 9.948474195736504e-05, |
| "loss": 0.0209, |
| "step": 1870 |
| }, |
| { |
| "epoch": 6.1237785016286646, |
| "grad_norm": 1.1233289241790771, |
| "learning_rate": 9.947283608460277e-05, |
| "loss": 0.02, |
| "step": 1880 |
| }, |
| { |
| "epoch": 6.156351791530945, |
| "grad_norm": 1.092411994934082, |
| "learning_rate": 9.946079495499577e-05, |
| "loss": 0.0202, |
| "step": 1890 |
| }, |
| { |
| "epoch": 6.188925081433225, |
| "grad_norm": 1.3971725702285767, |
| "learning_rate": 9.944861860146401e-05, |
| "loss": 0.021, |
| "step": 1900 |
| }, |
| { |
| "epoch": 6.221498371335505, |
| "grad_norm": 1.7205326557159424, |
| "learning_rate": 9.943630705729719e-05, |
| "loss": 0.0205, |
| "step": 1910 |
| }, |
| { |
| "epoch": 6.254071661237785, |
| "grad_norm": 1.2655634880065918, |
| "learning_rate": 9.942386035615459e-05, |
| "loss": 0.0222, |
| "step": 1920 |
| }, |
| { |
| "epoch": 6.286644951140065, |
| "grad_norm": 0.9892485737800598, |
| "learning_rate": 9.941127853206503e-05, |
| "loss": 0.0191, |
| "step": 1930 |
| }, |
| { |
| "epoch": 6.319218241042345, |
| "grad_norm": 1.2320622205734253, |
| "learning_rate": 9.939856161942673e-05, |
| "loss": 0.0196, |
| "step": 1940 |
| }, |
| { |
| "epoch": 6.351791530944626, |
| "grad_norm": 1.315510630607605, |
| "learning_rate": 9.938570965300724e-05, |
| "loss": 0.0184, |
| "step": 1950 |
| }, |
| { |
| "epoch": 6.384364820846906, |
| "grad_norm": 0.9320159554481506, |
| "learning_rate": 9.937272266794335e-05, |
| "loss": 0.0231, |
| "step": 1960 |
| }, |
| { |
| "epoch": 6.416938110749186, |
| "grad_norm": 0.9621773958206177, |
| "learning_rate": 9.935960069974096e-05, |
| "loss": 0.0178, |
| "step": 1970 |
| }, |
| { |
| "epoch": 6.449511400651466, |
| "grad_norm": 1.2875672578811646, |
| "learning_rate": 9.934634378427506e-05, |
| "loss": 0.0204, |
| "step": 1980 |
| }, |
| { |
| "epoch": 6.482084690553746, |
| "grad_norm": 1.1186437606811523, |
| "learning_rate": 9.933295195778954e-05, |
| "loss": 0.0211, |
| "step": 1990 |
| }, |
| { |
| "epoch": 6.514657980456026, |
| "grad_norm": 1.0017237663269043, |
| "learning_rate": 9.931942525689715e-05, |
| "loss": 0.023, |
| "step": 2000 |
| }, |
| { |
| "epoch": 6.547231270358306, |
| "grad_norm": 0.9961879253387451, |
| "learning_rate": 9.930576371857936e-05, |
| "loss": 0.0205, |
| "step": 2010 |
| }, |
| { |
| "epoch": 6.579804560260587, |
| "grad_norm": 1.1414068937301636, |
| "learning_rate": 9.929196738018629e-05, |
| "loss": 0.0182, |
| "step": 2020 |
| }, |
| { |
| "epoch": 6.612377850162867, |
| "grad_norm": 1.139420509338379, |
| "learning_rate": 9.927803627943662e-05, |
| "loss": 0.0175, |
| "step": 2030 |
| }, |
| { |
| "epoch": 6.644951140065147, |
| "grad_norm": 1.1123589277267456, |
| "learning_rate": 9.926397045441744e-05, |
| "loss": 0.0195, |
| "step": 2040 |
| }, |
| { |
| "epoch": 6.677524429967427, |
| "grad_norm": 1.2396354675292969, |
| "learning_rate": 9.924976994358417e-05, |
| "loss": 0.0199, |
| "step": 2050 |
| }, |
| { |
| "epoch": 6.710097719869707, |
| "grad_norm": 1.2193408012390137, |
| "learning_rate": 9.923543478576048e-05, |
| "loss": 0.0197, |
| "step": 2060 |
| }, |
| { |
| "epoch": 6.742671009771987, |
| "grad_norm": 1.1543775796890259, |
| "learning_rate": 9.922096502013813e-05, |
| "loss": 0.0192, |
| "step": 2070 |
| }, |
| { |
| "epoch": 6.7752442996742674, |
| "grad_norm": 1.1310466527938843, |
| "learning_rate": 9.92063606862769e-05, |
| "loss": 0.0196, |
| "step": 2080 |
| }, |
| { |
| "epoch": 6.807817589576548, |
| "grad_norm": 1.2145164012908936, |
| "learning_rate": 9.919162182410453e-05, |
| "loss": 0.0196, |
| "step": 2090 |
| }, |
| { |
| "epoch": 6.840390879478828, |
| "grad_norm": 1.0499675273895264, |
| "learning_rate": 9.917674847391645e-05, |
| "loss": 0.0179, |
| "step": 2100 |
| }, |
| { |
| "epoch": 6.872964169381108, |
| "grad_norm": 0.8699261546134949, |
| "learning_rate": 9.916174067637584e-05, |
| "loss": 0.0168, |
| "step": 2110 |
| }, |
| { |
| "epoch": 6.905537459283387, |
| "grad_norm": 0.8845049142837524, |
| "learning_rate": 9.914659847251348e-05, |
| "loss": 0.0163, |
| "step": 2120 |
| }, |
| { |
| "epoch": 6.938110749185668, |
| "grad_norm": 0.8768344521522522, |
| "learning_rate": 9.913132190372753e-05, |
| "loss": 0.0189, |
| "step": 2130 |
| }, |
| { |
| "epoch": 6.970684039087947, |
| "grad_norm": 1.0072346925735474, |
| "learning_rate": 9.911591101178359e-05, |
| "loss": 0.017, |
| "step": 2140 |
| }, |
| { |
| "epoch": 7.003257328990228, |
| "grad_norm": 0.9805561304092407, |
| "learning_rate": 9.910036583881443e-05, |
| "loss": 0.0168, |
| "step": 2150 |
| }, |
| { |
| "epoch": 7.035830618892508, |
| "grad_norm": 0.9882418513298035, |
| "learning_rate": 9.908468642731995e-05, |
| "loss": 0.019, |
| "step": 2160 |
| }, |
| { |
| "epoch": 7.068403908794788, |
| "grad_norm": 1.0311851501464844, |
| "learning_rate": 9.906887282016707e-05, |
| "loss": 0.018, |
| "step": 2170 |
| }, |
| { |
| "epoch": 7.100977198697068, |
| "grad_norm": 1.3635246753692627, |
| "learning_rate": 9.90529250605896e-05, |
| "loss": 0.019, |
| "step": 2180 |
| }, |
| { |
| "epoch": 7.133550488599348, |
| "grad_norm": 0.94489586353302, |
| "learning_rate": 9.903684319218809e-05, |
| "loss": 0.0186, |
| "step": 2190 |
| }, |
| { |
| "epoch": 7.166123778501628, |
| "grad_norm": 1.1365646123886108, |
| "learning_rate": 9.902062725892976e-05, |
| "loss": 0.0195, |
| "step": 2200 |
| }, |
| { |
| "epoch": 7.198697068403908, |
| "grad_norm": 1.1778250932693481, |
| "learning_rate": 9.900427730514834e-05, |
| "loss": 0.0196, |
| "step": 2210 |
| }, |
| { |
| "epoch": 7.231270358306189, |
| "grad_norm": 1.0461801290512085, |
| "learning_rate": 9.8987793375544e-05, |
| "loss": 0.0183, |
| "step": 2220 |
| }, |
| { |
| "epoch": 7.263843648208469, |
| "grad_norm": 1.068851351737976, |
| "learning_rate": 9.897117551518318e-05, |
| "loss": 0.0172, |
| "step": 2230 |
| }, |
| { |
| "epoch": 7.296416938110749, |
| "grad_norm": 0.906645655632019, |
| "learning_rate": 9.895442376949844e-05, |
| "loss": 0.016, |
| "step": 2240 |
| }, |
| { |
| "epoch": 7.328990228013029, |
| "grad_norm": 0.9077069759368896, |
| "learning_rate": 9.893753818428845e-05, |
| "loss": 0.0178, |
| "step": 2250 |
| }, |
| { |
| "epoch": 7.361563517915309, |
| "grad_norm": 0.8832686543464661, |
| "learning_rate": 9.892051880571773e-05, |
| "loss": 0.017, |
| "step": 2260 |
| }, |
| { |
| "epoch": 7.394136807817589, |
| "grad_norm": 0.9024226665496826, |
| "learning_rate": 9.890336568031663e-05, |
| "loss": 0.0154, |
| "step": 2270 |
| }, |
| { |
| "epoch": 7.4267100977198695, |
| "grad_norm": 0.8265984058380127, |
| "learning_rate": 9.888607885498113e-05, |
| "loss": 0.0166, |
| "step": 2280 |
| }, |
| { |
| "epoch": 7.45928338762215, |
| "grad_norm": 0.9777111411094666, |
| "learning_rate": 9.886865837697275e-05, |
| "loss": 0.0172, |
| "step": 2290 |
| }, |
| { |
| "epoch": 7.49185667752443, |
| "grad_norm": 0.9763800501823425, |
| "learning_rate": 9.88511042939184e-05, |
| "loss": 0.0188, |
| "step": 2300 |
| }, |
| { |
| "epoch": 7.52442996742671, |
| "grad_norm": 1.0408334732055664, |
| "learning_rate": 9.883341665381028e-05, |
| "loss": 0.0187, |
| "step": 2310 |
| }, |
| { |
| "epoch": 7.55700325732899, |
| "grad_norm": 0.6989148259162903, |
| "learning_rate": 9.881559550500575e-05, |
| "loss": 0.0174, |
| "step": 2320 |
| }, |
| { |
| "epoch": 7.58957654723127, |
| "grad_norm": 1.2441219091415405, |
| "learning_rate": 9.879764089622712e-05, |
| "loss": 0.0175, |
| "step": 2330 |
| }, |
| { |
| "epoch": 7.62214983713355, |
| "grad_norm": 0.9775832891464233, |
| "learning_rate": 9.87795528765616e-05, |
| "loss": 0.0174, |
| "step": 2340 |
| }, |
| { |
| "epoch": 7.6547231270358305, |
| "grad_norm": 1.1409343481063843, |
| "learning_rate": 9.876133149546118e-05, |
| "loss": 0.0198, |
| "step": 2350 |
| }, |
| { |
| "epoch": 7.687296416938111, |
| "grad_norm": 0.9530286192893982, |
| "learning_rate": 9.874297680274238e-05, |
| "loss": 0.0174, |
| "step": 2360 |
| }, |
| { |
| "epoch": 7.719869706840391, |
| "grad_norm": 0.9352869391441345, |
| "learning_rate": 9.872448884858624e-05, |
| "loss": 0.0175, |
| "step": 2370 |
| }, |
| { |
| "epoch": 7.752442996742671, |
| "grad_norm": 0.9207677245140076, |
| "learning_rate": 9.870586768353815e-05, |
| "loss": 0.0183, |
| "step": 2380 |
| }, |
| { |
| "epoch": 7.785016286644951, |
| "grad_norm": 0.8910165429115295, |
| "learning_rate": 9.868711335850764e-05, |
| "loss": 0.0189, |
| "step": 2390 |
| }, |
| { |
| "epoch": 7.817589576547231, |
| "grad_norm": 1.199159026145935, |
| "learning_rate": 9.866822592476833e-05, |
| "loss": 0.0181, |
| "step": 2400 |
| }, |
| { |
| "epoch": 7.850162866449511, |
| "grad_norm": 1.0165252685546875, |
| "learning_rate": 9.86492054339577e-05, |
| "loss": 0.0193, |
| "step": 2410 |
| }, |
| { |
| "epoch": 7.8827361563517915, |
| "grad_norm": 0.8919486999511719, |
| "learning_rate": 9.863005193807711e-05, |
| "loss": 0.0173, |
| "step": 2420 |
| }, |
| { |
| "epoch": 7.915309446254072, |
| "grad_norm": 0.7932745218276978, |
| "learning_rate": 9.861076548949143e-05, |
| "loss": 0.017, |
| "step": 2430 |
| }, |
| { |
| "epoch": 7.947882736156352, |
| "grad_norm": 0.8434972763061523, |
| "learning_rate": 9.859134614092912e-05, |
| "loss": 0.0177, |
| "step": 2440 |
| }, |
| { |
| "epoch": 7.980456026058632, |
| "grad_norm": 1.005811333656311, |
| "learning_rate": 9.857179394548191e-05, |
| "loss": 0.0156, |
| "step": 2450 |
| }, |
| { |
| "epoch": 8.013029315960912, |
| "grad_norm": 0.9450655579566956, |
| "learning_rate": 9.855210895660477e-05, |
| "loss": 0.0179, |
| "step": 2460 |
| }, |
| { |
| "epoch": 8.045602605863191, |
| "grad_norm": 1.0200849771499634, |
| "learning_rate": 9.853229122811568e-05, |
| "loss": 0.0182, |
| "step": 2470 |
| }, |
| { |
| "epoch": 8.078175895765472, |
| "grad_norm": 0.7630968689918518, |
| "learning_rate": 9.851234081419559e-05, |
| "loss": 0.0175, |
| "step": 2480 |
| }, |
| { |
| "epoch": 8.110749185667752, |
| "grad_norm": 0.7456977963447571, |
| "learning_rate": 9.849225776938814e-05, |
| "loss": 0.0182, |
| "step": 2490 |
| }, |
| { |
| "epoch": 8.143322475570033, |
| "grad_norm": 0.7413007020950317, |
| "learning_rate": 9.847204214859964e-05, |
| "loss": 0.0169, |
| "step": 2500 |
| }, |
| { |
| "epoch": 8.175895765472312, |
| "grad_norm": 0.704645037651062, |
| "learning_rate": 9.845169400709879e-05, |
| "loss": 0.0169, |
| "step": 2510 |
| }, |
| { |
| "epoch": 8.208469055374593, |
| "grad_norm": 0.9419644474983215, |
| "learning_rate": 9.843121340051664e-05, |
| "loss": 0.0164, |
| "step": 2520 |
| }, |
| { |
| "epoch": 8.241042345276872, |
| "grad_norm": 1.1566686630249023, |
| "learning_rate": 9.841060038484641e-05, |
| "loss": 0.0159, |
| "step": 2530 |
| }, |
| { |
| "epoch": 8.273615635179153, |
| "grad_norm": 0.862409770488739, |
| "learning_rate": 9.838985501644328e-05, |
| "loss": 0.016, |
| "step": 2540 |
| }, |
| { |
| "epoch": 8.306188925081432, |
| "grad_norm": 0.8754869103431702, |
| "learning_rate": 9.83689773520243e-05, |
| "loss": 0.016, |
| "step": 2550 |
| }, |
| { |
| "epoch": 8.338762214983714, |
| "grad_norm": 1.02450692653656, |
| "learning_rate": 9.834796744866819e-05, |
| "loss": 0.0164, |
| "step": 2560 |
| }, |
| { |
| "epoch": 8.371335504885993, |
| "grad_norm": 0.7608270645141602, |
| "learning_rate": 9.832682536381525e-05, |
| "loss": 0.0155, |
| "step": 2570 |
| }, |
| { |
| "epoch": 8.403908794788274, |
| "grad_norm": 0.7060204744338989, |
| "learning_rate": 9.830555115526711e-05, |
| "loss": 0.0164, |
| "step": 2580 |
| }, |
| { |
| "epoch": 8.436482084690553, |
| "grad_norm": 0.6277144551277161, |
| "learning_rate": 9.828414488118667e-05, |
| "loss": 0.0153, |
| "step": 2590 |
| }, |
| { |
| "epoch": 8.469055374592834, |
| "grad_norm": 0.7759043574333191, |
| "learning_rate": 9.826260660009785e-05, |
| "loss": 0.0147, |
| "step": 2600 |
| }, |
| { |
| "epoch": 8.501628664495113, |
| "grad_norm": 0.8001388311386108, |
| "learning_rate": 9.824093637088547e-05, |
| "loss": 0.0151, |
| "step": 2610 |
| }, |
| { |
| "epoch": 8.534201954397394, |
| "grad_norm": 0.7903552055358887, |
| "learning_rate": 9.821913425279514e-05, |
| "loss": 0.016, |
| "step": 2620 |
| }, |
| { |
| "epoch": 8.566775244299674, |
| "grad_norm": 0.840638279914856, |
| "learning_rate": 9.8197200305433e-05, |
| "loss": 0.0155, |
| "step": 2630 |
| }, |
| { |
| "epoch": 8.599348534201955, |
| "grad_norm": 0.9356780648231506, |
| "learning_rate": 9.817513458876564e-05, |
| "loss": 0.0176, |
| "step": 2640 |
| }, |
| { |
| "epoch": 8.631921824104234, |
| "grad_norm": 0.8237717151641846, |
| "learning_rate": 9.815293716311987e-05, |
| "loss": 0.0157, |
| "step": 2650 |
| }, |
| { |
| "epoch": 8.664495114006515, |
| "grad_norm": 1.1324107646942139, |
| "learning_rate": 9.813060808918262e-05, |
| "loss": 0.0156, |
| "step": 2660 |
| }, |
| { |
| "epoch": 8.697068403908794, |
| "grad_norm": 0.7818335294723511, |
| "learning_rate": 9.810814742800069e-05, |
| "loss": 0.0156, |
| "step": 2670 |
| }, |
| { |
| "epoch": 8.729641693811075, |
| "grad_norm": 0.8096736669540405, |
| "learning_rate": 9.808555524098074e-05, |
| "loss": 0.0148, |
| "step": 2680 |
| }, |
| { |
| "epoch": 8.762214983713354, |
| "grad_norm": 1.0488592386245728, |
| "learning_rate": 9.806283158988887e-05, |
| "loss": 0.0151, |
| "step": 2690 |
| }, |
| { |
| "epoch": 8.794788273615636, |
| "grad_norm": 0.8972393870353699, |
| "learning_rate": 9.803997653685072e-05, |
| "loss": 0.0153, |
| "step": 2700 |
| }, |
| { |
| "epoch": 8.827361563517915, |
| "grad_norm": 0.8795937299728394, |
| "learning_rate": 9.801699014435112e-05, |
| "loss": 0.0159, |
| "step": 2710 |
| }, |
| { |
| "epoch": 8.859934853420196, |
| "grad_norm": 0.7514339685440063, |
| "learning_rate": 9.799387247523398e-05, |
| "loss": 0.0153, |
| "step": 2720 |
| }, |
| { |
| "epoch": 8.892508143322475, |
| "grad_norm": 0.8946635127067566, |
| "learning_rate": 9.797062359270215e-05, |
| "loss": 0.0136, |
| "step": 2730 |
| }, |
| { |
| "epoch": 8.925081433224756, |
| "grad_norm": 0.8136707544326782, |
| "learning_rate": 9.794724356031715e-05, |
| "loss": 0.0143, |
| "step": 2740 |
| }, |
| { |
| "epoch": 8.957654723127035, |
| "grad_norm": 0.6647421717643738, |
| "learning_rate": 9.792373244199913e-05, |
| "loss": 0.0142, |
| "step": 2750 |
| }, |
| { |
| "epoch": 8.990228013029316, |
| "grad_norm": 0.8160433769226074, |
| "learning_rate": 9.790009030202658e-05, |
| "loss": 0.0158, |
| "step": 2760 |
| }, |
| { |
| "epoch": 9.022801302931596, |
| "grad_norm": 0.6846116185188293, |
| "learning_rate": 9.78763172050362e-05, |
| "loss": 0.0133, |
| "step": 2770 |
| }, |
| { |
| "epoch": 9.055374592833877, |
| "grad_norm": 0.806671142578125, |
| "learning_rate": 9.785241321602274e-05, |
| "loss": 0.0163, |
| "step": 2780 |
| }, |
| { |
| "epoch": 9.087947882736156, |
| "grad_norm": 0.7517706751823425, |
| "learning_rate": 9.782837840033879e-05, |
| "loss": 0.0163, |
| "step": 2790 |
| }, |
| { |
| "epoch": 9.120521172638437, |
| "grad_norm": 0.866486668586731, |
| "learning_rate": 9.780421282369461e-05, |
| "loss": 0.017, |
| "step": 2800 |
| }, |
| { |
| "epoch": 9.153094462540716, |
| "grad_norm": 0.7923924326896667, |
| "learning_rate": 9.777991655215797e-05, |
| "loss": 0.0187, |
| "step": 2810 |
| }, |
| { |
| "epoch": 9.185667752442997, |
| "grad_norm": 0.8920378684997559, |
| "learning_rate": 9.775548965215394e-05, |
| "loss": 0.0161, |
| "step": 2820 |
| }, |
| { |
| "epoch": 9.218241042345277, |
| "grad_norm": 0.7856971621513367, |
| "learning_rate": 9.773093219046474e-05, |
| "loss": 0.0154, |
| "step": 2830 |
| }, |
| { |
| "epoch": 9.250814332247558, |
| "grad_norm": 1.1112109422683716, |
| "learning_rate": 9.770624423422954e-05, |
| "loss": 0.0164, |
| "step": 2840 |
| }, |
| { |
| "epoch": 9.283387622149837, |
| "grad_norm": 0.8900606632232666, |
| "learning_rate": 9.768142585094426e-05, |
| "loss": 0.0148, |
| "step": 2850 |
| }, |
| { |
| "epoch": 9.315960912052118, |
| "grad_norm": 0.808012068271637, |
| "learning_rate": 9.765647710846142e-05, |
| "loss": 0.015, |
| "step": 2860 |
| }, |
| { |
| "epoch": 9.348534201954397, |
| "grad_norm": 0.7280744910240173, |
| "learning_rate": 9.763139807498991e-05, |
| "loss": 0.0168, |
| "step": 2870 |
| }, |
| { |
| "epoch": 9.381107491856678, |
| "grad_norm": 0.7034526467323303, |
| "learning_rate": 9.760618881909487e-05, |
| "loss": 0.0142, |
| "step": 2880 |
| }, |
| { |
| "epoch": 9.413680781758957, |
| "grad_norm": 0.6753115653991699, |
| "learning_rate": 9.758084940969744e-05, |
| "loss": 0.0147, |
| "step": 2890 |
| }, |
| { |
| "epoch": 9.446254071661238, |
| "grad_norm": 0.7895611524581909, |
| "learning_rate": 9.755537991607459e-05, |
| "loss": 0.0136, |
| "step": 2900 |
| }, |
| { |
| "epoch": 9.478827361563518, |
| "grad_norm": 0.7646326422691345, |
| "learning_rate": 9.752978040785895e-05, |
| "loss": 0.0153, |
| "step": 2910 |
| }, |
| { |
| "epoch": 9.511400651465799, |
| "grad_norm": 0.784275233745575, |
| "learning_rate": 9.750405095503859e-05, |
| "loss": 0.0136, |
| "step": 2920 |
| }, |
| { |
| "epoch": 9.543973941368078, |
| "grad_norm": 0.6947634816169739, |
| "learning_rate": 9.747819162795686e-05, |
| "loss": 0.0156, |
| "step": 2930 |
| }, |
| { |
| "epoch": 9.576547231270359, |
| "grad_norm": 0.7826408743858337, |
| "learning_rate": 9.745220249731217e-05, |
| "loss": 0.0134, |
| "step": 2940 |
| }, |
| { |
| "epoch": 9.609120521172638, |
| "grad_norm": 0.8155382871627808, |
| "learning_rate": 9.742608363415781e-05, |
| "loss": 0.0131, |
| "step": 2950 |
| }, |
| { |
| "epoch": 9.64169381107492, |
| "grad_norm": 0.7903912663459778, |
| "learning_rate": 9.739983510990176e-05, |
| "loss": 0.0151, |
| "step": 2960 |
| }, |
| { |
| "epoch": 9.674267100977199, |
| "grad_norm": 0.6831985712051392, |
| "learning_rate": 9.737345699630647e-05, |
| "loss": 0.0142, |
| "step": 2970 |
| }, |
| { |
| "epoch": 9.70684039087948, |
| "grad_norm": 0.7938959002494812, |
| "learning_rate": 9.734694936548869e-05, |
| "loss": 0.0157, |
| "step": 2980 |
| }, |
| { |
| "epoch": 9.739413680781759, |
| "grad_norm": 0.8478028774261475, |
| "learning_rate": 9.732031228991932e-05, |
| "loss": 0.0135, |
| "step": 2990 |
| }, |
| { |
| "epoch": 9.77198697068404, |
| "grad_norm": 0.8881486058235168, |
| "learning_rate": 9.729354584242302e-05, |
| "loss": 0.0145, |
| "step": 3000 |
| }, |
| { |
| "epoch": 9.80456026058632, |
| "grad_norm": 0.8107954859733582, |
| "learning_rate": 9.726665009617832e-05, |
| "loss": 0.0147, |
| "step": 3010 |
| }, |
| { |
| "epoch": 9.8371335504886, |
| "grad_norm": 0.593552827835083, |
| "learning_rate": 9.723962512471714e-05, |
| "loss": 0.0151, |
| "step": 3020 |
| }, |
| { |
| "epoch": 9.86970684039088, |
| "grad_norm": 0.7691318392753601, |
| "learning_rate": 9.72124710019247e-05, |
| "loss": 0.0133, |
| "step": 3030 |
| }, |
| { |
| "epoch": 9.90228013029316, |
| "grad_norm": 0.8209340572357178, |
| "learning_rate": 9.718518780203934e-05, |
| "loss": 0.0143, |
| "step": 3040 |
| }, |
| { |
| "epoch": 9.93485342019544, |
| "grad_norm": 0.786946713924408, |
| "learning_rate": 9.715777559965228e-05, |
| "loss": 0.016, |
| "step": 3050 |
| }, |
| { |
| "epoch": 9.967426710097719, |
| "grad_norm": 0.833220899105072, |
| "learning_rate": 9.713023446970746e-05, |
| "loss": 0.0142, |
| "step": 3060 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.8491772413253784, |
| "learning_rate": 9.710256448750126e-05, |
| "loss": 0.0155, |
| "step": 3070 |
| }, |
| { |
| "epoch": 10.03257328990228, |
| "grad_norm": 0.7305663228034973, |
| "learning_rate": 9.707476572868235e-05, |
| "loss": 0.0135, |
| "step": 3080 |
| }, |
| { |
| "epoch": 10.06514657980456, |
| "grad_norm": 0.7415571808815002, |
| "learning_rate": 9.704683826925149e-05, |
| "loss": 0.0127, |
| "step": 3090 |
| }, |
| { |
| "epoch": 10.09771986970684, |
| "grad_norm": 0.723970890045166, |
| "learning_rate": 9.701878218556129e-05, |
| "loss": 0.0139, |
| "step": 3100 |
| }, |
| { |
| "epoch": 10.13029315960912, |
| "grad_norm": 0.7780591249465942, |
| "learning_rate": 9.699059755431598e-05, |
| "loss": 0.015, |
| "step": 3110 |
| }, |
| { |
| "epoch": 10.1628664495114, |
| "grad_norm": 0.8553524017333984, |
| "learning_rate": 9.696228445257132e-05, |
| "loss": 0.0144, |
| "step": 3120 |
| }, |
| { |
| "epoch": 10.19543973941368, |
| "grad_norm": 0.6987228393554688, |
| "learning_rate": 9.693384295773419e-05, |
| "loss": 0.0143, |
| "step": 3130 |
| }, |
| { |
| "epoch": 10.22801302931596, |
| "grad_norm": 0.6953451633453369, |
| "learning_rate": 9.690527314756259e-05, |
| "loss": 0.0126, |
| "step": 3140 |
| }, |
| { |
| "epoch": 10.260586319218241, |
| "grad_norm": 0.7900610566139221, |
| "learning_rate": 9.687657510016527e-05, |
| "loss": 0.0142, |
| "step": 3150 |
| }, |
| { |
| "epoch": 10.29315960912052, |
| "grad_norm": 0.7502635717391968, |
| "learning_rate": 9.684774889400161e-05, |
| "loss": 0.0137, |
| "step": 3160 |
| }, |
| { |
| "epoch": 10.325732899022801, |
| "grad_norm": 0.8514730334281921, |
| "learning_rate": 9.681879460788135e-05, |
| "loss": 0.0145, |
| "step": 3170 |
| }, |
| { |
| "epoch": 10.35830618892508, |
| "grad_norm": 0.6699235439300537, |
| "learning_rate": 9.67897123209644e-05, |
| "loss": 0.016, |
| "step": 3180 |
| }, |
| { |
| "epoch": 10.390879478827362, |
| "grad_norm": 0.9717312455177307, |
| "learning_rate": 9.676050211276062e-05, |
| "loss": 0.0139, |
| "step": 3190 |
| }, |
| { |
| "epoch": 10.423452768729641, |
| "grad_norm": 0.8204063177108765, |
| "learning_rate": 9.673116406312962e-05, |
| "loss": 0.0149, |
| "step": 3200 |
| }, |
| { |
| "epoch": 10.456026058631922, |
| "grad_norm": 0.6988447308540344, |
| "learning_rate": 9.67016982522805e-05, |
| "loss": 0.0169, |
| "step": 3210 |
| }, |
| { |
| "epoch": 10.488599348534201, |
| "grad_norm": 0.7764069437980652, |
| "learning_rate": 9.667210476077164e-05, |
| "loss": 0.0134, |
| "step": 3220 |
| }, |
| { |
| "epoch": 10.521172638436482, |
| "grad_norm": 0.8181342482566833, |
| "learning_rate": 9.664238366951055e-05, |
| "loss": 0.014, |
| "step": 3230 |
| }, |
| { |
| "epoch": 10.553745928338762, |
| "grad_norm": 0.8816471099853516, |
| "learning_rate": 9.661253505975355e-05, |
| "loss": 0.0132, |
| "step": 3240 |
| }, |
| { |
| "epoch": 10.586319218241043, |
| "grad_norm": 0.6358229517936707, |
| "learning_rate": 9.658255901310557e-05, |
| "loss": 0.0141, |
| "step": 3250 |
| }, |
| { |
| "epoch": 10.618892508143322, |
| "grad_norm": 0.8587698340415955, |
| "learning_rate": 9.655245561152e-05, |
| "loss": 0.0147, |
| "step": 3260 |
| }, |
| { |
| "epoch": 10.651465798045603, |
| "grad_norm": 0.7497543692588806, |
| "learning_rate": 9.65222249372984e-05, |
| "loss": 0.0138, |
| "step": 3270 |
| }, |
| { |
| "epoch": 10.684039087947882, |
| "grad_norm": 0.7951953411102295, |
| "learning_rate": 9.649186707309026e-05, |
| "loss": 0.0124, |
| "step": 3280 |
| }, |
| { |
| "epoch": 10.716612377850163, |
| "grad_norm": 0.7938198447227478, |
| "learning_rate": 9.646138210189283e-05, |
| "loss": 0.0144, |
| "step": 3290 |
| }, |
| { |
| "epoch": 10.749185667752442, |
| "grad_norm": 0.7425335049629211, |
| "learning_rate": 9.643077010705087e-05, |
| "loss": 0.0124, |
| "step": 3300 |
| }, |
| { |
| "epoch": 10.781758957654723, |
| "grad_norm": 0.6020338535308838, |
| "learning_rate": 9.640003117225637e-05, |
| "loss": 0.014, |
| "step": 3310 |
| }, |
| { |
| "epoch": 10.814332247557003, |
| "grad_norm": 0.7467803359031677, |
| "learning_rate": 9.636916538154846e-05, |
| "loss": 0.016, |
| "step": 3320 |
| }, |
| { |
| "epoch": 10.846905537459284, |
| "grad_norm": 0.7082899212837219, |
| "learning_rate": 9.633817281931296e-05, |
| "loss": 0.0168, |
| "step": 3330 |
| }, |
| { |
| "epoch": 10.879478827361563, |
| "grad_norm": 0.7912280559539795, |
| "learning_rate": 9.630705357028242e-05, |
| "loss": 0.0136, |
| "step": 3340 |
| }, |
| { |
| "epoch": 10.912052117263844, |
| "grad_norm": 0.6572038531303406, |
| "learning_rate": 9.627580771953563e-05, |
| "loss": 0.0129, |
| "step": 3350 |
| }, |
| { |
| "epoch": 10.944625407166123, |
| "grad_norm": 0.8878430724143982, |
| "learning_rate": 9.624443535249759e-05, |
| "loss": 0.0134, |
| "step": 3360 |
| }, |
| { |
| "epoch": 10.977198697068404, |
| "grad_norm": 0.6283133625984192, |
| "learning_rate": 9.621293655493913e-05, |
| "loss": 0.0159, |
| "step": 3370 |
| }, |
| { |
| "epoch": 11.009771986970684, |
| "grad_norm": 0.7855213284492493, |
| "learning_rate": 9.618131141297675e-05, |
| "loss": 0.0151, |
| "step": 3380 |
| }, |
| { |
| "epoch": 11.042345276872965, |
| "grad_norm": 0.6657193303108215, |
| "learning_rate": 9.614956001307242e-05, |
| "loss": 0.0143, |
| "step": 3390 |
| }, |
| { |
| "epoch": 11.074918566775244, |
| "grad_norm": 0.99737948179245, |
| "learning_rate": 9.611768244203321e-05, |
| "loss": 0.0149, |
| "step": 3400 |
| }, |
| { |
| "epoch": 11.107491856677525, |
| "grad_norm": 0.5985683798789978, |
| "learning_rate": 9.60856787870112e-05, |
| "loss": 0.0142, |
| "step": 3410 |
| }, |
| { |
| "epoch": 11.140065146579804, |
| "grad_norm": 0.7139508724212646, |
| "learning_rate": 9.605354913550318e-05, |
| "loss": 0.0149, |
| "step": 3420 |
| }, |
| { |
| "epoch": 11.172638436482085, |
| "grad_norm": 0.6530821919441223, |
| "learning_rate": 9.602129357535037e-05, |
| "loss": 0.0135, |
| "step": 3430 |
| }, |
| { |
| "epoch": 11.205211726384364, |
| "grad_norm": 0.6087831854820251, |
| "learning_rate": 9.598891219473825e-05, |
| "loss": 0.0132, |
| "step": 3440 |
| }, |
| { |
| "epoch": 11.237785016286646, |
| "grad_norm": 0.631524384021759, |
| "learning_rate": 9.595640508219625e-05, |
| "loss": 0.0136, |
| "step": 3450 |
| }, |
| { |
| "epoch": 11.270358306188925, |
| "grad_norm": 0.7701088786125183, |
| "learning_rate": 9.592377232659761e-05, |
| "loss": 0.0146, |
| "step": 3460 |
| }, |
| { |
| "epoch": 11.302931596091206, |
| "grad_norm": 0.6873641610145569, |
| "learning_rate": 9.589101401715904e-05, |
| "loss": 0.0139, |
| "step": 3470 |
| }, |
| { |
| "epoch": 11.335504885993485, |
| "grad_norm": 0.7974376678466797, |
| "learning_rate": 9.585813024344045e-05, |
| "loss": 0.0128, |
| "step": 3480 |
| }, |
| { |
| "epoch": 11.368078175895766, |
| "grad_norm": 0.7017616033554077, |
| "learning_rate": 9.58251210953449e-05, |
| "loss": 0.0138, |
| "step": 3490 |
| }, |
| { |
| "epoch": 11.400651465798045, |
| "grad_norm": 0.7259907722473145, |
| "learning_rate": 9.579198666311809e-05, |
| "loss": 0.013, |
| "step": 3500 |
| }, |
| { |
| "epoch": 11.433224755700326, |
| "grad_norm": 0.6088874340057373, |
| "learning_rate": 9.575872703734832e-05, |
| "loss": 0.0126, |
| "step": 3510 |
| }, |
| { |
| "epoch": 11.465798045602606, |
| "grad_norm": 0.5169392824172974, |
| "learning_rate": 9.572534230896611e-05, |
| "loss": 0.0116, |
| "step": 3520 |
| }, |
| { |
| "epoch": 11.498371335504887, |
| "grad_norm": 0.8139016628265381, |
| "learning_rate": 9.569183256924403e-05, |
| "loss": 0.0126, |
| "step": 3530 |
| }, |
| { |
| "epoch": 11.530944625407166, |
| "grad_norm": 0.7374736070632935, |
| "learning_rate": 9.565819790979646e-05, |
| "loss": 0.0131, |
| "step": 3540 |
| }, |
| { |
| "epoch": 11.563517915309447, |
| "grad_norm": 0.6468128561973572, |
| "learning_rate": 9.562443842257925e-05, |
| "loss": 0.0129, |
| "step": 3550 |
| }, |
| { |
| "epoch": 11.596091205211726, |
| "grad_norm": 0.6643866896629333, |
| "learning_rate": 9.559055419988956e-05, |
| "loss": 0.0133, |
| "step": 3560 |
| }, |
| { |
| "epoch": 11.628664495114007, |
| "grad_norm": 0.681177020072937, |
| "learning_rate": 9.555654533436557e-05, |
| "loss": 0.0135, |
| "step": 3570 |
| }, |
| { |
| "epoch": 11.661237785016286, |
| "grad_norm": 0.7795754075050354, |
| "learning_rate": 9.552241191898621e-05, |
| "loss": 0.0114, |
| "step": 3580 |
| }, |
| { |
| "epoch": 11.693811074918568, |
| "grad_norm": 0.6136661171913147, |
| "learning_rate": 9.548815404707092e-05, |
| "loss": 0.0139, |
| "step": 3590 |
| }, |
| { |
| "epoch": 11.726384364820847, |
| "grad_norm": 0.6191911697387695, |
| "learning_rate": 9.545377181227942e-05, |
| "loss": 0.0147, |
| "step": 3600 |
| }, |
| { |
| "epoch": 11.758957654723128, |
| "grad_norm": 0.67829829454422, |
| "learning_rate": 9.541926530861145e-05, |
| "loss": 0.013, |
| "step": 3610 |
| }, |
| { |
| "epoch": 11.791530944625407, |
| "grad_norm": 0.7790681719779968, |
| "learning_rate": 9.538463463040645e-05, |
| "loss": 0.0131, |
| "step": 3620 |
| }, |
| { |
| "epoch": 11.824104234527688, |
| "grad_norm": 0.694274365901947, |
| "learning_rate": 9.534987987234337e-05, |
| "loss": 0.0138, |
| "step": 3630 |
| }, |
| { |
| "epoch": 11.856677524429967, |
| "grad_norm": 0.5702154636383057, |
| "learning_rate": 9.53150011294404e-05, |
| "loss": 0.0121, |
| "step": 3640 |
| }, |
| { |
| "epoch": 11.889250814332247, |
| "grad_norm": 0.5818066000938416, |
| "learning_rate": 9.527999849705471e-05, |
| "loss": 0.0132, |
| "step": 3650 |
| }, |
| { |
| "epoch": 11.921824104234528, |
| "grad_norm": 0.6952799558639526, |
| "learning_rate": 9.524487207088213e-05, |
| "loss": 0.0117, |
| "step": 3660 |
| }, |
| { |
| "epoch": 11.954397394136809, |
| "grad_norm": 0.5978224277496338, |
| "learning_rate": 9.520962194695698e-05, |
| "loss": 0.0137, |
| "step": 3670 |
| }, |
| { |
| "epoch": 11.986970684039088, |
| "grad_norm": 0.6350916028022766, |
| "learning_rate": 9.517424822165175e-05, |
| "loss": 0.0116, |
| "step": 3680 |
| }, |
| { |
| "epoch": 12.019543973941367, |
| "grad_norm": 0.651490330696106, |
| "learning_rate": 9.513875099167685e-05, |
| "loss": 0.0164, |
| "step": 3690 |
| }, |
| { |
| "epoch": 12.052117263843648, |
| "grad_norm": 0.7070964574813843, |
| "learning_rate": 9.510313035408035e-05, |
| "loss": 0.0142, |
| "step": 3700 |
| }, |
| { |
| "epoch": 12.084690553745927, |
| "grad_norm": 0.8875077962875366, |
| "learning_rate": 9.506738640624775e-05, |
| "loss": 0.0121, |
| "step": 3710 |
| }, |
| { |
| "epoch": 12.117263843648209, |
| "grad_norm": 0.8056143522262573, |
| "learning_rate": 9.50315192459016e-05, |
| "loss": 0.013, |
| "step": 3720 |
| }, |
| { |
| "epoch": 12.149837133550488, |
| "grad_norm": 0.7540460228919983, |
| "learning_rate": 9.499552897110136e-05, |
| "loss": 0.0126, |
| "step": 3730 |
| }, |
| { |
| "epoch": 12.182410423452769, |
| "grad_norm": 0.7277035713195801, |
| "learning_rate": 9.495941568024304e-05, |
| "loss": 0.014, |
| "step": 3740 |
| }, |
| { |
| "epoch": 12.214983713355048, |
| "grad_norm": 0.7212410569190979, |
| "learning_rate": 9.492317947205904e-05, |
| "loss": 0.0116, |
| "step": 3750 |
| }, |
| { |
| "epoch": 12.247557003257329, |
| "grad_norm": 0.6804754137992859, |
| "learning_rate": 9.488682044561775e-05, |
| "loss": 0.0124, |
| "step": 3760 |
| }, |
| { |
| "epoch": 12.280130293159608, |
| "grad_norm": 0.7375156879425049, |
| "learning_rate": 9.485033870032335e-05, |
| "loss": 0.0132, |
| "step": 3770 |
| }, |
| { |
| "epoch": 12.31270358306189, |
| "grad_norm": 0.8549667000770569, |
| "learning_rate": 9.481373433591556e-05, |
| "loss": 0.0123, |
| "step": 3780 |
| }, |
| { |
| "epoch": 12.345276872964169, |
| "grad_norm": 0.6877515912055969, |
| "learning_rate": 9.47770074524693e-05, |
| "loss": 0.0149, |
| "step": 3790 |
| }, |
| { |
| "epoch": 12.37785016286645, |
| "grad_norm": 0.7423529624938965, |
| "learning_rate": 9.474015815039446e-05, |
| "loss": 0.0115, |
| "step": 3800 |
| }, |
| { |
| "epoch": 12.410423452768729, |
| "grad_norm": 0.7478250861167908, |
| "learning_rate": 9.470318653043565e-05, |
| "loss": 0.0125, |
| "step": 3810 |
| }, |
| { |
| "epoch": 12.44299674267101, |
| "grad_norm": 0.6396955847740173, |
| "learning_rate": 9.466609269367185e-05, |
| "loss": 0.0128, |
| "step": 3820 |
| }, |
| { |
| "epoch": 12.47557003257329, |
| "grad_norm": 0.6483830809593201, |
| "learning_rate": 9.46288767415162e-05, |
| "loss": 0.0137, |
| "step": 3830 |
| }, |
| { |
| "epoch": 12.50814332247557, |
| "grad_norm": 0.7733943462371826, |
| "learning_rate": 9.459153877571567e-05, |
| "loss": 0.015, |
| "step": 3840 |
| }, |
| { |
| "epoch": 12.54071661237785, |
| "grad_norm": 0.6195006370544434, |
| "learning_rate": 9.455407889835087e-05, |
| "loss": 0.012, |
| "step": 3850 |
| }, |
| { |
| "epoch": 12.57328990228013, |
| "grad_norm": 0.6243380308151245, |
| "learning_rate": 9.451649721183564e-05, |
| "loss": 0.0149, |
| "step": 3860 |
| }, |
| { |
| "epoch": 12.60586319218241, |
| "grad_norm": 0.6876091361045837, |
| "learning_rate": 9.447879381891692e-05, |
| "loss": 0.0118, |
| "step": 3870 |
| }, |
| { |
| "epoch": 12.63843648208469, |
| "grad_norm": 0.7486156225204468, |
| "learning_rate": 9.444096882267428e-05, |
| "loss": 0.0125, |
| "step": 3880 |
| }, |
| { |
| "epoch": 12.67100977198697, |
| "grad_norm": 0.6939566731452942, |
| "learning_rate": 9.440302232651988e-05, |
| "loss": 0.0136, |
| "step": 3890 |
| }, |
| { |
| "epoch": 12.703583061889251, |
| "grad_norm": 0.6740759015083313, |
| "learning_rate": 9.436495443419795e-05, |
| "loss": 0.0121, |
| "step": 3900 |
| }, |
| { |
| "epoch": 12.73615635179153, |
| "grad_norm": 0.7866451144218445, |
| "learning_rate": 9.432676524978466e-05, |
| "loss": 0.0121, |
| "step": 3910 |
| }, |
| { |
| "epoch": 12.768729641693811, |
| "grad_norm": 0.5861152410507202, |
| "learning_rate": 9.42884548776878e-05, |
| "loss": 0.0111, |
| "step": 3920 |
| }, |
| { |
| "epoch": 12.80130293159609, |
| "grad_norm": 0.5740252137184143, |
| "learning_rate": 9.425002342264646e-05, |
| "loss": 0.0119, |
| "step": 3930 |
| }, |
| { |
| "epoch": 12.833876221498372, |
| "grad_norm": 0.6962918043136597, |
| "learning_rate": 9.421147098973077e-05, |
| "loss": 0.0118, |
| "step": 3940 |
| }, |
| { |
| "epoch": 12.866449511400651, |
| "grad_norm": 0.4894341826438904, |
| "learning_rate": 9.41727976843416e-05, |
| "loss": 0.0123, |
| "step": 3950 |
| }, |
| { |
| "epoch": 12.899022801302932, |
| "grad_norm": 0.6954609155654907, |
| "learning_rate": 9.413400361221029e-05, |
| "loss": 0.0131, |
| "step": 3960 |
| }, |
| { |
| "epoch": 12.931596091205211, |
| "grad_norm": 0.576310932636261, |
| "learning_rate": 9.409508887939835e-05, |
| "loss": 0.0121, |
| "step": 3970 |
| }, |
| { |
| "epoch": 12.964169381107492, |
| "grad_norm": 0.4614481031894684, |
| "learning_rate": 9.40560535922972e-05, |
| "loss": 0.0132, |
| "step": 3980 |
| }, |
| { |
| "epoch": 12.996742671009772, |
| "grad_norm": 0.47341176867485046, |
| "learning_rate": 9.40168978576278e-05, |
| "loss": 0.0137, |
| "step": 3990 |
| }, |
| { |
| "epoch": 13.029315960912053, |
| "grad_norm": 0.5697060227394104, |
| "learning_rate": 9.397762178244043e-05, |
| "loss": 0.0133, |
| "step": 4000 |
| }, |
| { |
| "epoch": 13.061889250814332, |
| "grad_norm": 0.612964928150177, |
| "learning_rate": 9.393822547411439e-05, |
| "loss": 0.0109, |
| "step": 4010 |
| }, |
| { |
| "epoch": 13.094462540716613, |
| "grad_norm": 0.6282061338424683, |
| "learning_rate": 9.389870904035769e-05, |
| "loss": 0.0116, |
| "step": 4020 |
| }, |
| { |
| "epoch": 13.127035830618892, |
| "grad_norm": 0.5586996078491211, |
| "learning_rate": 9.385907258920672e-05, |
| "loss": 0.0123, |
| "step": 4030 |
| }, |
| { |
| "epoch": 13.159609120521173, |
| "grad_norm": 0.5667091012001038, |
| "learning_rate": 9.381931622902607e-05, |
| "loss": 0.0119, |
| "step": 4040 |
| }, |
| { |
| "epoch": 13.192182410423452, |
| "grad_norm": 0.6335421800613403, |
| "learning_rate": 9.377944006850807e-05, |
| "loss": 0.0133, |
| "step": 4050 |
| }, |
| { |
| "epoch": 13.224755700325733, |
| "grad_norm": 0.8205068111419678, |
| "learning_rate": 9.373944421667265e-05, |
| "loss": 0.0122, |
| "step": 4060 |
| }, |
| { |
| "epoch": 13.257328990228013, |
| "grad_norm": 0.676856279373169, |
| "learning_rate": 9.369932878286691e-05, |
| "loss": 0.0126, |
| "step": 4070 |
| }, |
| { |
| "epoch": 13.289902280130294, |
| "grad_norm": 0.6334593296051025, |
| "learning_rate": 9.365909387676494e-05, |
| "loss": 0.0126, |
| "step": 4080 |
| }, |
| { |
| "epoch": 13.322475570032573, |
| "grad_norm": 0.6614803671836853, |
| "learning_rate": 9.361873960836744e-05, |
| "loss": 0.0126, |
| "step": 4090 |
| }, |
| { |
| "epoch": 13.355048859934854, |
| "grad_norm": 0.6303547620773315, |
| "learning_rate": 9.357826608800142e-05, |
| "loss": 0.0121, |
| "step": 4100 |
| }, |
| { |
| "epoch": 13.387622149837133, |
| "grad_norm": 0.5380171537399292, |
| "learning_rate": 9.353767342631994e-05, |
| "loss": 0.0121, |
| "step": 4110 |
| }, |
| { |
| "epoch": 13.420195439739414, |
| "grad_norm": 0.5387251377105713, |
| "learning_rate": 9.34969617343018e-05, |
| "loss": 0.0102, |
| "step": 4120 |
| }, |
| { |
| "epoch": 13.452768729641694, |
| "grad_norm": 0.5303720235824585, |
| "learning_rate": 9.345613112325122e-05, |
| "loss": 0.0126, |
| "step": 4130 |
| }, |
| { |
| "epoch": 13.485342019543975, |
| "grad_norm": 0.5876774787902832, |
| "learning_rate": 9.34151817047975e-05, |
| "loss": 0.0111, |
| "step": 4140 |
| }, |
| { |
| "epoch": 13.517915309446254, |
| "grad_norm": 0.6997594237327576, |
| "learning_rate": 9.33741135908948e-05, |
| "loss": 0.0119, |
| "step": 4150 |
| }, |
| { |
| "epoch": 13.550488599348535, |
| "grad_norm": 0.602103054523468, |
| "learning_rate": 9.33329268938218e-05, |
| "loss": 0.0118, |
| "step": 4160 |
| }, |
| { |
| "epoch": 13.583061889250814, |
| "grad_norm": 0.6966025829315186, |
| "learning_rate": 9.329162172618132e-05, |
| "loss": 0.0127, |
| "step": 4170 |
| }, |
| { |
| "epoch": 13.615635179153095, |
| "grad_norm": 0.5898999571800232, |
| "learning_rate": 9.325019820090013e-05, |
| "loss": 0.0113, |
| "step": 4180 |
| }, |
| { |
| "epoch": 13.648208469055374, |
| "grad_norm": 0.6419972777366638, |
| "learning_rate": 9.320865643122855e-05, |
| "loss": 0.0122, |
| "step": 4190 |
| }, |
| { |
| "epoch": 13.680781758957655, |
| "grad_norm": 0.48004379868507385, |
| "learning_rate": 9.316699653074023e-05, |
| "loss": 0.0115, |
| "step": 4200 |
| }, |
| { |
| "epoch": 13.713355048859935, |
| "grad_norm": 0.6654482483863831, |
| "learning_rate": 9.312521861333172e-05, |
| "loss": 0.0128, |
| "step": 4210 |
| }, |
| { |
| "epoch": 13.745928338762216, |
| "grad_norm": 0.5661275386810303, |
| "learning_rate": 9.308332279322224e-05, |
| "loss": 0.0114, |
| "step": 4220 |
| }, |
| { |
| "epoch": 13.778501628664495, |
| "grad_norm": 0.5483682155609131, |
| "learning_rate": 9.304130918495338e-05, |
| "loss": 0.0133, |
| "step": 4230 |
| }, |
| { |
| "epoch": 13.811074918566776, |
| "grad_norm": 0.7615593075752258, |
| "learning_rate": 9.299917790338874e-05, |
| "loss": 0.0119, |
| "step": 4240 |
| }, |
| { |
| "epoch": 13.843648208469055, |
| "grad_norm": 0.8771687150001526, |
| "learning_rate": 9.295692906371363e-05, |
| "loss": 0.013, |
| "step": 4250 |
| }, |
| { |
| "epoch": 13.876221498371336, |
| "grad_norm": 0.7540078163146973, |
| "learning_rate": 9.291456278143476e-05, |
| "loss": 0.0138, |
| "step": 4260 |
| }, |
| { |
| "epoch": 13.908794788273616, |
| "grad_norm": 0.711463212966919, |
| "learning_rate": 9.287207917237994e-05, |
| "loss": 0.0115, |
| "step": 4270 |
| }, |
| { |
| "epoch": 13.941368078175895, |
| "grad_norm": 0.6685953736305237, |
| "learning_rate": 9.282947835269773e-05, |
| "loss": 0.0128, |
| "step": 4280 |
| }, |
| { |
| "epoch": 13.973941368078176, |
| "grad_norm": 0.6462761163711548, |
| "learning_rate": 9.278676043885715e-05, |
| "loss": 0.0142, |
| "step": 4290 |
| }, |
| { |
| "epoch": 14.006514657980455, |
| "grad_norm": 0.577616274356842, |
| "learning_rate": 9.274392554764733e-05, |
| "loss": 0.0114, |
| "step": 4300 |
| }, |
| { |
| "epoch": 14.039087947882736, |
| "grad_norm": 0.6425755620002747, |
| "learning_rate": 9.270097379617723e-05, |
| "loss": 0.0124, |
| "step": 4310 |
| }, |
| { |
| "epoch": 14.071661237785015, |
| "grad_norm": 0.5634329319000244, |
| "learning_rate": 9.26579053018753e-05, |
| "loss": 0.0125, |
| "step": 4320 |
| }, |
| { |
| "epoch": 14.104234527687296, |
| "grad_norm": 0.5592512488365173, |
| "learning_rate": 9.261472018248918e-05, |
| "loss": 0.0131, |
| "step": 4330 |
| }, |
| { |
| "epoch": 14.136807817589576, |
| "grad_norm": 0.7388976216316223, |
| "learning_rate": 9.25714185560853e-05, |
| "loss": 0.013, |
| "step": 4340 |
| }, |
| { |
| "epoch": 14.169381107491857, |
| "grad_norm": 0.5503047108650208, |
| "learning_rate": 9.252800054104868e-05, |
| "loss": 0.0122, |
| "step": 4350 |
| }, |
| { |
| "epoch": 14.201954397394136, |
| "grad_norm": 0.4611617922782898, |
| "learning_rate": 9.248446625608252e-05, |
| "loss": 0.0111, |
| "step": 4360 |
| }, |
| { |
| "epoch": 14.234527687296417, |
| "grad_norm": 0.5560048222541809, |
| "learning_rate": 9.244081582020789e-05, |
| "loss": 0.0124, |
| "step": 4370 |
| }, |
| { |
| "epoch": 14.267100977198696, |
| "grad_norm": 0.6567198634147644, |
| "learning_rate": 9.239704935276339e-05, |
| "loss": 0.0119, |
| "step": 4380 |
| }, |
| { |
| "epoch": 14.299674267100977, |
| "grad_norm": 0.5934193134307861, |
| "learning_rate": 9.235316697340489e-05, |
| "loss": 0.0108, |
| "step": 4390 |
| }, |
| { |
| "epoch": 14.332247557003257, |
| "grad_norm": 0.7520450353622437, |
| "learning_rate": 9.230916880210512e-05, |
| "loss": 0.0141, |
| "step": 4400 |
| }, |
| { |
| "epoch": 14.364820846905538, |
| "grad_norm": 0.6309568285942078, |
| "learning_rate": 9.226505495915342e-05, |
| "loss": 0.0106, |
| "step": 4410 |
| }, |
| { |
| "epoch": 14.397394136807817, |
| "grad_norm": 0.5420092344284058, |
| "learning_rate": 9.222082556515536e-05, |
| "loss": 0.0115, |
| "step": 4420 |
| }, |
| { |
| "epoch": 14.429967426710098, |
| "grad_norm": 0.5959463715553284, |
| "learning_rate": 9.217648074103242e-05, |
| "loss": 0.0129, |
| "step": 4430 |
| }, |
| { |
| "epoch": 14.462540716612377, |
| "grad_norm": 0.5967207551002502, |
| "learning_rate": 9.213202060802161e-05, |
| "loss": 0.0127, |
| "step": 4440 |
| }, |
| { |
| "epoch": 14.495114006514658, |
| "grad_norm": 0.4098617732524872, |
| "learning_rate": 9.208744528767528e-05, |
| "loss": 0.0119, |
| "step": 4450 |
| }, |
| { |
| "epoch": 14.527687296416937, |
| "grad_norm": 0.6198092699050903, |
| "learning_rate": 9.204275490186064e-05, |
| "loss": 0.0125, |
| "step": 4460 |
| }, |
| { |
| "epoch": 14.560260586319218, |
| "grad_norm": 0.5683520436286926, |
| "learning_rate": 9.199794957275949e-05, |
| "loss": 0.014, |
| "step": 4470 |
| }, |
| { |
| "epoch": 14.592833876221498, |
| "grad_norm": 0.4835983216762543, |
| "learning_rate": 9.19530294228679e-05, |
| "loss": 0.011, |
| "step": 4480 |
| }, |
| { |
| "epoch": 14.625407166123779, |
| "grad_norm": 0.5733904242515564, |
| "learning_rate": 9.190799457499583e-05, |
| "loss": 0.0131, |
| "step": 4490 |
| }, |
| { |
| "epoch": 14.657980456026058, |
| "grad_norm": 0.4585092067718506, |
| "learning_rate": 9.186284515226686e-05, |
| "loss": 0.0128, |
| "step": 4500 |
| }, |
| { |
| "epoch": 14.690553745928339, |
| "grad_norm": 0.4910542070865631, |
| "learning_rate": 9.181758127811777e-05, |
| "loss": 0.0135, |
| "step": 4510 |
| }, |
| { |
| "epoch": 14.723127035830618, |
| "grad_norm": 0.5943559408187866, |
| "learning_rate": 9.177220307629825e-05, |
| "loss": 0.0128, |
| "step": 4520 |
| }, |
| { |
| "epoch": 14.7557003257329, |
| "grad_norm": 0.6429430246353149, |
| "learning_rate": 9.172671067087059e-05, |
| "loss": 0.012, |
| "step": 4530 |
| }, |
| { |
| "epoch": 14.788273615635179, |
| "grad_norm": 0.5697653889656067, |
| "learning_rate": 9.16811041862093e-05, |
| "loss": 0.0107, |
| "step": 4540 |
| }, |
| { |
| "epoch": 14.82084690553746, |
| "grad_norm": 0.5715034008026123, |
| "learning_rate": 9.163538374700076e-05, |
| "loss": 0.0118, |
| "step": 4550 |
| }, |
| { |
| "epoch": 14.853420195439739, |
| "grad_norm": 0.638119101524353, |
| "learning_rate": 9.158954947824287e-05, |
| "loss": 0.011, |
| "step": 4560 |
| }, |
| { |
| "epoch": 14.88599348534202, |
| "grad_norm": 0.5536983609199524, |
| "learning_rate": 9.154360150524482e-05, |
| "loss": 0.013, |
| "step": 4570 |
| }, |
| { |
| "epoch": 14.9185667752443, |
| "grad_norm": 0.6488440632820129, |
| "learning_rate": 9.14975399536266e-05, |
| "loss": 0.0108, |
| "step": 4580 |
| }, |
| { |
| "epoch": 14.95114006514658, |
| "grad_norm": 0.5250594019889832, |
| "learning_rate": 9.14513649493187e-05, |
| "loss": 0.0112, |
| "step": 4590 |
| }, |
| { |
| "epoch": 14.98371335504886, |
| "grad_norm": 0.53516685962677, |
| "learning_rate": 9.140507661856187e-05, |
| "loss": 0.0117, |
| "step": 4600 |
| }, |
| { |
| "epoch": 15.01628664495114, |
| "grad_norm": 0.722775936126709, |
| "learning_rate": 9.135867508790661e-05, |
| "loss": 0.0133, |
| "step": 4610 |
| }, |
| { |
| "epoch": 15.04885993485342, |
| "grad_norm": 0.5718010067939758, |
| "learning_rate": 9.131216048421291e-05, |
| "loss": 0.0121, |
| "step": 4620 |
| }, |
| { |
| "epoch": 15.0814332247557, |
| "grad_norm": 0.705929696559906, |
| "learning_rate": 9.126553293464998e-05, |
| "loss": 0.0113, |
| "step": 4630 |
| }, |
| { |
| "epoch": 15.11400651465798, |
| "grad_norm": 0.5129148960113525, |
| "learning_rate": 9.121879256669572e-05, |
| "loss": 0.013, |
| "step": 4640 |
| }, |
| { |
| "epoch": 15.146579804560261, |
| "grad_norm": 0.5438552498817444, |
| "learning_rate": 9.117193950813652e-05, |
| "loss": 0.0137, |
| "step": 4650 |
| }, |
| { |
| "epoch": 15.17915309446254, |
| "grad_norm": 0.6388025283813477, |
| "learning_rate": 9.112497388706685e-05, |
| "loss": 0.0127, |
| "step": 4660 |
| }, |
| { |
| "epoch": 15.211726384364821, |
| "grad_norm": 0.6098414659500122, |
| "learning_rate": 9.10778958318889e-05, |
| "loss": 0.0101, |
| "step": 4670 |
| }, |
| { |
| "epoch": 15.2442996742671, |
| "grad_norm": 0.6415327191352844, |
| "learning_rate": 9.103070547131232e-05, |
| "loss": 0.0142, |
| "step": 4680 |
| }, |
| { |
| "epoch": 15.276872964169382, |
| "grad_norm": 0.6037464141845703, |
| "learning_rate": 9.098340293435375e-05, |
| "loss": 0.0117, |
| "step": 4690 |
| }, |
| { |
| "epoch": 15.309446254071661, |
| "grad_norm": 0.5115535855293274, |
| "learning_rate": 9.093598835033649e-05, |
| "loss": 0.0105, |
| "step": 4700 |
| }, |
| { |
| "epoch": 15.342019543973942, |
| "grad_norm": 0.5303134918212891, |
| "learning_rate": 9.088846184889021e-05, |
| "loss": 0.0118, |
| "step": 4710 |
| }, |
| { |
| "epoch": 15.374592833876221, |
| "grad_norm": 0.5767205357551575, |
| "learning_rate": 9.084082355995057e-05, |
| "loss": 0.0105, |
| "step": 4720 |
| }, |
| { |
| "epoch": 15.407166123778502, |
| "grad_norm": 0.5441441535949707, |
| "learning_rate": 9.079307361375882e-05, |
| "loss": 0.0105, |
| "step": 4730 |
| }, |
| { |
| "epoch": 15.439739413680782, |
| "grad_norm": 0.5901165008544922, |
| "learning_rate": 9.074521214086149e-05, |
| "loss": 0.0123, |
| "step": 4740 |
| }, |
| { |
| "epoch": 15.472312703583063, |
| "grad_norm": 0.4205402731895447, |
| "learning_rate": 9.069723927211001e-05, |
| "loss": 0.0103, |
| "step": 4750 |
| }, |
| { |
| "epoch": 15.504885993485342, |
| "grad_norm": 0.6429307460784912, |
| "learning_rate": 9.064915513866037e-05, |
| "loss": 0.0093, |
| "step": 4760 |
| }, |
| { |
| "epoch": 15.537459283387623, |
| "grad_norm": 0.47192737460136414, |
| "learning_rate": 9.060095987197279e-05, |
| "loss": 0.0109, |
| "step": 4770 |
| }, |
| { |
| "epoch": 15.570032573289902, |
| "grad_norm": 0.5266391634941101, |
| "learning_rate": 9.055265360381126e-05, |
| "loss": 0.0097, |
| "step": 4780 |
| }, |
| { |
| "epoch": 15.602605863192183, |
| "grad_norm": 0.485973984003067, |
| "learning_rate": 9.050423646624326e-05, |
| "loss": 0.0101, |
| "step": 4790 |
| }, |
| { |
| "epoch": 15.635179153094462, |
| "grad_norm": 0.5286204218864441, |
| "learning_rate": 9.045570859163943e-05, |
| "loss": 0.0117, |
| "step": 4800 |
| }, |
| { |
| "epoch": 15.667752442996743, |
| "grad_norm": 0.6801819205284119, |
| "learning_rate": 9.04070701126731e-05, |
| "loss": 0.0106, |
| "step": 4810 |
| }, |
| { |
| "epoch": 15.700325732899023, |
| "grad_norm": 0.5797834396362305, |
| "learning_rate": 9.035832116232001e-05, |
| "loss": 0.0115, |
| "step": 4820 |
| }, |
| { |
| "epoch": 15.732899022801304, |
| "grad_norm": 0.7416790723800659, |
| "learning_rate": 9.030946187385796e-05, |
| "loss": 0.0112, |
| "step": 4830 |
| }, |
| { |
| "epoch": 15.765472312703583, |
| "grad_norm": 0.5395382046699524, |
| "learning_rate": 9.026049238086635e-05, |
| "loss": 0.0101, |
| "step": 4840 |
| }, |
| { |
| "epoch": 15.798045602605864, |
| "grad_norm": 0.667809784412384, |
| "learning_rate": 9.021141281722591e-05, |
| "loss": 0.0112, |
| "step": 4850 |
| }, |
| { |
| "epoch": 15.830618892508143, |
| "grad_norm": 0.543626606464386, |
| "learning_rate": 9.01622233171183e-05, |
| "loss": 0.0117, |
| "step": 4860 |
| }, |
| { |
| "epoch": 15.863192182410424, |
| "grad_norm": 0.5138244032859802, |
| "learning_rate": 9.011292401502574e-05, |
| "loss": 0.0106, |
| "step": 4870 |
| }, |
| { |
| "epoch": 15.895765472312704, |
| "grad_norm": 0.491041898727417, |
| "learning_rate": 9.006351504573063e-05, |
| "loss": 0.0126, |
| "step": 4880 |
| }, |
| { |
| "epoch": 15.928338762214985, |
| "grad_norm": 0.3895576298236847, |
| "learning_rate": 9.001399654431519e-05, |
| "loss": 0.0108, |
| "step": 4890 |
| }, |
| { |
| "epoch": 15.960912052117264, |
| "grad_norm": 0.6080408692359924, |
| "learning_rate": 8.996436864616116e-05, |
| "loss": 0.013, |
| "step": 4900 |
| }, |
| { |
| "epoch": 15.993485342019543, |
| "grad_norm": 0.6155601143836975, |
| "learning_rate": 8.991463148694925e-05, |
| "loss": 0.0096, |
| "step": 4910 |
| }, |
| { |
| "epoch": 16.026058631921824, |
| "grad_norm": 0.6588138341903687, |
| "learning_rate": 8.986478520265902e-05, |
| "loss": 0.0104, |
| "step": 4920 |
| }, |
| { |
| "epoch": 16.058631921824105, |
| "grad_norm": 0.6062794327735901, |
| "learning_rate": 8.981482992956827e-05, |
| "loss": 0.0107, |
| "step": 4930 |
| }, |
| { |
| "epoch": 16.091205211726383, |
| "grad_norm": 0.6590397357940674, |
| "learning_rate": 8.976476580425282e-05, |
| "loss": 0.0131, |
| "step": 4940 |
| }, |
| { |
| "epoch": 16.123778501628664, |
| "grad_norm": 0.5601251125335693, |
| "learning_rate": 8.971459296358606e-05, |
| "loss": 0.0121, |
| "step": 4950 |
| }, |
| { |
| "epoch": 16.156351791530945, |
| "grad_norm": 0.5221347212791443, |
| "learning_rate": 8.966431154473864e-05, |
| "loss": 0.0121, |
| "step": 4960 |
| }, |
| { |
| "epoch": 16.188925081433226, |
| "grad_norm": 0.57920902967453, |
| "learning_rate": 8.961392168517803e-05, |
| "loss": 0.0124, |
| "step": 4970 |
| }, |
| { |
| "epoch": 16.221498371335503, |
| "grad_norm": 0.5354865789413452, |
| "learning_rate": 8.956342352266821e-05, |
| "loss": 0.012, |
| "step": 4980 |
| }, |
| { |
| "epoch": 16.254071661237784, |
| "grad_norm": 0.618729293346405, |
| "learning_rate": 8.95128171952692e-05, |
| "loss": 0.0104, |
| "step": 4990 |
| }, |
| { |
| "epoch": 16.286644951140065, |
| "grad_norm": 0.42464056611061096, |
| "learning_rate": 8.946210284133676e-05, |
| "loss": 0.0114, |
| "step": 5000 |
| }, |
| { |
| "epoch": 16.319218241042346, |
| "grad_norm": 0.4815506041049957, |
| "learning_rate": 8.941128059952201e-05, |
| "loss": 0.01, |
| "step": 5010 |
| }, |
| { |
| "epoch": 16.351791530944624, |
| "grad_norm": 0.49649620056152344, |
| "learning_rate": 8.936035060877102e-05, |
| "loss": 0.0106, |
| "step": 5020 |
| }, |
| { |
| "epoch": 16.384364820846905, |
| "grad_norm": 0.5027061700820923, |
| "learning_rate": 8.930931300832443e-05, |
| "loss": 0.0104, |
| "step": 5030 |
| }, |
| { |
| "epoch": 16.416938110749186, |
| "grad_norm": 0.5375000834465027, |
| "learning_rate": 8.925816793771711e-05, |
| "loss": 0.0095, |
| "step": 5040 |
| }, |
| { |
| "epoch": 16.449511400651467, |
| "grad_norm": 0.4966764450073242, |
| "learning_rate": 8.92069155367777e-05, |
| "loss": 0.0114, |
| "step": 5050 |
| }, |
| { |
| "epoch": 16.482084690553744, |
| "grad_norm": 0.7109740376472473, |
| "learning_rate": 8.915555594562834e-05, |
| "loss": 0.0129, |
| "step": 5060 |
| }, |
| { |
| "epoch": 16.514657980456025, |
| "grad_norm": 0.5642775297164917, |
| "learning_rate": 8.910408930468416e-05, |
| "loss": 0.0102, |
| "step": 5070 |
| }, |
| { |
| "epoch": 16.547231270358306, |
| "grad_norm": 0.5686492323875427, |
| "learning_rate": 8.905251575465303e-05, |
| "loss": 0.0102, |
| "step": 5080 |
| }, |
| { |
| "epoch": 16.579804560260587, |
| "grad_norm": 0.5111770033836365, |
| "learning_rate": 8.900083543653502e-05, |
| "loss": 0.0101, |
| "step": 5090 |
| }, |
| { |
| "epoch": 16.612377850162865, |
| "grad_norm": 0.48311617970466614, |
| "learning_rate": 8.894904849162218e-05, |
| "loss": 0.0113, |
| "step": 5100 |
| }, |
| { |
| "epoch": 16.644951140065146, |
| "grad_norm": 0.4612133502960205, |
| "learning_rate": 8.889715506149802e-05, |
| "loss": 0.0097, |
| "step": 5110 |
| }, |
| { |
| "epoch": 16.677524429967427, |
| "grad_norm": 0.6265704035758972, |
| "learning_rate": 8.884515528803722e-05, |
| "loss": 0.0101, |
| "step": 5120 |
| }, |
| { |
| "epoch": 16.710097719869708, |
| "grad_norm": 0.5671060681343079, |
| "learning_rate": 8.879304931340517e-05, |
| "loss": 0.0098, |
| "step": 5130 |
| }, |
| { |
| "epoch": 16.742671009771986, |
| "grad_norm": 0.4591500461101532, |
| "learning_rate": 8.874083728005759e-05, |
| "loss": 0.0106, |
| "step": 5140 |
| }, |
| { |
| "epoch": 16.775244299674267, |
| "grad_norm": 0.7012014389038086, |
| "learning_rate": 8.868851933074021e-05, |
| "loss": 0.0115, |
| "step": 5150 |
| }, |
| { |
| "epoch": 16.807817589576548, |
| "grad_norm": 0.6138771176338196, |
| "learning_rate": 8.863609560848829e-05, |
| "loss": 0.0117, |
| "step": 5160 |
| }, |
| { |
| "epoch": 16.84039087947883, |
| "grad_norm": 0.5631691813468933, |
| "learning_rate": 8.85835662566263e-05, |
| "loss": 0.0096, |
| "step": 5170 |
| }, |
| { |
| "epoch": 16.872964169381106, |
| "grad_norm": 0.6066752076148987, |
| "learning_rate": 8.853093141876747e-05, |
| "loss": 0.0118, |
| "step": 5180 |
| }, |
| { |
| "epoch": 16.905537459283387, |
| "grad_norm": 0.3970053791999817, |
| "learning_rate": 8.847819123881343e-05, |
| "loss": 0.0103, |
| "step": 5190 |
| }, |
| { |
| "epoch": 16.938110749185668, |
| "grad_norm": 0.5872803330421448, |
| "learning_rate": 8.842534586095383e-05, |
| "loss": 0.0093, |
| "step": 5200 |
| }, |
| { |
| "epoch": 16.97068403908795, |
| "grad_norm": 0.5278239846229553, |
| "learning_rate": 8.837239542966593e-05, |
| "loss": 0.0116, |
| "step": 5210 |
| }, |
| { |
| "epoch": 17.003257328990227, |
| "grad_norm": 0.5052501559257507, |
| "learning_rate": 8.831934008971417e-05, |
| "loss": 0.0102, |
| "step": 5220 |
| }, |
| { |
| "epoch": 17.035830618892508, |
| "grad_norm": 0.5707674026489258, |
| "learning_rate": 8.826617998614982e-05, |
| "loss": 0.0085, |
| "step": 5230 |
| }, |
| { |
| "epoch": 17.06840390879479, |
| "grad_norm": 0.5154997706413269, |
| "learning_rate": 8.821291526431056e-05, |
| "loss": 0.0113, |
| "step": 5240 |
| }, |
| { |
| "epoch": 17.10097719869707, |
| "grad_norm": 0.4334968328475952, |
| "learning_rate": 8.815954606982015e-05, |
| "loss": 0.0112, |
| "step": 5250 |
| }, |
| { |
| "epoch": 17.133550488599347, |
| "grad_norm": 0.6048435568809509, |
| "learning_rate": 8.810607254858789e-05, |
| "loss": 0.0117, |
| "step": 5260 |
| }, |
| { |
| "epoch": 17.16612377850163, |
| "grad_norm": 0.4900098145008087, |
| "learning_rate": 8.805249484680838e-05, |
| "loss": 0.0098, |
| "step": 5270 |
| }, |
| { |
| "epoch": 17.19869706840391, |
| "grad_norm": 0.43989741802215576, |
| "learning_rate": 8.799881311096096e-05, |
| "loss": 0.0119, |
| "step": 5280 |
| }, |
| { |
| "epoch": 17.23127035830619, |
| "grad_norm": 0.4381119906902313, |
| "learning_rate": 8.794502748780949e-05, |
| "loss": 0.0098, |
| "step": 5290 |
| }, |
| { |
| "epoch": 17.263843648208468, |
| "grad_norm": 0.5834327340126038, |
| "learning_rate": 8.78911381244018e-05, |
| "loss": 0.0115, |
| "step": 5300 |
| }, |
| { |
| "epoch": 17.29641693811075, |
| "grad_norm": 0.43369826674461365, |
| "learning_rate": 8.783714516806933e-05, |
| "loss": 0.0093, |
| "step": 5310 |
| }, |
| { |
| "epoch": 17.32899022801303, |
| "grad_norm": 0.5850085020065308, |
| "learning_rate": 8.77830487664268e-05, |
| "loss": 0.0112, |
| "step": 5320 |
| }, |
| { |
| "epoch": 17.36156351791531, |
| "grad_norm": 0.5535557270050049, |
| "learning_rate": 8.772884906737167e-05, |
| "loss": 0.0117, |
| "step": 5330 |
| }, |
| { |
| "epoch": 17.39413680781759, |
| "grad_norm": 0.5345462560653687, |
| "learning_rate": 8.767454621908387e-05, |
| "loss": 0.0112, |
| "step": 5340 |
| }, |
| { |
| "epoch": 17.42671009771987, |
| "grad_norm": 0.5913922190666199, |
| "learning_rate": 8.76201403700253e-05, |
| "loss": 0.0118, |
| "step": 5350 |
| }, |
| { |
| "epoch": 17.45928338762215, |
| "grad_norm": 0.5394867658615112, |
| "learning_rate": 8.756563166893949e-05, |
| "loss": 0.0103, |
| "step": 5360 |
| }, |
| { |
| "epoch": 17.49185667752443, |
| "grad_norm": 0.637417197227478, |
| "learning_rate": 8.751102026485113e-05, |
| "loss": 0.0121, |
| "step": 5370 |
| }, |
| { |
| "epoch": 17.52442996742671, |
| "grad_norm": 0.5312429666519165, |
| "learning_rate": 8.745630630706571e-05, |
| "loss": 0.0112, |
| "step": 5380 |
| }, |
| { |
| "epoch": 17.55700325732899, |
| "grad_norm": 0.5353013873100281, |
| "learning_rate": 8.740148994516912e-05, |
| "loss": 0.0104, |
| "step": 5390 |
| }, |
| { |
| "epoch": 17.58957654723127, |
| "grad_norm": 0.5983322858810425, |
| "learning_rate": 8.73465713290272e-05, |
| "loss": 0.0091, |
| "step": 5400 |
| }, |
| { |
| "epoch": 17.622149837133552, |
| "grad_norm": 0.6481793522834778, |
| "learning_rate": 8.729155060878533e-05, |
| "loss": 0.0109, |
| "step": 5410 |
| }, |
| { |
| "epoch": 17.65472312703583, |
| "grad_norm": 0.51454097032547, |
| "learning_rate": 8.723642793486809e-05, |
| "loss": 0.0104, |
| "step": 5420 |
| }, |
| { |
| "epoch": 17.68729641693811, |
| "grad_norm": 0.6376858949661255, |
| "learning_rate": 8.718120345797873e-05, |
| "loss": 0.01, |
| "step": 5430 |
| }, |
| { |
| "epoch": 17.71986970684039, |
| "grad_norm": 0.5232966542243958, |
| "learning_rate": 8.712587732909889e-05, |
| "loss": 0.0104, |
| "step": 5440 |
| }, |
| { |
| "epoch": 17.752442996742673, |
| "grad_norm": 0.40592941641807556, |
| "learning_rate": 8.707044969948806e-05, |
| "loss": 0.0103, |
| "step": 5450 |
| }, |
| { |
| "epoch": 17.78501628664495, |
| "grad_norm": 0.5707107782363892, |
| "learning_rate": 8.701492072068329e-05, |
| "loss": 0.0097, |
| "step": 5460 |
| }, |
| { |
| "epoch": 17.81758957654723, |
| "grad_norm": 0.5730787515640259, |
| "learning_rate": 8.695929054449869e-05, |
| "loss": 0.0109, |
| "step": 5470 |
| }, |
| { |
| "epoch": 17.850162866449512, |
| "grad_norm": 0.5907419323921204, |
| "learning_rate": 8.690355932302501e-05, |
| "loss": 0.0106, |
| "step": 5480 |
| }, |
| { |
| "epoch": 17.88273615635179, |
| "grad_norm": 0.5448195934295654, |
| "learning_rate": 8.684772720862931e-05, |
| "loss": 0.0102, |
| "step": 5490 |
| }, |
| { |
| "epoch": 17.91530944625407, |
| "grad_norm": 0.5052299499511719, |
| "learning_rate": 8.679179435395446e-05, |
| "loss": 0.0102, |
| "step": 5500 |
| }, |
| { |
| "epoch": 17.94788273615635, |
| "grad_norm": 0.57944256067276, |
| "learning_rate": 8.673576091191874e-05, |
| "loss": 0.0115, |
| "step": 5510 |
| }, |
| { |
| "epoch": 17.980456026058633, |
| "grad_norm": 0.5408090949058533, |
| "learning_rate": 8.667962703571541e-05, |
| "loss": 0.0105, |
| "step": 5520 |
| }, |
| { |
| "epoch": 18.01302931596091, |
| "grad_norm": 0.5815545320510864, |
| "learning_rate": 8.662339287881238e-05, |
| "loss": 0.0098, |
| "step": 5530 |
| }, |
| { |
| "epoch": 18.04560260586319, |
| "grad_norm": 0.5216434001922607, |
| "learning_rate": 8.656705859495169e-05, |
| "loss": 0.0108, |
| "step": 5540 |
| }, |
| { |
| "epoch": 18.078175895765472, |
| "grad_norm": 0.41256457567214966, |
| "learning_rate": 8.651062433814912e-05, |
| "loss": 0.0096, |
| "step": 5550 |
| }, |
| { |
| "epoch": 18.110749185667753, |
| "grad_norm": 0.4612780809402466, |
| "learning_rate": 8.645409026269375e-05, |
| "loss": 0.0098, |
| "step": 5560 |
| }, |
| { |
| "epoch": 18.14332247557003, |
| "grad_norm": 0.6036962866783142, |
| "learning_rate": 8.639745652314759e-05, |
| "loss": 0.0109, |
| "step": 5570 |
| }, |
| { |
| "epoch": 18.175895765472312, |
| "grad_norm": 0.45757317543029785, |
| "learning_rate": 8.634072327434515e-05, |
| "loss": 0.0104, |
| "step": 5580 |
| }, |
| { |
| "epoch": 18.208469055374593, |
| "grad_norm": 0.5633235573768616, |
| "learning_rate": 8.628389067139294e-05, |
| "loss": 0.0105, |
| "step": 5590 |
| }, |
| { |
| "epoch": 18.241042345276874, |
| "grad_norm": 0.48329007625579834, |
| "learning_rate": 8.622695886966911e-05, |
| "loss": 0.01, |
| "step": 5600 |
| }, |
| { |
| "epoch": 18.27361563517915, |
| "grad_norm": 0.4269546568393707, |
| "learning_rate": 8.616992802482308e-05, |
| "loss": 0.0123, |
| "step": 5610 |
| }, |
| { |
| "epoch": 18.306188925081432, |
| "grad_norm": 0.4761641323566437, |
| "learning_rate": 8.611279829277496e-05, |
| "loss": 0.0112, |
| "step": 5620 |
| }, |
| { |
| "epoch": 18.338762214983714, |
| "grad_norm": 0.5688640475273132, |
| "learning_rate": 8.605556982971528e-05, |
| "loss": 0.011, |
| "step": 5630 |
| }, |
| { |
| "epoch": 18.371335504885995, |
| "grad_norm": 0.5268420577049255, |
| "learning_rate": 8.599824279210447e-05, |
| "loss": 0.0111, |
| "step": 5640 |
| }, |
| { |
| "epoch": 18.403908794788272, |
| "grad_norm": 0.5499677658081055, |
| "learning_rate": 8.594081733667243e-05, |
| "loss": 0.0087, |
| "step": 5650 |
| }, |
| { |
| "epoch": 18.436482084690553, |
| "grad_norm": 0.592589259147644, |
| "learning_rate": 8.58832936204182e-05, |
| "loss": 0.0095, |
| "step": 5660 |
| }, |
| { |
| "epoch": 18.469055374592834, |
| "grad_norm": 0.4770977795124054, |
| "learning_rate": 8.582567180060942e-05, |
| "loss": 0.0099, |
| "step": 5670 |
| }, |
| { |
| "epoch": 18.501628664495115, |
| "grad_norm": 0.5618078708648682, |
| "learning_rate": 8.576795203478194e-05, |
| "loss": 0.0107, |
| "step": 5680 |
| }, |
| { |
| "epoch": 18.534201954397393, |
| "grad_norm": 0.46116217970848083, |
| "learning_rate": 8.571013448073939e-05, |
| "loss": 0.0114, |
| "step": 5690 |
| }, |
| { |
| "epoch": 18.566775244299674, |
| "grad_norm": 0.4571772515773773, |
| "learning_rate": 8.565221929655275e-05, |
| "loss": 0.0102, |
| "step": 5700 |
| }, |
| { |
| "epoch": 18.599348534201955, |
| "grad_norm": 0.4506760537624359, |
| "learning_rate": 8.559420664055992e-05, |
| "loss": 0.0114, |
| "step": 5710 |
| }, |
| { |
| "epoch": 18.631921824104236, |
| "grad_norm": 0.487032413482666, |
| "learning_rate": 8.553609667136532e-05, |
| "loss": 0.0098, |
| "step": 5720 |
| }, |
| { |
| "epoch": 18.664495114006513, |
| "grad_norm": 0.40095847845077515, |
| "learning_rate": 8.547788954783936e-05, |
| "loss": 0.0093, |
| "step": 5730 |
| }, |
| { |
| "epoch": 18.697068403908794, |
| "grad_norm": 0.43920227885246277, |
| "learning_rate": 8.541958542911808e-05, |
| "loss": 0.0108, |
| "step": 5740 |
| }, |
| { |
| "epoch": 18.729641693811075, |
| "grad_norm": 0.45471495389938354, |
| "learning_rate": 8.536118447460275e-05, |
| "loss": 0.0085, |
| "step": 5750 |
| }, |
| { |
| "epoch": 18.762214983713356, |
| "grad_norm": 0.3742855191230774, |
| "learning_rate": 8.530268684395932e-05, |
| "loss": 0.0098, |
| "step": 5760 |
| }, |
| { |
| "epoch": 18.794788273615634, |
| "grad_norm": 0.5401120781898499, |
| "learning_rate": 8.524409269711807e-05, |
| "loss": 0.011, |
| "step": 5770 |
| }, |
| { |
| "epoch": 18.827361563517915, |
| "grad_norm": 0.4368399679660797, |
| "learning_rate": 8.51854021942732e-05, |
| "loss": 0.0117, |
| "step": 5780 |
| }, |
| { |
| "epoch": 18.859934853420196, |
| "grad_norm": 0.5863309502601624, |
| "learning_rate": 8.512661549588227e-05, |
| "loss": 0.0108, |
| "step": 5790 |
| }, |
| { |
| "epoch": 18.892508143322477, |
| "grad_norm": 0.4833745062351227, |
| "learning_rate": 8.506773276266588e-05, |
| "loss": 0.011, |
| "step": 5800 |
| }, |
| { |
| "epoch": 18.925081433224754, |
| "grad_norm": 0.5938926339149475, |
| "learning_rate": 8.500875415560721e-05, |
| "loss": 0.0107, |
| "step": 5810 |
| }, |
| { |
| "epoch": 18.957654723127035, |
| "grad_norm": 0.47496920824050903, |
| "learning_rate": 8.494967983595144e-05, |
| "loss": 0.0128, |
| "step": 5820 |
| }, |
| { |
| "epoch": 18.990228013029316, |
| "grad_norm": 0.43583986163139343, |
| "learning_rate": 8.489050996520558e-05, |
| "loss": 0.0116, |
| "step": 5830 |
| }, |
| { |
| "epoch": 19.022801302931597, |
| "grad_norm": 0.4981800317764282, |
| "learning_rate": 8.483124470513775e-05, |
| "loss": 0.0097, |
| "step": 5840 |
| }, |
| { |
| "epoch": 19.055374592833875, |
| "grad_norm": 0.5075828433036804, |
| "learning_rate": 8.477188421777692e-05, |
| "loss": 0.0094, |
| "step": 5850 |
| }, |
| { |
| "epoch": 19.087947882736156, |
| "grad_norm": 0.49366408586502075, |
| "learning_rate": 8.47124286654124e-05, |
| "loss": 0.0113, |
| "step": 5860 |
| }, |
| { |
| "epoch": 19.120521172638437, |
| "grad_norm": 0.6152781844139099, |
| "learning_rate": 8.465287821059341e-05, |
| "loss": 0.01, |
| "step": 5870 |
| }, |
| { |
| "epoch": 19.153094462540718, |
| "grad_norm": 0.5412830710411072, |
| "learning_rate": 8.45932330161286e-05, |
| "loss": 0.0107, |
| "step": 5880 |
| }, |
| { |
| "epoch": 19.185667752442995, |
| "grad_norm": 0.46815627813339233, |
| "learning_rate": 8.453349324508567e-05, |
| "loss": 0.0104, |
| "step": 5890 |
| }, |
| { |
| "epoch": 19.218241042345277, |
| "grad_norm": 0.43253058195114136, |
| "learning_rate": 8.447365906079088e-05, |
| "loss": 0.0093, |
| "step": 5900 |
| }, |
| { |
| "epoch": 19.250814332247558, |
| "grad_norm": 0.40782538056373596, |
| "learning_rate": 8.441373062682856e-05, |
| "loss": 0.0104, |
| "step": 5910 |
| }, |
| { |
| "epoch": 19.28338762214984, |
| "grad_norm": 0.4392983317375183, |
| "learning_rate": 8.43537081070408e-05, |
| "loss": 0.0103, |
| "step": 5920 |
| }, |
| { |
| "epoch": 19.315960912052116, |
| "grad_norm": 0.47564175724983215, |
| "learning_rate": 8.429359166552689e-05, |
| "loss": 0.011, |
| "step": 5930 |
| }, |
| { |
| "epoch": 19.348534201954397, |
| "grad_norm": 0.4908657670021057, |
| "learning_rate": 8.423338146664284e-05, |
| "loss": 0.0112, |
| "step": 5940 |
| }, |
| { |
| "epoch": 19.381107491856678, |
| "grad_norm": 0.4445395767688751, |
| "learning_rate": 8.417307767500107e-05, |
| "loss": 0.0104, |
| "step": 5950 |
| }, |
| { |
| "epoch": 19.41368078175896, |
| "grad_norm": 0.38990992307662964, |
| "learning_rate": 8.411268045546983e-05, |
| "loss": 0.0104, |
| "step": 5960 |
| }, |
| { |
| "epoch": 19.446254071661237, |
| "grad_norm": 0.5295190811157227, |
| "learning_rate": 8.405218997317281e-05, |
| "loss": 0.0113, |
| "step": 5970 |
| }, |
| { |
| "epoch": 19.478827361563518, |
| "grad_norm": 0.5957738757133484, |
| "learning_rate": 8.399160639348869e-05, |
| "loss": 0.0136, |
| "step": 5980 |
| }, |
| { |
| "epoch": 19.5114006514658, |
| "grad_norm": 0.5141053199768066, |
| "learning_rate": 8.393092988205065e-05, |
| "loss": 0.0107, |
| "step": 5990 |
| }, |
| { |
| "epoch": 19.54397394136808, |
| "grad_norm": 0.5684329271316528, |
| "learning_rate": 8.387016060474597e-05, |
| "loss": 0.0105, |
| "step": 6000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 66, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 128, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|