| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9996113486202876, |
| "eval_steps": 500, |
| "global_step": 1929, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005182018396165306, |
| "grad_norm": 5.4082818031311035, |
| "learning_rate": 6.896551724137931e-07, |
| "loss": 0.2466, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0010364036792330613, |
| "grad_norm": 3.835052013397217, |
| "learning_rate": 1.3793103448275862e-06, |
| "loss": 0.2488, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001554605518849592, |
| "grad_norm": 5.501279354095459, |
| "learning_rate": 2.0689655172413796e-06, |
| "loss": 0.28, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0020728073584661225, |
| "grad_norm": 4.11079216003418, |
| "learning_rate": 2.7586206896551725e-06, |
| "loss": 0.245, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.002591009198082653, |
| "grad_norm": 4.3664326667785645, |
| "learning_rate": 3.448275862068966e-06, |
| "loss": 0.2753, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.003109211037699184, |
| "grad_norm": 5.692226409912109, |
| "learning_rate": 4.137931034482759e-06, |
| "loss": 0.234, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0036274128773157144, |
| "grad_norm": 4.383513450622559, |
| "learning_rate": 4.8275862068965525e-06, |
| "loss": 0.2831, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.004145614716932245, |
| "grad_norm": 4.188362121582031, |
| "learning_rate": 5.517241379310345e-06, |
| "loss": 0.2912, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.004663816556548776, |
| "grad_norm": 6.689868450164795, |
| "learning_rate": 6.206896551724138e-06, |
| "loss": 0.2132, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.005182018396165306, |
| "grad_norm": 5.252689361572266, |
| "learning_rate": 6.896551724137932e-06, |
| "loss": 0.1583, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005700220235781837, |
| "grad_norm": 4.101102828979492, |
| "learning_rate": 7.586206896551724e-06, |
| "loss": 0.2285, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.006218422075398368, |
| "grad_norm": 3.757774829864502, |
| "learning_rate": 8.275862068965518e-06, |
| "loss": 0.2126, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.006736623915014899, |
| "grad_norm": 3.110372543334961, |
| "learning_rate": 8.965517241379312e-06, |
| "loss": 0.1666, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.007254825754631429, |
| "grad_norm": 1.5280357599258423, |
| "learning_rate": 9.655172413793105e-06, |
| "loss": 0.1976, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00777302759424796, |
| "grad_norm": 2.1373836994171143, |
| "learning_rate": 1.0344827586206898e-05, |
| "loss": 0.1906, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.00829122943386449, |
| "grad_norm": 1.0903468132019043, |
| "learning_rate": 1.103448275862069e-05, |
| "loss": 0.1151, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.00880943127348102, |
| "grad_norm": 1.881485104560852, |
| "learning_rate": 1.1724137931034483e-05, |
| "loss": 0.1236, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.009327633113097552, |
| "grad_norm": 1.7081018686294556, |
| "learning_rate": 1.2413793103448277e-05, |
| "loss": 0.1583, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.009845834952714082, |
| "grad_norm": 1.3075495958328247, |
| "learning_rate": 1.310344827586207e-05, |
| "loss": 0.0965, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.010364036792330613, |
| "grad_norm": 1.3644860982894897, |
| "learning_rate": 1.3793103448275863e-05, |
| "loss": 0.0981, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.010882238631947143, |
| "grad_norm": 0.7825227379798889, |
| "learning_rate": 1.4482758620689657e-05, |
| "loss": 0.0718, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.011400440471563675, |
| "grad_norm": 0.6305981874465942, |
| "learning_rate": 1.5172413793103448e-05, |
| "loss": 0.0577, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.011918642311180205, |
| "grad_norm": 0.3038203716278076, |
| "learning_rate": 1.586206896551724e-05, |
| "loss": 0.0714, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.012436844150796735, |
| "grad_norm": 0.2078988254070282, |
| "learning_rate": 1.6551724137931037e-05, |
| "loss": 0.0471, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.012955045990413265, |
| "grad_norm": 0.20070360600948334, |
| "learning_rate": 1.7241379310344828e-05, |
| "loss": 0.0506, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.013473247830029797, |
| "grad_norm": 0.2870819568634033, |
| "learning_rate": 1.7931034482758623e-05, |
| "loss": 0.0459, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.013991449669646328, |
| "grad_norm": 0.5004845857620239, |
| "learning_rate": 1.8620689655172415e-05, |
| "loss": 0.0744, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.014509651509262858, |
| "grad_norm": 0.32954147458076477, |
| "learning_rate": 1.931034482758621e-05, |
| "loss": 0.0553, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.015027853348879388, |
| "grad_norm": 0.32314950227737427, |
| "learning_rate": 2e-05, |
| "loss": 0.0569, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01554605518849592, |
| "grad_norm": 0.41272085905075073, |
| "learning_rate": 2.0689655172413797e-05, |
| "loss": 0.0765, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01606425702811245, |
| "grad_norm": 0.27000927925109863, |
| "learning_rate": 2.1379310344827585e-05, |
| "loss": 0.0629, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.01658245886772898, |
| "grad_norm": 0.28079530596733093, |
| "learning_rate": 2.206896551724138e-05, |
| "loss": 0.0569, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.017100660707345512, |
| "grad_norm": 0.2628077268600464, |
| "learning_rate": 2.2758620689655175e-05, |
| "loss": 0.0516, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.01761886254696204, |
| "grad_norm": 0.21723352372646332, |
| "learning_rate": 2.3448275862068967e-05, |
| "loss": 0.0464, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.018137064386578573, |
| "grad_norm": 0.18918418884277344, |
| "learning_rate": 2.413793103448276e-05, |
| "loss": 0.0517, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.018655266226195105, |
| "grad_norm": 0.188095822930336, |
| "learning_rate": 2.4827586206896553e-05, |
| "loss": 0.0429, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.019173468065811633, |
| "grad_norm": 0.23603039979934692, |
| "learning_rate": 2.551724137931035e-05, |
| "loss": 0.0458, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.019691669905428165, |
| "grad_norm": 0.21862974762916565, |
| "learning_rate": 2.620689655172414e-05, |
| "loss": 0.0586, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.020209871745044693, |
| "grad_norm": 0.31196755170822144, |
| "learning_rate": 2.6896551724137935e-05, |
| "loss": 0.066, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.020728073584661225, |
| "grad_norm": 0.24622637033462524, |
| "learning_rate": 2.7586206896551727e-05, |
| "loss": 0.0592, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.021246275424277757, |
| "grad_norm": 0.3382416367530823, |
| "learning_rate": 2.8275862068965518e-05, |
| "loss": 0.0632, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.021764477263894286, |
| "grad_norm": 0.19901607930660248, |
| "learning_rate": 2.8965517241379313e-05, |
| "loss": 0.0423, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.022282679103510818, |
| "grad_norm": 0.21523790061473846, |
| "learning_rate": 2.965517241379311e-05, |
| "loss": 0.0476, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.02280088094312735, |
| "grad_norm": 0.18428052961826324, |
| "learning_rate": 3.0344827586206897e-05, |
| "loss": 0.0515, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.023319082782743878, |
| "grad_norm": 0.16002054512500763, |
| "learning_rate": 3.103448275862069e-05, |
| "loss": 0.043, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02383728462236041, |
| "grad_norm": 0.26238277554512024, |
| "learning_rate": 3.172413793103448e-05, |
| "loss": 0.0568, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.02435548646197694, |
| "grad_norm": 0.18134547770023346, |
| "learning_rate": 3.2413793103448275e-05, |
| "loss": 0.0456, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02487368830159347, |
| "grad_norm": 0.1988374888896942, |
| "learning_rate": 3.310344827586207e-05, |
| "loss": 0.0491, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.025391890141210002, |
| "grad_norm": 0.13628098368644714, |
| "learning_rate": 3.3793103448275865e-05, |
| "loss": 0.0408, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.02591009198082653, |
| "grad_norm": 0.19092515110969543, |
| "learning_rate": 3.4482758620689657e-05, |
| "loss": 0.0569, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.026428293820443063, |
| "grad_norm": 0.23275373876094818, |
| "learning_rate": 3.517241379310345e-05, |
| "loss": 0.0468, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.026946495660059595, |
| "grad_norm": 0.28532490134239197, |
| "learning_rate": 3.586206896551725e-05, |
| "loss": 0.0536, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.027464697499676123, |
| "grad_norm": 0.1914573460817337, |
| "learning_rate": 3.655172413793104e-05, |
| "loss": 0.0392, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.027982899339292655, |
| "grad_norm": 0.17403478920459747, |
| "learning_rate": 3.724137931034483e-05, |
| "loss": 0.0509, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.028501101178909184, |
| "grad_norm": 0.16115383803844452, |
| "learning_rate": 3.793103448275862e-05, |
| "loss": 0.0502, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.029019303018525715, |
| "grad_norm": 0.17531007528305054, |
| "learning_rate": 3.862068965517242e-05, |
| "loss": 0.0466, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.029537504858142247, |
| "grad_norm": 0.19580255448818207, |
| "learning_rate": 3.931034482758621e-05, |
| "loss": 0.0503, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.030055706697758776, |
| "grad_norm": 0.15387947857379913, |
| "learning_rate": 4e-05, |
| "loss": 0.0365, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.030573908537375308, |
| "grad_norm": 0.1713232398033142, |
| "learning_rate": 3.999997180630039e-05, |
| "loss": 0.035, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03109211037699184, |
| "grad_norm": 0.2288016676902771, |
| "learning_rate": 3.9999887225281024e-05, |
| "loss": 0.0388, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03161031221660837, |
| "grad_norm": 0.23962470889091492, |
| "learning_rate": 3.9999746257180374e-05, |
| "loss": 0.0483, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0321285140562249, |
| "grad_norm": 0.1429242640733719, |
| "learning_rate": 3.9999548902395895e-05, |
| "loss": 0.038, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03264671589584143, |
| "grad_norm": 0.2013135850429535, |
| "learning_rate": 3.999929516148398e-05, |
| "loss": 0.0515, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.03316491773545796, |
| "grad_norm": 0.14072024822235107, |
| "learning_rate": 3.999898503516004e-05, |
| "loss": 0.0413, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.03368311957507449, |
| "grad_norm": 0.201124906539917, |
| "learning_rate": 3.999861852429842e-05, |
| "loss": 0.0394, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.034201321414691024, |
| "grad_norm": 0.1803581863641739, |
| "learning_rate": 3.999819562993246e-05, |
| "loss": 0.0443, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.03471952325430755, |
| "grad_norm": 0.26812461018562317, |
| "learning_rate": 3.9997716353254456e-05, |
| "loss": 0.0505, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.03523772509392408, |
| "grad_norm": 0.2087101936340332, |
| "learning_rate": 3.999718069561565e-05, |
| "loss": 0.0491, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.03575592693354061, |
| "grad_norm": 0.145724818110466, |
| "learning_rate": 3.999658865852628e-05, |
| "loss": 0.0256, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.036274128773157145, |
| "grad_norm": 0.5044896006584167, |
| "learning_rate": 3.999594024365551e-05, |
| "loss": 0.0593, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03679233061277368, |
| "grad_norm": 0.3319794535636902, |
| "learning_rate": 3.999523545283146e-05, |
| "loss": 0.0527, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03731053245239021, |
| "grad_norm": 0.16803030669689178, |
| "learning_rate": 3.999447428804119e-05, |
| "loss": 0.0406, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.037828734292006734, |
| "grad_norm": 0.18300779163837433, |
| "learning_rate": 3.999365675143071e-05, |
| "loss": 0.0415, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.038346936131623266, |
| "grad_norm": 0.26276132464408875, |
| "learning_rate": 3.999278284530498e-05, |
| "loss": 0.0488, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0388651379712398, |
| "grad_norm": 0.18061015009880066, |
| "learning_rate": 3.999185257212782e-05, |
| "loss": 0.0457, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.03938333981085633, |
| "grad_norm": 0.15918686985969543, |
| "learning_rate": 3.999086593452205e-05, |
| "loss": 0.0315, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03990154165047286, |
| "grad_norm": 0.2396080493927002, |
| "learning_rate": 3.998982293526935e-05, |
| "loss": 0.054, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.04041974349008939, |
| "grad_norm": 0.18997341394424438, |
| "learning_rate": 3.998872357731033e-05, |
| "loss": 0.0453, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.04093794532970592, |
| "grad_norm": 0.16604867577552795, |
| "learning_rate": 3.998756786374448e-05, |
| "loss": 0.0412, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.04145614716932245, |
| "grad_norm": 0.17019790410995483, |
| "learning_rate": 3.998635579783019e-05, |
| "loss": 0.0322, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.04197434900893898, |
| "grad_norm": 0.17750458419322968, |
| "learning_rate": 3.9985087382984716e-05, |
| "loss": 0.0336, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.042492550848555515, |
| "grad_norm": 0.33200982213020325, |
| "learning_rate": 3.998376262278419e-05, |
| "loss": 0.0478, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.043010752688172046, |
| "grad_norm": 0.26823684573173523, |
| "learning_rate": 3.99823815209636e-05, |
| "loss": 0.0374, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.04352895452778857, |
| "grad_norm": 0.1756100207567215, |
| "learning_rate": 3.99809440814168e-05, |
| "loss": 0.0259, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0440471563674051, |
| "grad_norm": 0.26577091217041016, |
| "learning_rate": 3.997945030819644e-05, |
| "loss": 0.0328, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.044565358207021635, |
| "grad_norm": 0.2032586932182312, |
| "learning_rate": 3.997790020551403e-05, |
| "loss": 0.0323, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.04508356004663817, |
| "grad_norm": 0.16400256752967834, |
| "learning_rate": 3.997629377773988e-05, |
| "loss": 0.0316, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0456017618862547, |
| "grad_norm": 0.13048818707466125, |
| "learning_rate": 3.997463102940311e-05, |
| "loss": 0.0227, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.046119963725871224, |
| "grad_norm": 0.200350821018219, |
| "learning_rate": 3.997291196519161e-05, |
| "loss": 0.0152, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.046638165565487756, |
| "grad_norm": 0.14069367945194244, |
| "learning_rate": 3.997113658995207e-05, |
| "loss": 0.0232, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.04715636740510429, |
| "grad_norm": 0.2060389518737793, |
| "learning_rate": 3.9969304908689934e-05, |
| "loss": 0.0313, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.04767456924472082, |
| "grad_norm": 0.5540250539779663, |
| "learning_rate": 3.996741692656938e-05, |
| "loss": 0.0637, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.04819277108433735, |
| "grad_norm": 0.38743162155151367, |
| "learning_rate": 3.996547264891332e-05, |
| "loss": 0.0501, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.04871097292395388, |
| "grad_norm": 0.33079805970191956, |
| "learning_rate": 3.996347208120341e-05, |
| "loss": 0.0491, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.04922917476357041, |
| "grad_norm": 0.2402544617652893, |
| "learning_rate": 3.996141522907998e-05, |
| "loss": 0.0308, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04974737660318694, |
| "grad_norm": 0.151661679148674, |
| "learning_rate": 3.995930209834206e-05, |
| "loss": 0.0288, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.05026557844280347, |
| "grad_norm": 0.28846490383148193, |
| "learning_rate": 3.995713269494734e-05, |
| "loss": 0.0417, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.050783780282420005, |
| "grad_norm": 0.20232760906219482, |
| "learning_rate": 3.995490702501218e-05, |
| "loss": 0.0338, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.05130198212203654, |
| "grad_norm": 0.15520133078098297, |
| "learning_rate": 3.995262509481157e-05, |
| "loss": 0.0269, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.05182018396165306, |
| "grad_norm": 0.19639192521572113, |
| "learning_rate": 3.9950286910779106e-05, |
| "loss": 0.0391, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.052338385801269594, |
| "grad_norm": 0.21983079612255096, |
| "learning_rate": 3.9947892479507e-05, |
| "loss": 0.0378, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.052856587640886125, |
| "grad_norm": 0.21260757744312286, |
| "learning_rate": 3.994544180774603e-05, |
| "loss": 0.0379, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.05337478948050266, |
| "grad_norm": 0.19602173566818237, |
| "learning_rate": 3.9942934902405564e-05, |
| "loss": 0.039, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.05389299132011919, |
| "grad_norm": 0.1708235889673233, |
| "learning_rate": 3.9940371770553484e-05, |
| "loss": 0.0379, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.054411193159735714, |
| "grad_norm": 0.18058320879936218, |
| "learning_rate": 3.99377524194162e-05, |
| "loss": 0.0337, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.054929394999352246, |
| "grad_norm": 0.20318712294101715, |
| "learning_rate": 3.9935076856378646e-05, |
| "loss": 0.0309, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.05544759683896878, |
| "grad_norm": 0.1709553748369217, |
| "learning_rate": 3.993234508898422e-05, |
| "loss": 0.0365, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.05596579867858531, |
| "grad_norm": 0.3021198809146881, |
| "learning_rate": 3.992955712493477e-05, |
| "loss": 0.043, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.05648400051820184, |
| "grad_norm": 0.20962610840797424, |
| "learning_rate": 3.9926712972090624e-05, |
| "loss": 0.0318, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.05700220235781837, |
| "grad_norm": 0.12813960015773773, |
| "learning_rate": 3.992381263847048e-05, |
| "loss": 0.035, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0575204041974349, |
| "grad_norm": 0.16706594824790955, |
| "learning_rate": 3.992085613225147e-05, |
| "loss": 0.0339, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.05803860603705143, |
| "grad_norm": 0.21272806823253632, |
| "learning_rate": 3.991784346176906e-05, |
| "loss": 0.0394, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05855680787666796, |
| "grad_norm": 0.11596448719501495, |
| "learning_rate": 3.99147746355171e-05, |
| "loss": 0.0333, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.059075009716284495, |
| "grad_norm": 0.19818900525569916, |
| "learning_rate": 3.991164966214773e-05, |
| "loss": 0.0294, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.05959321155590103, |
| "grad_norm": 0.4205951690673828, |
| "learning_rate": 3.990846855047141e-05, |
| "loss": 0.0628, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.06011141339551755, |
| "grad_norm": 0.23930928111076355, |
| "learning_rate": 3.9905231309456884e-05, |
| "loss": 0.045, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.060629615235134084, |
| "grad_norm": 0.3190346658229828, |
| "learning_rate": 3.9901937948231124e-05, |
| "loss": 0.044, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.061147817074750616, |
| "grad_norm": 0.21698309481143951, |
| "learning_rate": 3.989858847607932e-05, |
| "loss": 0.0417, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.06166601891436715, |
| "grad_norm": 0.14343884587287903, |
| "learning_rate": 3.9895182902444895e-05, |
| "loss": 0.0345, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.06218422075398368, |
| "grad_norm": 0.2800877094268799, |
| "learning_rate": 3.989172123692941e-05, |
| "loss": 0.0431, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0627024225936002, |
| "grad_norm": 0.21430440247058868, |
| "learning_rate": 3.988820348929258e-05, |
| "loss": 0.0365, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.06322062443321674, |
| "grad_norm": 0.18680894374847412, |
| "learning_rate": 3.988462966945224e-05, |
| "loss": 0.0277, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.06373882627283327, |
| "grad_norm": 0.18713238835334778, |
| "learning_rate": 3.988099978748431e-05, |
| "loss": 0.0359, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0642570281124498, |
| "grad_norm": 0.16081608831882477, |
| "learning_rate": 3.987731385362277e-05, |
| "loss": 0.0256, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.06477522995206633, |
| "grad_norm": 0.13975271582603455, |
| "learning_rate": 3.987357187825963e-05, |
| "loss": 0.0272, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.06529343179168286, |
| "grad_norm": 0.18400432169437408, |
| "learning_rate": 3.98697738719449e-05, |
| "loss": 0.0295, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.0658116336312994, |
| "grad_norm": 0.19633497297763824, |
| "learning_rate": 3.986591984538658e-05, |
| "loss": 0.0304, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.06632983547091592, |
| "grad_norm": 0.12133795768022537, |
| "learning_rate": 3.986200980945057e-05, |
| "loss": 0.0253, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.06684803731053245, |
| "grad_norm": 0.22974886000156403, |
| "learning_rate": 3.985804377516074e-05, |
| "loss": 0.0377, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.06736623915014898, |
| "grad_norm": 0.1624593436717987, |
| "learning_rate": 3.985402175369878e-05, |
| "loss": 0.0252, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06788444098976551, |
| "grad_norm": 0.1626574844121933, |
| "learning_rate": 3.984994375640427e-05, |
| "loss": 0.03, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06840264282938205, |
| "grad_norm": 0.17271150648593903, |
| "learning_rate": 3.984580979477459e-05, |
| "loss": 0.0288, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06892084466899857, |
| "grad_norm": 0.38058826327323914, |
| "learning_rate": 3.984161988046491e-05, |
| "loss": 0.0392, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.0694390465086151, |
| "grad_norm": 0.14003795385360718, |
| "learning_rate": 3.983737402528815e-05, |
| "loss": 0.0278, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.06995724834823164, |
| "grad_norm": 0.22751788794994354, |
| "learning_rate": 3.983307224121494e-05, |
| "loss": 0.0273, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.07047545018784816, |
| "grad_norm": 0.22172273695468903, |
| "learning_rate": 3.9828714540373605e-05, |
| "loss": 0.0306, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.0709936520274647, |
| "grad_norm": 0.24642212688922882, |
| "learning_rate": 3.982430093505011e-05, |
| "loss": 0.0421, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.07151185386708123, |
| "grad_norm": 0.15056243538856506, |
| "learning_rate": 3.9819831437688046e-05, |
| "loss": 0.029, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.07203005570669777, |
| "grad_norm": 0.18077677488327026, |
| "learning_rate": 3.9815306060888585e-05, |
| "loss": 0.0306, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.07254825754631429, |
| "grad_norm": 0.33231616020202637, |
| "learning_rate": 3.981072481741043e-05, |
| "loss": 0.0463, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07306645938593082, |
| "grad_norm": 0.4319797158241272, |
| "learning_rate": 3.980608772016981e-05, |
| "loss": 0.0507, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.07358466122554735, |
| "grad_norm": 0.20595696568489075, |
| "learning_rate": 3.980139478224041e-05, |
| "loss": 0.0353, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.07410286306516388, |
| "grad_norm": 0.21615833044052124, |
| "learning_rate": 3.979664601685336e-05, |
| "loss": 0.0386, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.07462106490478042, |
| "grad_norm": 0.11346781253814697, |
| "learning_rate": 3.979184143739718e-05, |
| "loss": 0.0219, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.07513926674439694, |
| "grad_norm": 0.4296473264694214, |
| "learning_rate": 3.978698105741777e-05, |
| "loss": 0.0483, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.07565746858401347, |
| "grad_norm": 0.18602745234966278, |
| "learning_rate": 3.978206489061833e-05, |
| "loss": 0.0318, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.07617567042363001, |
| "grad_norm": 0.1680869311094284, |
| "learning_rate": 3.9777092950859367e-05, |
| "loss": 0.0312, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.07669387226324653, |
| "grad_norm": 0.29125967621803284, |
| "learning_rate": 3.9772065252158594e-05, |
| "loss": 0.0343, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.07721207410286307, |
| "grad_norm": 0.19228827953338623, |
| "learning_rate": 3.976698180869097e-05, |
| "loss": 0.0336, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.0777302759424796, |
| "grad_norm": 0.13979323208332062, |
| "learning_rate": 3.9761842634788606e-05, |
| "loss": 0.0217, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.07824847778209612, |
| "grad_norm": 0.23602689802646637, |
| "learning_rate": 3.975664774494073e-05, |
| "loss": 0.0435, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.07876667962171266, |
| "grad_norm": 0.19915208220481873, |
| "learning_rate": 3.975139715379364e-05, |
| "loss": 0.0248, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.07928488146132918, |
| "grad_norm": 0.15488211810588837, |
| "learning_rate": 3.9746090876150736e-05, |
| "loss": 0.0226, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.07980308330094572, |
| "grad_norm": 0.27361470460891724, |
| "learning_rate": 3.974072892697234e-05, |
| "loss": 0.0412, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.08032128514056225, |
| "grad_norm": 0.12932442128658295, |
| "learning_rate": 3.973531132137579e-05, |
| "loss": 0.0214, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.08083948698017877, |
| "grad_norm": 0.13963568210601807, |
| "learning_rate": 3.972983807463531e-05, |
| "loss": 0.0242, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.08135768881979531, |
| "grad_norm": 0.22062024474143982, |
| "learning_rate": 3.9724309202182014e-05, |
| "loss": 0.0303, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.08187589065941184, |
| "grad_norm": 0.3001038730144501, |
| "learning_rate": 3.9718724719603836e-05, |
| "loss": 0.0353, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.08239409249902838, |
| "grad_norm": 0.2961874008178711, |
| "learning_rate": 3.9713084642645504e-05, |
| "loss": 0.0298, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0829122943386449, |
| "grad_norm": 0.30271297693252563, |
| "learning_rate": 3.970738898720847e-05, |
| "loss": 0.0353, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.08343049617826143, |
| "grad_norm": 0.18537764251232147, |
| "learning_rate": 3.9701637769350906e-05, |
| "loss": 0.0276, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.08394869801787797, |
| "grad_norm": 0.45981863141059875, |
| "learning_rate": 3.969583100528762e-05, |
| "loss": 0.0332, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.08446689985749449, |
| "grad_norm": 0.3007035255432129, |
| "learning_rate": 3.968996871139002e-05, |
| "loss": 0.0417, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.08498510169711103, |
| "grad_norm": 0.2484988421201706, |
| "learning_rate": 3.9684050904186094e-05, |
| "loss": 0.0332, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.08550330353672755, |
| "grad_norm": 0.1978878676891327, |
| "learning_rate": 3.9678077600360316e-05, |
| "loss": 0.0267, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.08602150537634409, |
| "grad_norm": 0.14420898258686066, |
| "learning_rate": 3.9672048816753654e-05, |
| "loss": 0.0224, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.08653970721596062, |
| "grad_norm": 0.2479882538318634, |
| "learning_rate": 3.966596457036347e-05, |
| "loss": 0.0317, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.08705790905557714, |
| "grad_norm": 0.2385600358247757, |
| "learning_rate": 3.96598248783435e-05, |
| "loss": 0.0362, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.08757611089519368, |
| "grad_norm": 0.30355146527290344, |
| "learning_rate": 3.965362975800382e-05, |
| "loss": 0.0372, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.0880943127348102, |
| "grad_norm": 0.440682590007782, |
| "learning_rate": 3.964737922681077e-05, |
| "loss": 0.0434, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.08861251457442675, |
| "grad_norm": 0.1279064565896988, |
| "learning_rate": 3.964107330238689e-05, |
| "loss": 0.0164, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.08913071641404327, |
| "grad_norm": 0.2853245735168457, |
| "learning_rate": 3.9634712002510925e-05, |
| "loss": 0.0453, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.0896489182536598, |
| "grad_norm": 0.20429256558418274, |
| "learning_rate": 3.962829534511774e-05, |
| "loss": 0.024, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.09016712009327633, |
| "grad_norm": 0.4442828595638275, |
| "learning_rate": 3.962182334829825e-05, |
| "loss": 0.0525, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.09068532193289286, |
| "grad_norm": 0.2360175997018814, |
| "learning_rate": 3.961529603029942e-05, |
| "loss": 0.0282, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0912035237725094, |
| "grad_norm": 0.09913711994886398, |
| "learning_rate": 3.9608713409524175e-05, |
| "loss": 0.0217, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.09172172561212592, |
| "grad_norm": 0.18438151478767395, |
| "learning_rate": 3.9602075504531356e-05, |
| "loss": 0.0312, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.09223992745174245, |
| "grad_norm": 0.23954534530639648, |
| "learning_rate": 3.959538233403567e-05, |
| "loss": 0.0206, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.09275812929135899, |
| "grad_norm": 0.28380924463272095, |
| "learning_rate": 3.9588633916907635e-05, |
| "loss": 0.0388, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.09327633113097551, |
| "grad_norm": 0.16261892020702362, |
| "learning_rate": 3.9581830272173556e-05, |
| "loss": 0.0191, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.09379453297059205, |
| "grad_norm": 0.1379668265581131, |
| "learning_rate": 3.95749714190154e-05, |
| "loss": 0.0289, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.09431273481020858, |
| "grad_norm": 0.29320958256721497, |
| "learning_rate": 3.9568057376770826e-05, |
| "loss": 0.0227, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.0948309366498251, |
| "grad_norm": 0.3081492781639099, |
| "learning_rate": 3.956108816493309e-05, |
| "loss": 0.0388, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.09534913848944164, |
| "grad_norm": 0.14883936941623688, |
| "learning_rate": 3.955406380315094e-05, |
| "loss": 0.0262, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.09586734032905816, |
| "grad_norm": 0.18635591864585876, |
| "learning_rate": 3.954698431122868e-05, |
| "loss": 0.0204, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.0963855421686747, |
| "grad_norm": 0.2801106572151184, |
| "learning_rate": 3.953984970912601e-05, |
| "loss": 0.0223, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.09690374400829123, |
| "grad_norm": 0.4112246334552765, |
| "learning_rate": 3.953266001695802e-05, |
| "loss": 0.0434, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.09742194584790775, |
| "grad_norm": 0.19749994575977325, |
| "learning_rate": 3.95254152549951e-05, |
| "loss": 0.0266, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.09794014768752429, |
| "grad_norm": 0.1635226160287857, |
| "learning_rate": 3.951811544366292e-05, |
| "loss": 0.0216, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.09845834952714082, |
| "grad_norm": 0.15637846291065216, |
| "learning_rate": 3.9510760603542354e-05, |
| "loss": 0.0294, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.09897655136675736, |
| "grad_norm": 0.376310259103775, |
| "learning_rate": 3.950335075536941e-05, |
| "loss": 0.0426, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.09949475320637388, |
| "grad_norm": 0.32836809754371643, |
| "learning_rate": 3.949588592003518e-05, |
| "loss": 0.023, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.1000129550459904, |
| "grad_norm": 0.14829964935779572, |
| "learning_rate": 3.948836611858582e-05, |
| "loss": 0.0296, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.10053115688560695, |
| "grad_norm": 0.13914668560028076, |
| "learning_rate": 3.948079137222242e-05, |
| "loss": 0.0209, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.10104935872522347, |
| "grad_norm": 0.28442588448524475, |
| "learning_rate": 3.9473161702301e-05, |
| "loss": 0.0308, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.10156756056484001, |
| "grad_norm": 0.1649635136127472, |
| "learning_rate": 3.9465477130332405e-05, |
| "loss": 0.0183, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.10208576240445653, |
| "grad_norm": 0.22778348624706268, |
| "learning_rate": 3.945773767798231e-05, |
| "loss": 0.0298, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.10260396424407307, |
| "grad_norm": 0.5452020764350891, |
| "learning_rate": 3.944994336707107e-05, |
| "loss": 0.0299, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.1031221660836896, |
| "grad_norm": 0.20926618576049805, |
| "learning_rate": 3.944209421957375e-05, |
| "loss": 0.0268, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.10364036792330612, |
| "grad_norm": 0.15481847524642944, |
| "learning_rate": 3.943419025762e-05, |
| "loss": 0.021, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.10415856976292266, |
| "grad_norm": 0.8367764353752136, |
| "learning_rate": 3.942623150349399e-05, |
| "loss": 0.0549, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.10467677160253919, |
| "grad_norm": 0.37851259112358093, |
| "learning_rate": 3.9418217979634425e-05, |
| "loss": 0.0382, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.10519497344215573, |
| "grad_norm": 0.506738543510437, |
| "learning_rate": 3.941014970863437e-05, |
| "loss": 0.0491, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.10571317528177225, |
| "grad_norm": 0.37913596630096436, |
| "learning_rate": 3.940202671324127e-05, |
| "loss": 0.0275, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.10623137712138878, |
| "grad_norm": 0.20694205164909363, |
| "learning_rate": 3.9393849016356866e-05, |
| "loss": 0.0182, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.10674957896100531, |
| "grad_norm": 0.2608841061592102, |
| "learning_rate": 3.9385616641037104e-05, |
| "loss": 0.0296, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.10726778080062184, |
| "grad_norm": 0.18457987904548645, |
| "learning_rate": 3.9377329610492084e-05, |
| "loss": 0.0202, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.10778598264023838, |
| "grad_norm": 0.34302592277526855, |
| "learning_rate": 3.9368987948086024e-05, |
| "loss": 0.0348, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.1083041844798549, |
| "grad_norm": 0.25883880257606506, |
| "learning_rate": 3.9360591677337166e-05, |
| "loss": 0.0264, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.10882238631947143, |
| "grad_norm": 0.1379159390926361, |
| "learning_rate": 3.935214082191768e-05, |
| "loss": 0.0198, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.10934058815908797, |
| "grad_norm": 0.1821633279323578, |
| "learning_rate": 3.934363540565366e-05, |
| "loss": 0.0215, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.10985878999870449, |
| "grad_norm": 0.18341860175132751, |
| "learning_rate": 3.933507545252504e-05, |
| "loss": 0.0209, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.11037699183832103, |
| "grad_norm": 0.22454297542572021, |
| "learning_rate": 3.9326460986665476e-05, |
| "loss": 0.0294, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.11089519367793756, |
| "grad_norm": 0.16194742918014526, |
| "learning_rate": 3.931779203236233e-05, |
| "loss": 0.0254, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.11141339551755408, |
| "grad_norm": 0.47000735998153687, |
| "learning_rate": 3.9309068614056615e-05, |
| "loss": 0.048, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.11193159735717062, |
| "grad_norm": 0.28025323152542114, |
| "learning_rate": 3.930029075634286e-05, |
| "loss": 0.0253, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.11244979919678715, |
| "grad_norm": 0.5879072546958923, |
| "learning_rate": 3.9291458483969086e-05, |
| "loss": 0.062, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.11296800103640368, |
| "grad_norm": 0.18934841454029083, |
| "learning_rate": 3.928257182183674e-05, |
| "loss": 0.0318, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.11348620287602021, |
| "grad_norm": 0.21203094720840454, |
| "learning_rate": 3.9273630795000626e-05, |
| "loss": 0.026, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.11400440471563673, |
| "grad_norm": 0.25133925676345825, |
| "learning_rate": 3.926463542866879e-05, |
| "loss": 0.0183, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.11452260655525327, |
| "grad_norm": 0.25150206685066223, |
| "learning_rate": 3.92555857482025e-05, |
| "loss": 0.0223, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.1150408083948698, |
| "grad_norm": 0.20962639153003693, |
| "learning_rate": 3.924648177911616e-05, |
| "loss": 0.0309, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.11555901023448634, |
| "grad_norm": 0.21745115518569946, |
| "learning_rate": 3.9237323547077216e-05, |
| "loss": 0.0257, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.11607721207410286, |
| "grad_norm": 0.2105790674686432, |
| "learning_rate": 3.922811107790613e-05, |
| "loss": 0.0251, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.11659541391371939, |
| "grad_norm": 0.1776510328054428, |
| "learning_rate": 3.921884439757624e-05, |
| "loss": 0.0211, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.11711361575333593, |
| "grad_norm": 0.4401101768016815, |
| "learning_rate": 3.920952353221376e-05, |
| "loss": 0.0413, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.11763181759295245, |
| "grad_norm": 0.33000439405441284, |
| "learning_rate": 3.9200148508097656e-05, |
| "loss": 0.0378, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.11815001943256899, |
| "grad_norm": 0.22733037173748016, |
| "learning_rate": 3.919071935165958e-05, |
| "loss": 0.0309, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.11866822127218551, |
| "grad_norm": 0.27271509170532227, |
| "learning_rate": 3.918123608948382e-05, |
| "loss": 0.0332, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.11918642311180205, |
| "grad_norm": 0.28250646591186523, |
| "learning_rate": 3.91716987483072e-05, |
| "loss": 0.0274, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.11970462495141858, |
| "grad_norm": 0.18986068665981293, |
| "learning_rate": 3.916210735501902e-05, |
| "loss": 0.0188, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.1202228267910351, |
| "grad_norm": 0.13927146792411804, |
| "learning_rate": 3.915246193666096e-05, |
| "loss": 0.0202, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.12074102863065164, |
| "grad_norm": 0.31036004424095154, |
| "learning_rate": 3.914276252042702e-05, |
| "loss": 0.024, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.12125923047026817, |
| "grad_norm": 0.21623092889785767, |
| "learning_rate": 3.913300913366344e-05, |
| "loss": 0.024, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.1217774323098847, |
| "grad_norm": 0.17337322235107422, |
| "learning_rate": 3.9123201803868644e-05, |
| "loss": 0.0162, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.12229563414950123, |
| "grad_norm": 0.2597239315509796, |
| "learning_rate": 3.911334055869309e-05, |
| "loss": 0.0277, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.12281383598911776, |
| "grad_norm": 0.23021075129508972, |
| "learning_rate": 3.910342542593932e-05, |
| "loss": 0.0263, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.1233320378287343, |
| "grad_norm": 0.38447242975234985, |
| "learning_rate": 3.909345643356172e-05, |
| "loss": 0.0293, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.12385023966835082, |
| "grad_norm": 0.20466025173664093, |
| "learning_rate": 3.908343360966659e-05, |
| "loss": 0.021, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.12436844150796736, |
| "grad_norm": 0.35317835211753845, |
| "learning_rate": 3.9073356982511975e-05, |
| "loss": 0.0427, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.12488664334758388, |
| "grad_norm": 0.5810201168060303, |
| "learning_rate": 3.906322658050761e-05, |
| "loss": 0.05, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.1254048451872004, |
| "grad_norm": 0.3113636374473572, |
| "learning_rate": 3.905304243221485e-05, |
| "loss": 0.0313, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.12592304702681695, |
| "grad_norm": 0.1605708748102188, |
| "learning_rate": 3.904280456634659e-05, |
| "loss": 0.0222, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.1264412488664335, |
| "grad_norm": 0.35602137446403503, |
| "learning_rate": 3.9032513011767136e-05, |
| "loss": 0.0224, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.12695945070605, |
| "grad_norm": 0.42479732632637024, |
| "learning_rate": 3.90221677974922e-05, |
| "loss": 0.0314, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.12747765254566654, |
| "grad_norm": 0.24160167574882507, |
| "learning_rate": 3.901176895268877e-05, |
| "loss": 0.0236, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.12799585438528308, |
| "grad_norm": 0.5197824835777283, |
| "learning_rate": 3.900131650667504e-05, |
| "loss": 0.0314, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.1285140562248996, |
| "grad_norm": 0.1657523363828659, |
| "learning_rate": 3.899081048892031e-05, |
| "loss": 0.0207, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.12903225806451613, |
| "grad_norm": 0.21127645671367645, |
| "learning_rate": 3.898025092904494e-05, |
| "loss": 0.0259, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.12955045990413266, |
| "grad_norm": 0.19938185811042786, |
| "learning_rate": 3.896963785682024e-05, |
| "loss": 0.024, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1300686617437492, |
| "grad_norm": 0.6112692952156067, |
| "learning_rate": 3.895897130216838e-05, |
| "loss": 0.0594, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.13058686358336571, |
| "grad_norm": 0.29001185297966003, |
| "learning_rate": 3.894825129516232e-05, |
| "loss": 0.0308, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.13110506542298225, |
| "grad_norm": 0.12033861130475998, |
| "learning_rate": 3.893747786602573e-05, |
| "loss": 0.0173, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.1316232672625988, |
| "grad_norm": 0.21433091163635254, |
| "learning_rate": 3.89266510451329e-05, |
| "loss": 0.0299, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.1321414691022153, |
| "grad_norm": 0.2604752779006958, |
| "learning_rate": 3.8915770863008635e-05, |
| "loss": 0.0209, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.13265967094183184, |
| "grad_norm": 0.2170797735452652, |
| "learning_rate": 3.8904837350328194e-05, |
| "loss": 0.0221, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.13317787278144838, |
| "grad_norm": 0.23096159100532532, |
| "learning_rate": 3.88938505379172e-05, |
| "loss": 0.0337, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.1336960746210649, |
| "grad_norm": 0.1898171603679657, |
| "learning_rate": 3.888281045675153e-05, |
| "loss": 0.0211, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.13421427646068143, |
| "grad_norm": 0.18458768725395203, |
| "learning_rate": 3.887171713795727e-05, |
| "loss": 0.0211, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.13473247830029797, |
| "grad_norm": 0.17833667993545532, |
| "learning_rate": 3.886057061281058e-05, |
| "loss": 0.0318, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1352506801399145, |
| "grad_norm": 0.1271020621061325, |
| "learning_rate": 3.8849370912737644e-05, |
| "loss": 0.0161, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.13576888197953102, |
| "grad_norm": 0.23673047125339508, |
| "learning_rate": 3.883811806931456e-05, |
| "loss": 0.038, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.13628708381914756, |
| "grad_norm": 0.16474123299121857, |
| "learning_rate": 3.8826812114267264e-05, |
| "loss": 0.0258, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.1368052856587641, |
| "grad_norm": 0.2002090960741043, |
| "learning_rate": 3.881545307947141e-05, |
| "loss": 0.0238, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1373234874983806, |
| "grad_norm": 0.19066838920116425, |
| "learning_rate": 3.880404099695232e-05, |
| "loss": 0.0214, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.13784168933799715, |
| "grad_norm": 0.245769664645195, |
| "learning_rate": 3.879257589888489e-05, |
| "loss": 0.0345, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.1383598911776137, |
| "grad_norm": 0.27019011974334717, |
| "learning_rate": 3.878105781759347e-05, |
| "loss": 0.0387, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.1388780930172302, |
| "grad_norm": 0.20554135739803314, |
| "learning_rate": 3.876948678555179e-05, |
| "loss": 0.0209, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.13939629485684674, |
| "grad_norm": 0.273275762796402, |
| "learning_rate": 3.875786283538287e-05, |
| "loss": 0.0209, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.13991449669646328, |
| "grad_norm": 0.1820681393146515, |
| "learning_rate": 3.8746185999858925e-05, |
| "loss": 0.0235, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.14043269853607981, |
| "grad_norm": 0.1960431933403015, |
| "learning_rate": 3.873445631190127e-05, |
| "loss": 0.0217, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.14095090037569633, |
| "grad_norm": 0.3486228883266449, |
| "learning_rate": 3.872267380458024e-05, |
| "loss": 0.0279, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.14146910221531286, |
| "grad_norm": 0.25267505645751953, |
| "learning_rate": 3.871083851111508e-05, |
| "loss": 0.0269, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.1419873040549294, |
| "grad_norm": 0.26706644892692566, |
| "learning_rate": 3.8698950464873874e-05, |
| "loss": 0.0253, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.14250550589454591, |
| "grad_norm": 0.271297812461853, |
| "learning_rate": 3.86870096993734e-05, |
| "loss": 0.0362, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.14302370773416245, |
| "grad_norm": 0.3143746554851532, |
| "learning_rate": 3.867501624827911e-05, |
| "loss": 0.0286, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.143541909573779, |
| "grad_norm": 0.28915753960609436, |
| "learning_rate": 3.866297014540497e-05, |
| "loss": 0.0223, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.14406011141339553, |
| "grad_norm": 0.24351872503757477, |
| "learning_rate": 3.8650871424713406e-05, |
| "loss": 0.0204, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.14457831325301204, |
| "grad_norm": 0.22745080292224884, |
| "learning_rate": 3.863872012031519e-05, |
| "loss": 0.0201, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.14509651509262858, |
| "grad_norm": 0.17342780530452728, |
| "learning_rate": 3.8626516266469346e-05, |
| "loss": 0.0154, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.14561471693224512, |
| "grad_norm": 0.32193371653556824, |
| "learning_rate": 3.8614259897583035e-05, |
| "loss": 0.0296, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.14613291877186163, |
| "grad_norm": 0.23850269615650177, |
| "learning_rate": 3.8601951048211516e-05, |
| "loss": 0.0382, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.14665112061147817, |
| "grad_norm": 0.13823804259300232, |
| "learning_rate": 3.8589589753057986e-05, |
| "loss": 0.0142, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.1471693224510947, |
| "grad_norm": 0.5433732271194458, |
| "learning_rate": 3.85771760469735e-05, |
| "loss": 0.0404, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.14768752429071122, |
| "grad_norm": 0.43869102001190186, |
| "learning_rate": 3.856470996495689e-05, |
| "loss": 0.0531, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.14820572613032776, |
| "grad_norm": 0.19134990870952606, |
| "learning_rate": 3.855219154215466e-05, |
| "loss": 0.0205, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1487239279699443, |
| "grad_norm": 0.35158365964889526, |
| "learning_rate": 3.8539620813860875e-05, |
| "loss": 0.0283, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.14924212980956084, |
| "grad_norm": 0.18466360867023468, |
| "learning_rate": 3.8526997815517064e-05, |
| "loss": 0.02, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.14976033164917735, |
| "grad_norm": 0.1774568259716034, |
| "learning_rate": 3.851432258271213e-05, |
| "loss": 0.023, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.1502785334887939, |
| "grad_norm": 0.10834086686372757, |
| "learning_rate": 3.850159515118224e-05, |
| "loss": 0.0119, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.15079673532841043, |
| "grad_norm": 0.3568941056728363, |
| "learning_rate": 3.8488815556810746e-05, |
| "loss": 0.035, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.15131493716802694, |
| "grad_norm": 0.09321002662181854, |
| "learning_rate": 3.847598383562803e-05, |
| "loss": 0.0167, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.15183313900764348, |
| "grad_norm": 0.24882569909095764, |
| "learning_rate": 3.846310002381148e-05, |
| "loss": 0.032, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.15235134084726001, |
| "grad_norm": 0.4001385271549225, |
| "learning_rate": 3.845016415768532e-05, |
| "loss": 0.0365, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.15286954268687653, |
| "grad_norm": 0.08750130981206894, |
| "learning_rate": 3.8437176273720546e-05, |
| "loss": 0.0161, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.15338774452649306, |
| "grad_norm": 0.158023864030838, |
| "learning_rate": 3.8424136408534814e-05, |
| "loss": 0.0181, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.1539059463661096, |
| "grad_norm": 0.12722338736057281, |
| "learning_rate": 3.841104459889232e-05, |
| "loss": 0.0193, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.15442414820572614, |
| "grad_norm": 0.16069327294826508, |
| "learning_rate": 3.839790088170371e-05, |
| "loss": 0.0202, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.15494235004534265, |
| "grad_norm": 0.16501739621162415, |
| "learning_rate": 3.838470529402599e-05, |
| "loss": 0.0223, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1554605518849592, |
| "grad_norm": 0.1982022225856781, |
| "learning_rate": 3.8371457873062425e-05, |
| "loss": 0.0207, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.15597875372457573, |
| "grad_norm": 0.22107556462287903, |
| "learning_rate": 3.835815865616237e-05, |
| "loss": 0.0371, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.15649695556419224, |
| "grad_norm": 0.2059648483991623, |
| "learning_rate": 3.834480768082125e-05, |
| "loss": 0.0198, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.15701515740380878, |
| "grad_norm": 0.24241840839385986, |
| "learning_rate": 3.83314049846804e-05, |
| "loss": 0.0216, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.15753335924342532, |
| "grad_norm": 0.1592680811882019, |
| "learning_rate": 3.831795060552698e-05, |
| "loss": 0.0274, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.15805156108304186, |
| "grad_norm": 0.16157729923725128, |
| "learning_rate": 3.830444458129386e-05, |
| "loss": 0.0197, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.15856976292265837, |
| "grad_norm": 0.1381078064441681, |
| "learning_rate": 3.829088695005952e-05, |
| "loss": 0.028, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.1590879647622749, |
| "grad_norm": 0.1145080178976059, |
| "learning_rate": 3.827727775004794e-05, |
| "loss": 0.0151, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.15960616660189145, |
| "grad_norm": 0.12582014501094818, |
| "learning_rate": 3.8263617019628495e-05, |
| "loss": 0.017, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.16012436844150796, |
| "grad_norm": 0.11775611340999603, |
| "learning_rate": 3.8249904797315825e-05, |
| "loss": 0.021, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1606425702811245, |
| "grad_norm": 0.18558956682682037, |
| "learning_rate": 3.823614112176977e-05, |
| "loss": 0.0229, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.16116077212074104, |
| "grad_norm": 0.2732170820236206, |
| "learning_rate": 3.822232603179521e-05, |
| "loss": 0.0351, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.16167897396035755, |
| "grad_norm": 0.36403414607048035, |
| "learning_rate": 3.820845956634201e-05, |
| "loss": 0.0217, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.16219717579997409, |
| "grad_norm": 0.16710692644119263, |
| "learning_rate": 3.819454176450486e-05, |
| "loss": 0.0211, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.16271537763959062, |
| "grad_norm": 0.11457782238721848, |
| "learning_rate": 3.818057266552319e-05, |
| "loss": 0.0265, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.16323357947920716, |
| "grad_norm": 0.2501565217971802, |
| "learning_rate": 3.816655230878106e-05, |
| "loss": 0.0323, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.16375178131882367, |
| "grad_norm": 0.13976308703422546, |
| "learning_rate": 3.815248073380704e-05, |
| "loss": 0.0215, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.1642699831584402, |
| "grad_norm": 0.14425045251846313, |
| "learning_rate": 3.8138357980274106e-05, |
| "loss": 0.0209, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.16478818499805675, |
| "grad_norm": 0.17892557382583618, |
| "learning_rate": 3.812418408799953e-05, |
| "loss": 0.0249, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.16530638683767326, |
| "grad_norm": 0.14994150400161743, |
| "learning_rate": 3.810995909694476e-05, |
| "loss": 0.0212, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.1658245886772898, |
| "grad_norm": 0.2975420355796814, |
| "learning_rate": 3.80956830472153e-05, |
| "loss": 0.0269, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.16634279051690634, |
| "grad_norm": 0.16391150653362274, |
| "learning_rate": 3.808135597906061e-05, |
| "loss": 0.0173, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.16686099235652285, |
| "grad_norm": 0.4943293631076813, |
| "learning_rate": 3.8066977932874014e-05, |
| "loss": 0.0466, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.1673791941961394, |
| "grad_norm": 0.2048560231924057, |
| "learning_rate": 3.8052548949192534e-05, |
| "loss": 0.0213, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.16789739603575593, |
| "grad_norm": 0.3236941397190094, |
| "learning_rate": 3.803806906869682e-05, |
| "loss": 0.0385, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.16841559787537247, |
| "grad_norm": 0.10196578502655029, |
| "learning_rate": 3.8023538332210994e-05, |
| "loss": 0.0137, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.16893379971498898, |
| "grad_norm": 0.1597544252872467, |
| "learning_rate": 3.80089567807026e-05, |
| "loss": 0.0226, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.16945200155460552, |
| "grad_norm": 0.12884768843650818, |
| "learning_rate": 3.799432445528241e-05, |
| "loss": 0.0193, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.16997020339422206, |
| "grad_norm": 0.12846127152442932, |
| "learning_rate": 3.797964139720437e-05, |
| "loss": 0.0194, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.17048840523383857, |
| "grad_norm": 0.2257414311170578, |
| "learning_rate": 3.796490764786545e-05, |
| "loss": 0.0258, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.1710066070734551, |
| "grad_norm": 0.1655774712562561, |
| "learning_rate": 3.795012324880554e-05, |
| "loss": 0.0261, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.17152480891307165, |
| "grad_norm": 0.8630039095878601, |
| "learning_rate": 3.793528824170733e-05, |
| "loss": 0.0519, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.17204301075268819, |
| "grad_norm": 0.3237569332122803, |
| "learning_rate": 3.792040266839621e-05, |
| "loss": 0.018, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.1725612125923047, |
| "grad_norm": 0.1244291141629219, |
| "learning_rate": 3.79054665708401e-05, |
| "loss": 0.0224, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.17307941443192124, |
| "grad_norm": 0.12003582715988159, |
| "learning_rate": 3.7890479991149384e-05, |
| "loss": 0.0191, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.17359761627153777, |
| "grad_norm": 0.1280771940946579, |
| "learning_rate": 3.787544297157678e-05, |
| "loss": 0.0161, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.17411581811115429, |
| "grad_norm": 0.5037534236907959, |
| "learning_rate": 3.786035555451721e-05, |
| "loss": 0.0544, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.17463401995077082, |
| "grad_norm": 0.13476350903511047, |
| "learning_rate": 3.7845217782507686e-05, |
| "loss": 0.0218, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.17515222179038736, |
| "grad_norm": 0.11884070932865143, |
| "learning_rate": 3.783002969822718e-05, |
| "loss": 0.0228, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.17567042363000387, |
| "grad_norm": 0.1743396818637848, |
| "learning_rate": 3.781479134449654e-05, |
| "loss": 0.0251, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.1761886254696204, |
| "grad_norm": 0.26071441173553467, |
| "learning_rate": 3.779950276427829e-05, |
| "loss": 0.0266, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.17670682730923695, |
| "grad_norm": 0.16687913239002228, |
| "learning_rate": 3.778416400067662e-05, |
| "loss": 0.0176, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.1772250291488535, |
| "grad_norm": 0.11515403538942337, |
| "learning_rate": 3.7768775096937176e-05, |
| "loss": 0.0186, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.17774323098847, |
| "grad_norm": 0.2612086534500122, |
| "learning_rate": 3.7753336096446954e-05, |
| "loss": 0.0262, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.17826143282808654, |
| "grad_norm": 0.2220304310321808, |
| "learning_rate": 3.773784704273424e-05, |
| "loss": 0.0376, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.17877963466770308, |
| "grad_norm": 0.2760023772716522, |
| "learning_rate": 3.7722307979468374e-05, |
| "loss": 0.0227, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.1792978365073196, |
| "grad_norm": 0.1418105512857437, |
| "learning_rate": 3.770671895045974e-05, |
| "loss": 0.0256, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.17981603834693613, |
| "grad_norm": 0.17588967084884644, |
| "learning_rate": 3.769107999965958e-05, |
| "loss": 0.0184, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.18033424018655267, |
| "grad_norm": 0.17219921946525574, |
| "learning_rate": 3.767539117115988e-05, |
| "loss": 0.0195, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.18085244202616918, |
| "grad_norm": 0.22888055443763733, |
| "learning_rate": 3.765965250919324e-05, |
| "loss": 0.0276, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.18137064386578572, |
| "grad_norm": 0.15111926198005676, |
| "learning_rate": 3.764386405813278e-05, |
| "loss": 0.0222, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.18188884570540226, |
| "grad_norm": 0.10396119952201843, |
| "learning_rate": 3.7628025862492e-05, |
| "loss": 0.0169, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.1824070475450188, |
| "grad_norm": 0.1553802788257599, |
| "learning_rate": 3.7612137966924606e-05, |
| "loss": 0.0205, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.1829252493846353, |
| "grad_norm": 0.21649691462516785, |
| "learning_rate": 3.759620041622447e-05, |
| "loss": 0.0311, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.18344345122425185, |
| "grad_norm": 0.1523357480764389, |
| "learning_rate": 3.758021325532544e-05, |
| "loss": 0.0263, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.18396165306386839, |
| "grad_norm": 0.20473290979862213, |
| "learning_rate": 3.756417652930124e-05, |
| "loss": 0.0274, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.1844798549034849, |
| "grad_norm": 0.3731272220611572, |
| "learning_rate": 3.7548090283365325e-05, |
| "loss": 0.0481, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.18499805674310144, |
| "grad_norm": 0.2392759472131729, |
| "learning_rate": 3.753195456287079e-05, |
| "loss": 0.0188, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.18551625858271797, |
| "grad_norm": 0.1472381055355072, |
| "learning_rate": 3.751576941331017e-05, |
| "loss": 0.0166, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.18603446042233449, |
| "grad_norm": 0.15394793450832367, |
| "learning_rate": 3.749953488031542e-05, |
| "loss": 0.0122, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.18655266226195102, |
| "grad_norm": 0.13148321211338043, |
| "learning_rate": 3.748325100965769e-05, |
| "loss": 0.0237, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.18707086410156756, |
| "grad_norm": 0.22537775337696075, |
| "learning_rate": 3.7466917847247225e-05, |
| "loss": 0.0192, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.1875890659411841, |
| "grad_norm": 0.10962450504302979, |
| "learning_rate": 3.745053543913325e-05, |
| "loss": 0.0209, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.1881072677808006, |
| "grad_norm": 0.19166776537895203, |
| "learning_rate": 3.7434103831503855e-05, |
| "loss": 0.0303, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.18862546962041715, |
| "grad_norm": 0.20966096222400665, |
| "learning_rate": 3.7417623070685796e-05, |
| "loss": 0.0276, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.1891436714600337, |
| "grad_norm": 0.38217735290527344, |
| "learning_rate": 3.740109320314445e-05, |
| "loss": 0.0349, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1896618732996502, |
| "grad_norm": 0.15686549246311188, |
| "learning_rate": 3.738451427548363e-05, |
| "loss": 0.0186, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.19018007513926674, |
| "grad_norm": 0.34243685007095337, |
| "learning_rate": 3.736788633444546e-05, |
| "loss": 0.0335, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.19069827697888328, |
| "grad_norm": 0.23040959239006042, |
| "learning_rate": 3.735120942691025e-05, |
| "loss": 0.0239, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.19121647881849982, |
| "grad_norm": 0.1737140417098999, |
| "learning_rate": 3.7334483599896394e-05, |
| "loss": 0.0352, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.19173468065811633, |
| "grad_norm": 0.09808428585529327, |
| "learning_rate": 3.731770890056017e-05, |
| "loss": 0.0134, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.19225288249773287, |
| "grad_norm": 0.1002270057797432, |
| "learning_rate": 3.730088537619566e-05, |
| "loss": 0.0189, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.1927710843373494, |
| "grad_norm": 0.1760372668504715, |
| "learning_rate": 3.728401307423462e-05, |
| "loss": 0.0191, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.19328928617696592, |
| "grad_norm": 0.2071564793586731, |
| "learning_rate": 3.7267092042246296e-05, |
| "loss": 0.0352, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.19380748801658246, |
| "grad_norm": 0.11756689846515656, |
| "learning_rate": 3.725012232793734e-05, |
| "loss": 0.0273, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.194325689856199, |
| "grad_norm": 0.20388537645339966, |
| "learning_rate": 3.723310397915166e-05, |
| "loss": 0.0346, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1948438916958155, |
| "grad_norm": 0.17640188336372375, |
| "learning_rate": 3.721603704387026e-05, |
| "loss": 0.0243, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.19536209353543205, |
| "grad_norm": 0.25531336665153503, |
| "learning_rate": 3.7198921570211174e-05, |
| "loss": 0.0293, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.19588029537504859, |
| "grad_norm": 0.36133551597595215, |
| "learning_rate": 3.718175760642923e-05, |
| "loss": 0.0207, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.19639849721466512, |
| "grad_norm": 0.1869734823703766, |
| "learning_rate": 3.716454520091601e-05, |
| "loss": 0.0257, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.19691669905428164, |
| "grad_norm": 0.32143622636795044, |
| "learning_rate": 3.714728440219963e-05, |
| "loss": 0.0264, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.19743490089389817, |
| "grad_norm": 0.24964651465415955, |
| "learning_rate": 3.712997525894469e-05, |
| "loss": 0.0188, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1979531027335147, |
| "grad_norm": 0.2554227113723755, |
| "learning_rate": 3.711261781995206e-05, |
| "loss": 0.0203, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.19847130457313122, |
| "grad_norm": 0.09913673996925354, |
| "learning_rate": 3.709521213415878e-05, |
| "loss": 0.0148, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.19898950641274776, |
| "grad_norm": 0.12040822952985764, |
| "learning_rate": 3.7077758250637914e-05, |
| "loss": 0.0155, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1995077082523643, |
| "grad_norm": 0.17180348932743073, |
| "learning_rate": 3.706025621859842e-05, |
| "loss": 0.0144, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.2000259100919808, |
| "grad_norm": 0.11793264001607895, |
| "learning_rate": 3.704270608738502e-05, |
| "loss": 0.0184, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.20054411193159735, |
| "grad_norm": 0.27998659014701843, |
| "learning_rate": 3.7025107906477995e-05, |
| "loss": 0.022, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.2010623137712139, |
| "grad_norm": 0.16252531111240387, |
| "learning_rate": 3.700746172549315e-05, |
| "loss": 0.0151, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.20158051561083043, |
| "grad_norm": 0.1505972445011139, |
| "learning_rate": 3.698976759418159e-05, |
| "loss": 0.0152, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.20209871745044694, |
| "grad_norm": 0.34772515296936035, |
| "learning_rate": 3.697202556242961e-05, |
| "loss": 0.0403, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.20261691929006348, |
| "grad_norm": 0.19800731539726257, |
| "learning_rate": 3.695423568025858e-05, |
| "loss": 0.027, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.20313512112968002, |
| "grad_norm": 0.16485491394996643, |
| "learning_rate": 3.693639799782475e-05, |
| "loss": 0.0303, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.20365332296929653, |
| "grad_norm": 0.13744878768920898, |
| "learning_rate": 3.691851256541913e-05, |
| "loss": 0.0125, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.20417152480891307, |
| "grad_norm": 0.3705524802207947, |
| "learning_rate": 3.69005794334674e-05, |
| "loss": 0.03, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.2046897266485296, |
| "grad_norm": 0.21377843618392944, |
| "learning_rate": 3.688259865252968e-05, |
| "loss": 0.0152, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.20520792848814615, |
| "grad_norm": 0.20547166466712952, |
| "learning_rate": 3.6864570273300436e-05, |
| "loss": 0.0277, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.20572613032776266, |
| "grad_norm": 0.4766762852668762, |
| "learning_rate": 3.6846494346608346e-05, |
| "loss": 0.0287, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.2062443321673792, |
| "grad_norm": 0.3239550292491913, |
| "learning_rate": 3.6828370923416135e-05, |
| "loss": 0.0234, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.20676253400699574, |
| "grad_norm": 0.13522012531757355, |
| "learning_rate": 3.6810200054820435e-05, |
| "loss": 0.0119, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.20728073584661225, |
| "grad_norm": 0.366756409406662, |
| "learning_rate": 3.679198179205165e-05, |
| "loss": 0.0421, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.20779893768622879, |
| "grad_norm": 0.44114238023757935, |
| "learning_rate": 3.677371618647381e-05, |
| "loss": 0.0191, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.20831713952584532, |
| "grad_norm": 0.1942875236272812, |
| "learning_rate": 3.67554032895844e-05, |
| "loss": 0.0175, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.20883534136546184, |
| "grad_norm": 0.4334729313850403, |
| "learning_rate": 3.673704315301426e-05, |
| "loss": 0.0341, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.20935354320507837, |
| "grad_norm": 0.20343036949634552, |
| "learning_rate": 3.6718635828527405e-05, |
| "loss": 0.0169, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.2098717450446949, |
| "grad_norm": 0.2867695391178131, |
| "learning_rate": 3.67001813680209e-05, |
| "loss": 0.0182, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.21038994688431145, |
| "grad_norm": 0.375577449798584, |
| "learning_rate": 3.668167982352469e-05, |
| "loss": 0.0249, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.21090814872392796, |
| "grad_norm": 0.33300960063934326, |
| "learning_rate": 3.666313124720146e-05, |
| "loss": 0.0283, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.2114263505635445, |
| "grad_norm": 0.44241946935653687, |
| "learning_rate": 3.664453569134654e-05, |
| "loss": 0.0258, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.21194455240316104, |
| "grad_norm": 0.21153447031974792, |
| "learning_rate": 3.6625893208387654e-05, |
| "loss": 0.0196, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.21246275424277755, |
| "grad_norm": 0.2204204499721527, |
| "learning_rate": 3.660720385088487e-05, |
| "loss": 0.019, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2129809560823941, |
| "grad_norm": 0.329979807138443, |
| "learning_rate": 3.6588467671530404e-05, |
| "loss": 0.0255, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.21349915792201063, |
| "grad_norm": 0.4161871373653412, |
| "learning_rate": 3.656968472314847e-05, |
| "loss": 0.013, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.21401735976162714, |
| "grad_norm": 0.3123794496059418, |
| "learning_rate": 3.655085505869516e-05, |
| "loss": 0.0248, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.21453556160124368, |
| "grad_norm": 0.13111092150211334, |
| "learning_rate": 3.653197873125825e-05, |
| "loss": 0.0072, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.21505376344086022, |
| "grad_norm": 0.30517759919166565, |
| "learning_rate": 3.651305579405709e-05, |
| "loss": 0.0332, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.21557196528047676, |
| "grad_norm": 0.2333979308605194, |
| "learning_rate": 3.649408630044246e-05, |
| "loss": 0.0117, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.21609016712009327, |
| "grad_norm": 0.1910547912120819, |
| "learning_rate": 3.6475070303896364e-05, |
| "loss": 0.0235, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.2166083689597098, |
| "grad_norm": 0.3474821448326111, |
| "learning_rate": 3.645600785803193e-05, |
| "loss": 0.0322, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.21712657079932635, |
| "grad_norm": 0.33099234104156494, |
| "learning_rate": 3.643689901659326e-05, |
| "loss": 0.0222, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.21764477263894286, |
| "grad_norm": 0.7437701225280762, |
| "learning_rate": 3.641774383345523e-05, |
| "loss": 0.0412, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2181629744785594, |
| "grad_norm": 0.21985647082328796, |
| "learning_rate": 3.6398542362623406e-05, |
| "loss": 0.0197, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.21868117631817593, |
| "grad_norm": 0.3680473566055298, |
| "learning_rate": 3.637929465823382e-05, |
| "loss": 0.024, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.21919937815779247, |
| "grad_norm": 0.20354367792606354, |
| "learning_rate": 3.6360000774552884e-05, |
| "loss": 0.0261, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.21971757999740898, |
| "grad_norm": 0.3228227496147156, |
| "learning_rate": 3.6340660765977186e-05, |
| "loss": 0.0239, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.22023578183702552, |
| "grad_norm": 0.17572440207004547, |
| "learning_rate": 3.632127468703337e-05, |
| "loss": 0.0125, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.22075398367664206, |
| "grad_norm": 0.2067795842885971, |
| "learning_rate": 3.630184259237797e-05, |
| "loss": 0.0196, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.22127218551625857, |
| "grad_norm": 0.17999672889709473, |
| "learning_rate": 3.628236453679724e-05, |
| "loss": 0.0222, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.2217903873558751, |
| "grad_norm": 0.17415858805179596, |
| "learning_rate": 3.6262840575207034e-05, |
| "loss": 0.0218, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.22230858919549165, |
| "grad_norm": 0.21333667635917664, |
| "learning_rate": 3.624327076265261e-05, |
| "loss": 0.0228, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.22282679103510816, |
| "grad_norm": 0.29897600412368774, |
| "learning_rate": 3.622365515430851e-05, |
| "loss": 0.0298, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2233449928747247, |
| "grad_norm": 0.17251932621002197, |
| "learning_rate": 3.6203993805478414e-05, |
| "loss": 0.0211, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.22386319471434124, |
| "grad_norm": 0.11624428629875183, |
| "learning_rate": 3.618428677159492e-05, |
| "loss": 0.0104, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.22438139655395778, |
| "grad_norm": 0.20840483903884888, |
| "learning_rate": 3.6164534108219445e-05, |
| "loss": 0.028, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.2248995983935743, |
| "grad_norm": 0.17955373227596283, |
| "learning_rate": 3.614473587104206e-05, |
| "loss": 0.0206, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.22541780023319083, |
| "grad_norm": 0.32150277495384216, |
| "learning_rate": 3.6124892115881326e-05, |
| "loss": 0.0268, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.22593600207280737, |
| "grad_norm": 0.38776031136512756, |
| "learning_rate": 3.6105002898684115e-05, |
| "loss": 0.0323, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.22645420391242388, |
| "grad_norm": 0.28147077560424805, |
| "learning_rate": 3.60850682755255e-05, |
| "loss": 0.0329, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.22697240575204042, |
| "grad_norm": 0.15004199743270874, |
| "learning_rate": 3.606508830260856e-05, |
| "loss": 0.0156, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.22749060759165696, |
| "grad_norm": 0.13495272397994995, |
| "learning_rate": 3.604506303626423e-05, |
| "loss": 0.0142, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.22800880943127347, |
| "grad_norm": 0.23692473769187927, |
| "learning_rate": 3.602499253295113e-05, |
| "loss": 0.0265, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.22852701127089, |
| "grad_norm": 0.15593603253364563, |
| "learning_rate": 3.600487684925545e-05, |
| "loss": 0.017, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.22904521311050655, |
| "grad_norm": 0.27002614736557007, |
| "learning_rate": 3.5984716041890745e-05, |
| "loss": 0.0345, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.22956341495012308, |
| "grad_norm": 0.2901112139225006, |
| "learning_rate": 3.596451016769778e-05, |
| "loss": 0.0421, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.2300816167897396, |
| "grad_norm": 0.1251426786184311, |
| "learning_rate": 3.5944259283644394e-05, |
| "loss": 0.0156, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.23059981862935613, |
| "grad_norm": 0.3259553015232086, |
| "learning_rate": 3.5923963446825325e-05, |
| "loss": 0.0325, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.23111802046897267, |
| "grad_norm": 0.1895984560251236, |
| "learning_rate": 3.5903622714462045e-05, |
| "loss": 0.0153, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.23163622230858918, |
| "grad_norm": 0.4781612157821655, |
| "learning_rate": 3.5883237143902594e-05, |
| "loss": 0.0491, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.23215442414820572, |
| "grad_norm": 0.3092973530292511, |
| "learning_rate": 3.586280679262144e-05, |
| "loss": 0.0321, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.23267262598782226, |
| "grad_norm": 0.11183629184961319, |
| "learning_rate": 3.584233171821931e-05, |
| "loss": 0.0121, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.23319082782743877, |
| "grad_norm": 0.26677051186561584, |
| "learning_rate": 3.582181197842301e-05, |
| "loss": 0.0261, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2337090296670553, |
| "grad_norm": 0.19713018834590912, |
| "learning_rate": 3.5801247631085286e-05, |
| "loss": 0.0273, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.23422723150667185, |
| "grad_norm": 0.28130441904067993, |
| "learning_rate": 3.578063873418462e-05, |
| "loss": 0.0431, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2347454333462884, |
| "grad_norm": 0.24492007493972778, |
| "learning_rate": 3.5759985345825143e-05, |
| "loss": 0.023, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.2352636351859049, |
| "grad_norm": 0.2576156258583069, |
| "learning_rate": 3.573928752423638e-05, |
| "loss": 0.0276, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.23578183702552144, |
| "grad_norm": 0.2688496708869934, |
| "learning_rate": 3.5718545327773145e-05, |
| "loss": 0.0241, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.23630003886513798, |
| "grad_norm": 0.2165881246328354, |
| "learning_rate": 3.5697758814915376e-05, |
| "loss": 0.0184, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2368182407047545, |
| "grad_norm": 0.10839606076478958, |
| "learning_rate": 3.5676928044267935e-05, |
| "loss": 0.0148, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.23733644254437103, |
| "grad_norm": 0.13416044414043427, |
| "learning_rate": 3.565605307456047e-05, |
| "loss": 0.0174, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.23785464438398757, |
| "grad_norm": 0.19393199682235718, |
| "learning_rate": 3.563513396464726e-05, |
| "loss": 0.0198, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.2383728462236041, |
| "grad_norm": 0.1913292557001114, |
| "learning_rate": 3.561417077350699e-05, |
| "loss": 0.0247, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.23889104806322062, |
| "grad_norm": 0.15598498284816742, |
| "learning_rate": 3.559316356024267e-05, |
| "loss": 0.0212, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.23940924990283716, |
| "grad_norm": 0.2367229461669922, |
| "learning_rate": 3.5572112384081395e-05, |
| "loss": 0.03, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.2399274517424537, |
| "grad_norm": 0.15201199054718018, |
| "learning_rate": 3.555101730437423e-05, |
| "loss": 0.0226, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2404456535820702, |
| "grad_norm": 0.20422053337097168, |
| "learning_rate": 3.5529878380596e-05, |
| "loss": 0.0195, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.24096385542168675, |
| "grad_norm": 0.2784031629562378, |
| "learning_rate": 3.5508695672345156e-05, |
| "loss": 0.0218, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.24148205726130328, |
| "grad_norm": 0.17994067072868347, |
| "learning_rate": 3.5487469239343576e-05, |
| "loss": 0.0207, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2420002591009198, |
| "grad_norm": 0.21507667005062103, |
| "learning_rate": 3.546619914143645e-05, |
| "loss": 0.0201, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.24251846094053633, |
| "grad_norm": 0.10343655943870544, |
| "learning_rate": 3.5444885438592036e-05, |
| "loss": 0.0124, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.24303666278015287, |
| "grad_norm": 0.14296704530715942, |
| "learning_rate": 3.542352819090156e-05, |
| "loss": 0.02, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.2435548646197694, |
| "grad_norm": 0.26198479533195496, |
| "learning_rate": 3.5402127458578994e-05, |
| "loss": 0.0297, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.24407306645938592, |
| "grad_norm": 0.21289674937725067, |
| "learning_rate": 3.538068330196093e-05, |
| "loss": 0.0275, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.24459126829900246, |
| "grad_norm": 0.25298917293548584, |
| "learning_rate": 3.535919578150637e-05, |
| "loss": 0.0202, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.245109470138619, |
| "grad_norm": 0.2316911369562149, |
| "learning_rate": 3.5337664957796587e-05, |
| "loss": 0.0323, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.2456276719782355, |
| "grad_norm": 0.336458295583725, |
| "learning_rate": 3.531609089153493e-05, |
| "loss": 0.03, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.24614587381785205, |
| "grad_norm": 0.21604609489440918, |
| "learning_rate": 3.52944736435467e-05, |
| "loss": 0.0146, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.2466640756574686, |
| "grad_norm": 0.1912679821252823, |
| "learning_rate": 3.5272813274778885e-05, |
| "loss": 0.0217, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2471822774970851, |
| "grad_norm": 0.3697705864906311, |
| "learning_rate": 3.52511098463001e-05, |
| "loss": 0.0235, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.24770047933670164, |
| "grad_norm": 0.21411247551441193, |
| "learning_rate": 3.522936341930033e-05, |
| "loss": 0.0203, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.24821868117631818, |
| "grad_norm": 0.23912426829338074, |
| "learning_rate": 3.5207574055090786e-05, |
| "loss": 0.0196, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.24873688301593472, |
| "grad_norm": 0.4297958016395569, |
| "learning_rate": 3.518574181510377e-05, |
| "loss": 0.0248, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.24925508485555123, |
| "grad_norm": 0.34526073932647705, |
| "learning_rate": 3.516386676089243e-05, |
| "loss": 0.0293, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.24977328669516777, |
| "grad_norm": 0.26696211099624634, |
| "learning_rate": 3.514194895413065e-05, |
| "loss": 0.0289, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2502914885347843, |
| "grad_norm": 0.22925925254821777, |
| "learning_rate": 3.511998845661282e-05, |
| "loss": 0.0169, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.2508096903744008, |
| "grad_norm": 0.19786757230758667, |
| "learning_rate": 3.5097985330253715e-05, |
| "loss": 0.0167, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.25132789221401736, |
| "grad_norm": 0.12485197931528091, |
| "learning_rate": 3.507593963708828e-05, |
| "loss": 0.0111, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2518460940536339, |
| "grad_norm": 0.21498580276966095, |
| "learning_rate": 3.50538514392715e-05, |
| "loss": 0.0303, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.25236429589325043, |
| "grad_norm": 0.29731622338294983, |
| "learning_rate": 3.503172079907816e-05, |
| "loss": 0.0254, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.252882497732867, |
| "grad_norm": 0.32734227180480957, |
| "learning_rate": 3.500954777890272e-05, |
| "loss": 0.0284, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.25340069957248346, |
| "grad_norm": 0.18852336704730988, |
| "learning_rate": 3.4987332441259134e-05, |
| "loss": 0.0176, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2539189014121, |
| "grad_norm": 0.4650251269340515, |
| "learning_rate": 3.4965074848780665e-05, |
| "loss": 0.0395, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.25443710325171653, |
| "grad_norm": 0.19879503548145294, |
| "learning_rate": 3.494277506421968e-05, |
| "loss": 0.0179, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.2549553050913331, |
| "grad_norm": 0.23383589088916779, |
| "learning_rate": 3.492043315044754e-05, |
| "loss": 0.0175, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.2554735069309496, |
| "grad_norm": 0.12329400330781937, |
| "learning_rate": 3.489804917045436e-05, |
| "loss": 0.0091, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.25599170877056615, |
| "grad_norm": 0.10637083649635315, |
| "learning_rate": 3.487562318734885e-05, |
| "loss": 0.0096, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.2565099106101827, |
| "grad_norm": 0.14903591573238373, |
| "learning_rate": 3.485315526435818e-05, |
| "loss": 0.0185, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2570281124497992, |
| "grad_norm": 0.20365048944950104, |
| "learning_rate": 3.483064546482771e-05, |
| "loss": 0.0177, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.2575463142894157, |
| "grad_norm": 0.22113251686096191, |
| "learning_rate": 3.4808093852220906e-05, |
| "loss": 0.0259, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.25806451612903225, |
| "grad_norm": 0.2057502418756485, |
| "learning_rate": 3.478550049011911e-05, |
| "loss": 0.015, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.2585827179686488, |
| "grad_norm": 0.1919933706521988, |
| "learning_rate": 3.476286544222135e-05, |
| "loss": 0.0208, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.25910091980826533, |
| "grad_norm": 0.19814780354499817, |
| "learning_rate": 3.474018877234422e-05, |
| "loss": 0.0165, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.25961912164788187, |
| "grad_norm": 0.23219355940818787, |
| "learning_rate": 3.471747054442164e-05, |
| "loss": 0.0243, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2601373234874984, |
| "grad_norm": 0.2818119525909424, |
| "learning_rate": 3.469471082250468e-05, |
| "loss": 0.0418, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.2606555253271149, |
| "grad_norm": 0.18665625154972076, |
| "learning_rate": 3.467190967076144e-05, |
| "loss": 0.0176, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.26117372716673143, |
| "grad_norm": 0.1935970038175583, |
| "learning_rate": 3.464906715347679e-05, |
| "loss": 0.0188, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.26169192900634797, |
| "grad_norm": 0.3840036392211914, |
| "learning_rate": 3.462618333505225e-05, |
| "loss": 0.0364, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.2622101308459645, |
| "grad_norm": 0.12956850230693817, |
| "learning_rate": 3.4603258280005755e-05, |
| "loss": 0.0106, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.26272833268558105, |
| "grad_norm": 0.2004137635231018, |
| "learning_rate": 3.458029205297151e-05, |
| "loss": 0.0218, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2632465345251976, |
| "grad_norm": 0.11050537973642349, |
| "learning_rate": 3.455728471869983e-05, |
| "loss": 0.0131, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.26376473636481407, |
| "grad_norm": 0.23582395911216736, |
| "learning_rate": 3.4534236342056885e-05, |
| "loss": 0.0252, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.2642829382044306, |
| "grad_norm": 0.19793295860290527, |
| "learning_rate": 3.4511146988024585e-05, |
| "loss": 0.0269, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.26480114004404715, |
| "grad_norm": 0.18839754164218903, |
| "learning_rate": 3.448801672170035e-05, |
| "loss": 0.0193, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2653193418836637, |
| "grad_norm": 0.2332521229982376, |
| "learning_rate": 3.446484560829697e-05, |
| "loss": 0.0262, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.2658375437232802, |
| "grad_norm": 0.17722775042057037, |
| "learning_rate": 3.444163371314238e-05, |
| "loss": 0.0143, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.26635574556289676, |
| "grad_norm": 0.35751911997795105, |
| "learning_rate": 3.441838110167949e-05, |
| "loss": 0.0243, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.2668739474025133, |
| "grad_norm": 0.19578954577445984, |
| "learning_rate": 3.439508783946604e-05, |
| "loss": 0.0264, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.2673921492421298, |
| "grad_norm": 0.22078539431095123, |
| "learning_rate": 3.437175399217433e-05, |
| "loss": 0.0137, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.2679103510817463, |
| "grad_norm": 0.12540170550346375, |
| "learning_rate": 3.434837962559112e-05, |
| "loss": 0.0158, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.26842855292136286, |
| "grad_norm": 0.3015592694282532, |
| "learning_rate": 3.432496480561739e-05, |
| "loss": 0.03, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.2689467547609794, |
| "grad_norm": 0.2720678150653839, |
| "learning_rate": 3.430150959826819e-05, |
| "loss": 0.0315, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.26946495660059594, |
| "grad_norm": 0.14795976877212524, |
| "learning_rate": 3.427801406967242e-05, |
| "loss": 0.0212, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2699831584402125, |
| "grad_norm": 0.12961719930171967, |
| "learning_rate": 3.425447828607266e-05, |
| "loss": 0.0101, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.270501360279829, |
| "grad_norm": 0.39186790585517883, |
| "learning_rate": 3.423090231382501e-05, |
| "loss": 0.0316, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.2710195621194455, |
| "grad_norm": 0.1863209307193756, |
| "learning_rate": 3.420728621939884e-05, |
| "loss": 0.0182, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.27153776395906204, |
| "grad_norm": 0.171253502368927, |
| "learning_rate": 3.418363006937668e-05, |
| "loss": 0.0165, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.2720559657986786, |
| "grad_norm": 0.47378459572792053, |
| "learning_rate": 3.4159933930453944e-05, |
| "loss": 0.0442, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.2725741676382951, |
| "grad_norm": 0.21674169600009918, |
| "learning_rate": 3.4136197869438824e-05, |
| "loss": 0.0237, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.27309236947791166, |
| "grad_norm": 0.5017228126525879, |
| "learning_rate": 3.411242195325205e-05, |
| "loss": 0.0245, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.2736105713175282, |
| "grad_norm": 0.21757693588733673, |
| "learning_rate": 3.408860624892675e-05, |
| "loss": 0.0177, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.27412877315714473, |
| "grad_norm": 0.19579505920410156, |
| "learning_rate": 3.406475082360817e-05, |
| "loss": 0.0226, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2746469749967612, |
| "grad_norm": 0.2168528437614441, |
| "learning_rate": 3.4040855744553614e-05, |
| "loss": 0.0226, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.27516517683637776, |
| "grad_norm": 0.14640694856643677, |
| "learning_rate": 3.401692107913213e-05, |
| "loss": 0.0155, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.2756833786759943, |
| "grad_norm": 0.24729685485363007, |
| "learning_rate": 3.39929468948244e-05, |
| "loss": 0.0183, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.27620158051561083, |
| "grad_norm": 0.16490469872951508, |
| "learning_rate": 3.396893325922251e-05, |
| "loss": 0.0149, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2767197823552274, |
| "grad_norm": 0.1809859275817871, |
| "learning_rate": 3.3944880240029804e-05, |
| "loss": 0.0151, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.2772379841948439, |
| "grad_norm": 0.24216516315937042, |
| "learning_rate": 3.392078790506062e-05, |
| "loss": 0.0199, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.2777561860344604, |
| "grad_norm": 0.12011023610830307, |
| "learning_rate": 3.3896656322240176e-05, |
| "loss": 0.0127, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.27827438787407693, |
| "grad_norm": 0.1671677827835083, |
| "learning_rate": 3.387248555960433e-05, |
| "loss": 0.0276, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.2787925897136935, |
| "grad_norm": 0.33307945728302, |
| "learning_rate": 3.38482756852994e-05, |
| "loss": 0.0236, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.27931079155331, |
| "grad_norm": 0.24255521595478058, |
| "learning_rate": 3.382402676758198e-05, |
| "loss": 0.011, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.27982899339292655, |
| "grad_norm": 0.3321969211101532, |
| "learning_rate": 3.379973887481874e-05, |
| "loss": 0.0303, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.2803471952325431, |
| "grad_norm": 0.21098312735557556, |
| "learning_rate": 3.377541207548624e-05, |
| "loss": 0.0206, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.28086539707215963, |
| "grad_norm": 0.7190823554992676, |
| "learning_rate": 3.375104643817073e-05, |
| "loss": 0.0247, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2813835989117761, |
| "grad_norm": 0.2180882841348648, |
| "learning_rate": 3.372664203156794e-05, |
| "loss": 0.0266, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.28190180075139265, |
| "grad_norm": 0.20533742010593414, |
| "learning_rate": 3.3702198924482935e-05, |
| "loss": 0.016, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.2824200025910092, |
| "grad_norm": 0.14692696928977966, |
| "learning_rate": 3.3677717185829865e-05, |
| "loss": 0.0237, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.28293820443062573, |
| "grad_norm": 0.25798094272613525, |
| "learning_rate": 3.3653196884631826e-05, |
| "loss": 0.0374, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.28345640627024227, |
| "grad_norm": 0.22581781446933746, |
| "learning_rate": 3.3628638090020604e-05, |
| "loss": 0.0248, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.2839746081098588, |
| "grad_norm": 0.18555013835430145, |
| "learning_rate": 3.3604040871236536e-05, |
| "loss": 0.0172, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.28449280994947534, |
| "grad_norm": 0.18623068928718567, |
| "learning_rate": 3.357940529762827e-05, |
| "loss": 0.0271, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.28501101178909183, |
| "grad_norm": 0.38695311546325684, |
| "learning_rate": 3.355473143865261e-05, |
| "loss": 0.0254, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.28552921362870837, |
| "grad_norm": 0.21091844141483307, |
| "learning_rate": 3.35300193638743e-05, |
| "loss": 0.0145, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.2860474154683249, |
| "grad_norm": 0.284892737865448, |
| "learning_rate": 3.3505269142965807e-05, |
| "loss": 0.017, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.28656561730794144, |
| "grad_norm": 0.1800401210784912, |
| "learning_rate": 3.348048084570717e-05, |
| "loss": 0.012, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.287083819147558, |
| "grad_norm": 0.20696207880973816, |
| "learning_rate": 3.345565454198577e-05, |
| "loss": 0.0183, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.2876020209871745, |
| "grad_norm": 0.21303308010101318, |
| "learning_rate": 3.343079030179613e-05, |
| "loss": 0.0216, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.28812022282679106, |
| "grad_norm": 0.16014441847801208, |
| "learning_rate": 3.340588819523976e-05, |
| "loss": 0.0205, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.28863842466640754, |
| "grad_norm": 0.3861747682094574, |
| "learning_rate": 3.338094829252489e-05, |
| "loss": 0.0283, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.2891566265060241, |
| "grad_norm": 0.31511127948760986, |
| "learning_rate": 3.335597066396635e-05, |
| "loss": 0.0188, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.2896748283456406, |
| "grad_norm": 0.11825576424598694, |
| "learning_rate": 3.3330955379985316e-05, |
| "loss": 0.0176, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.29019303018525716, |
| "grad_norm": 0.2394377589225769, |
| "learning_rate": 3.3305902511109114e-05, |
| "loss": 0.0318, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2907112320248737, |
| "grad_norm": 0.22725820541381836, |
| "learning_rate": 3.328081212797107e-05, |
| "loss": 0.0228, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.29122943386449024, |
| "grad_norm": 0.3189086616039276, |
| "learning_rate": 3.325568430131023e-05, |
| "loss": 0.0374, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.2917476357041067, |
| "grad_norm": 0.18351779878139496, |
| "learning_rate": 3.323051910197125e-05, |
| "loss": 0.0132, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.29226583754372326, |
| "grad_norm": 0.2662956118583679, |
| "learning_rate": 3.320531660090414e-05, |
| "loss": 0.0254, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.2927840393833398, |
| "grad_norm": 0.23380038142204285, |
| "learning_rate": 3.318007686916408e-05, |
| "loss": 0.0204, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.29330224122295634, |
| "grad_norm": 0.21842540800571442, |
| "learning_rate": 3.315479997791119e-05, |
| "loss": 0.0168, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.2938204430625729, |
| "grad_norm": 0.2316482663154602, |
| "learning_rate": 3.312948599841039e-05, |
| "loss": 0.0345, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.2943386449021894, |
| "grad_norm": 0.10342805832624435, |
| "learning_rate": 3.3104135002031154e-05, |
| "loss": 0.0098, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.29485684674180596, |
| "grad_norm": 0.6484712958335876, |
| "learning_rate": 3.307874706024732e-05, |
| "loss": 0.0248, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.29537504858142244, |
| "grad_norm": 0.24838627874851227, |
| "learning_rate": 3.305332224463689e-05, |
| "loss": 0.0207, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.295893250421039, |
| "grad_norm": 0.27338987588882446, |
| "learning_rate": 3.302786062688182e-05, |
| "loss": 0.0187, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.2964114522606555, |
| "grad_norm": 0.17893199622631073, |
| "learning_rate": 3.300236227876784e-05, |
| "loss": 0.0141, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.29692965410027206, |
| "grad_norm": 0.11885008960962296, |
| "learning_rate": 3.29768272721842e-05, |
| "loss": 0.0151, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.2974478559398886, |
| "grad_norm": 0.12600629031658173, |
| "learning_rate": 3.295125567912356e-05, |
| "loss": 0.0152, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.29796605777950513, |
| "grad_norm": 0.25072181224823, |
| "learning_rate": 3.2925647571681697e-05, |
| "loss": 0.0301, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.2984842596191217, |
| "grad_norm": 0.14617246389389038, |
| "learning_rate": 3.290000302205733e-05, |
| "loss": 0.0148, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.29900246145873816, |
| "grad_norm": 0.11175688356161118, |
| "learning_rate": 3.287432210255194e-05, |
| "loss": 0.0131, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2995206632983547, |
| "grad_norm": 0.16760458052158356, |
| "learning_rate": 3.284860488556953e-05, |
| "loss": 0.0225, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.30003886513797123, |
| "grad_norm": 0.21576490998268127, |
| "learning_rate": 3.282285144361646e-05, |
| "loss": 0.0189, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.3005570669775878, |
| "grad_norm": 0.29469025135040283, |
| "learning_rate": 3.2797061849301205e-05, |
| "loss": 0.03, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3010752688172043, |
| "grad_norm": 0.16392472386360168, |
| "learning_rate": 3.2771236175334174e-05, |
| "loss": 0.0122, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.30159347065682085, |
| "grad_norm": 0.19024868309497833, |
| "learning_rate": 3.274537449452749e-05, |
| "loss": 0.0191, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.3021116724964374, |
| "grad_norm": 0.1978103071451187, |
| "learning_rate": 3.2719476879794804e-05, |
| "loss": 0.0147, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.3026298743360539, |
| "grad_norm": 0.17215609550476074, |
| "learning_rate": 3.2693543404151084e-05, |
| "loss": 0.0304, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.3031480761756704, |
| "grad_norm": 0.3572210967540741, |
| "learning_rate": 3.266757414071238e-05, |
| "loss": 0.0287, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.30366627801528695, |
| "grad_norm": 0.24866849184036255, |
| "learning_rate": 3.2641569162695654e-05, |
| "loss": 0.0189, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.3041844798549035, |
| "grad_norm": 0.24234169721603394, |
| "learning_rate": 3.2615528543418564e-05, |
| "loss": 0.0272, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.30470268169452003, |
| "grad_norm": 0.15545207262039185, |
| "learning_rate": 3.258945235629924e-05, |
| "loss": 0.0154, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.30522088353413657, |
| "grad_norm": 0.1727120280265808, |
| "learning_rate": 3.256334067485611e-05, |
| "loss": 0.019, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.30573908537375305, |
| "grad_norm": 0.33689263463020325, |
| "learning_rate": 3.2537193572707665e-05, |
| "loss": 0.0228, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.3062572872133696, |
| "grad_norm": 0.16162997484207153, |
| "learning_rate": 3.251101112357225e-05, |
| "loss": 0.0091, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.30677548905298613, |
| "grad_norm": 0.1706080287694931, |
| "learning_rate": 3.248479340126789e-05, |
| "loss": 0.0138, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.30729369089260267, |
| "grad_norm": 0.20587295293807983, |
| "learning_rate": 3.245854047971204e-05, |
| "loss": 0.0232, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.3078118927322192, |
| "grad_norm": 0.22837670147418976, |
| "learning_rate": 3.2432252432921385e-05, |
| "loss": 0.0221, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.30833009457183574, |
| "grad_norm": 0.20966672897338867, |
| "learning_rate": 3.240592933501166e-05, |
| "loss": 0.0133, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.3088482964114523, |
| "grad_norm": 0.2917436361312866, |
| "learning_rate": 3.2379571260197434e-05, |
| "loss": 0.0237, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.30936649825106877, |
| "grad_norm": 0.27110305428504944, |
| "learning_rate": 3.235317828279185e-05, |
| "loss": 0.025, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.3098847000906853, |
| "grad_norm": 0.31341224908828735, |
| "learning_rate": 3.2326750477206484e-05, |
| "loss": 0.0301, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.31040290193030184, |
| "grad_norm": 0.2728060185909271, |
| "learning_rate": 3.23002879179511e-05, |
| "loss": 0.0326, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.3109211037699184, |
| "grad_norm": 0.20057561993598938, |
| "learning_rate": 3.227379067963343e-05, |
| "loss": 0.0232, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3114393056095349, |
| "grad_norm": 0.21433892846107483, |
| "learning_rate": 3.224725883695901e-05, |
| "loss": 0.0249, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.31195750744915146, |
| "grad_norm": 0.20961421728134155, |
| "learning_rate": 3.222069246473091e-05, |
| "loss": 0.0198, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.312475709288768, |
| "grad_norm": 0.14495067298412323, |
| "learning_rate": 3.2194091637849554e-05, |
| "loss": 0.0125, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.3129939111283845, |
| "grad_norm": 0.26637324690818787, |
| "learning_rate": 3.216745643131254e-05, |
| "loss": 0.0204, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.313512112968001, |
| "grad_norm": 0.2717486023902893, |
| "learning_rate": 3.214078692021434e-05, |
| "loss": 0.0188, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.31403031480761756, |
| "grad_norm": 0.21144232153892517, |
| "learning_rate": 3.211408317974619e-05, |
| "loss": 0.0202, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.3145485166472341, |
| "grad_norm": 0.1410185843706131, |
| "learning_rate": 3.208734528519581e-05, |
| "loss": 0.0149, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.31506671848685064, |
| "grad_norm": 0.2241893708705902, |
| "learning_rate": 3.2060573311947214e-05, |
| "loss": 0.0169, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3155849203264672, |
| "grad_norm": 0.18555960059165955, |
| "learning_rate": 3.20337673354805e-05, |
| "loss": 0.016, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.3161031221660837, |
| "grad_norm": 0.09408245235681534, |
| "learning_rate": 3.200692743137163e-05, |
| "loss": 0.0087, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3166213240057002, |
| "grad_norm": 0.24068722128868103, |
| "learning_rate": 3.1980053675292234e-05, |
| "loss": 0.0179, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.31713952584531674, |
| "grad_norm": 0.20078520476818085, |
| "learning_rate": 3.1953146143009367e-05, |
| "loss": 0.0127, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.3176577276849333, |
| "grad_norm": 0.478952556848526, |
| "learning_rate": 3.192620491038531e-05, |
| "loss": 0.0241, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.3181759295245498, |
| "grad_norm": 0.18030868470668793, |
| "learning_rate": 3.189923005337737e-05, |
| "loss": 0.0131, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.31869413136416636, |
| "grad_norm": 0.2694755494594574, |
| "learning_rate": 3.1872221648037654e-05, |
| "loss": 0.0179, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.3192123332037829, |
| "grad_norm": 0.18882977962493896, |
| "learning_rate": 3.1845179770512845e-05, |
| "loss": 0.026, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.3197305350433994, |
| "grad_norm": 0.29215094447135925, |
| "learning_rate": 3.181810449704399e-05, |
| "loss": 0.0164, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.3202487368830159, |
| "grad_norm": 0.2687048017978668, |
| "learning_rate": 3.179099590396632e-05, |
| "loss": 0.0223, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.32076693872263246, |
| "grad_norm": 0.15531520545482635, |
| "learning_rate": 3.1763854067708985e-05, |
| "loss": 0.0072, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.321285140562249, |
| "grad_norm": 0.2893719971179962, |
| "learning_rate": 3.173667906479485e-05, |
| "loss": 0.0174, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.32180334240186553, |
| "grad_norm": 0.23916292190551758, |
| "learning_rate": 3.170947097184031e-05, |
| "loss": 0.0231, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.32232154424148207, |
| "grad_norm": 0.17515330016613007, |
| "learning_rate": 3.168222986555504e-05, |
| "loss": 0.0141, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.3228397460810986, |
| "grad_norm": 0.2012537568807602, |
| "learning_rate": 3.16549558227418e-05, |
| "loss": 0.0181, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.3233579479207151, |
| "grad_norm": 0.13551956415176392, |
| "learning_rate": 3.162764892029622e-05, |
| "loss": 0.0127, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.32387614976033163, |
| "grad_norm": 0.10975822061300278, |
| "learning_rate": 3.1600309235206536e-05, |
| "loss": 0.0098, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.32439435159994817, |
| "grad_norm": 0.3241739273071289, |
| "learning_rate": 3.1572936844553454e-05, |
| "loss": 0.0321, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.3249125534395647, |
| "grad_norm": 0.31218957901000977, |
| "learning_rate": 3.154553182550986e-05, |
| "loss": 0.0239, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.32543075527918125, |
| "grad_norm": 0.2881108522415161, |
| "learning_rate": 3.151809425534065e-05, |
| "loss": 0.0287, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.3259489571187978, |
| "grad_norm": 0.20379316806793213, |
| "learning_rate": 3.1490624211402475e-05, |
| "loss": 0.0171, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.3264671589584143, |
| "grad_norm": 0.21332648396492004, |
| "learning_rate": 3.1463121771143563e-05, |
| "loss": 0.0266, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.3269853607980308, |
| "grad_norm": 0.2312246561050415, |
| "learning_rate": 3.143558701210345e-05, |
| "loss": 0.0268, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.32750356263764735, |
| "grad_norm": 0.2009822130203247, |
| "learning_rate": 3.140802001191283e-05, |
| "loss": 0.027, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.3280217644772639, |
| "grad_norm": 0.2293207049369812, |
| "learning_rate": 3.138042084829327e-05, |
| "loss": 0.0331, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.3285399663168804, |
| "grad_norm": 0.14847518503665924, |
| "learning_rate": 3.1352789599057004e-05, |
| "loss": 0.0077, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.32905816815649697, |
| "grad_norm": 0.26872965693473816, |
| "learning_rate": 3.132512634210676e-05, |
| "loss": 0.0272, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.3295763699961135, |
| "grad_norm": 0.08970358967781067, |
| "learning_rate": 3.129743115543551e-05, |
| "loss": 0.0135, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.33009457183573004, |
| "grad_norm": 0.15737628936767578, |
| "learning_rate": 3.1269704117126206e-05, |
| "loss": 0.0174, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3306127736753465, |
| "grad_norm": 0.24024097621440887, |
| "learning_rate": 3.1241945305351637e-05, |
| "loss": 0.0171, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.33113097551496307, |
| "grad_norm": 0.17292901873588562, |
| "learning_rate": 3.121415479837416e-05, |
| "loss": 0.0252, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.3316491773545796, |
| "grad_norm": 0.16394537687301636, |
| "learning_rate": 3.11863326745455e-05, |
| "loss": 0.0171, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.33216737919419614, |
| "grad_norm": 0.17823824286460876, |
| "learning_rate": 3.115847901230652e-05, |
| "loss": 0.0201, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.3326855810338127, |
| "grad_norm": 0.24929647147655487, |
| "learning_rate": 3.113059389018699e-05, |
| "loss": 0.0287, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.3332037828734292, |
| "grad_norm": 0.1503756195306778, |
| "learning_rate": 3.110267738680539e-05, |
| "loss": 0.0141, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.3337219847130457, |
| "grad_norm": 0.13402046263217926, |
| "learning_rate": 3.107472958086867e-05, |
| "loss": 0.0145, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.33424018655266224, |
| "grad_norm": 0.2699407935142517, |
| "learning_rate": 3.104675055117203e-05, |
| "loss": 0.0183, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.3347583883922788, |
| "grad_norm": 0.24766863882541656, |
| "learning_rate": 3.101874037659872e-05, |
| "loss": 0.0308, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.3352765902318953, |
| "grad_norm": 0.10173456370830536, |
| "learning_rate": 3.099069913611977e-05, |
| "loss": 0.014, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.33579479207151186, |
| "grad_norm": 0.13719849288463593, |
| "learning_rate": 3.0962626908793824e-05, |
| "loss": 0.014, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.3363129939111284, |
| "grad_norm": 0.30098456144332886, |
| "learning_rate": 3.0934523773766864e-05, |
| "loss": 0.0194, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.33683119575074494, |
| "grad_norm": 0.25823748111724854, |
| "learning_rate": 3.090638981027203e-05, |
| "loss": 0.0368, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3373493975903614, |
| "grad_norm": 0.13479436933994293, |
| "learning_rate": 3.087822509762938e-05, |
| "loss": 0.0114, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.33786759942997796, |
| "grad_norm": 0.23411042988300323, |
| "learning_rate": 3.085002971524564e-05, |
| "loss": 0.0219, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.3383858012695945, |
| "grad_norm": 0.361751914024353, |
| "learning_rate": 3.0821803742614044e-05, |
| "loss": 0.0487, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.33890400310921104, |
| "grad_norm": 0.2769923508167267, |
| "learning_rate": 3.079354725931405e-05, |
| "loss": 0.0244, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.3394222049488276, |
| "grad_norm": 0.28610527515411377, |
| "learning_rate": 3.076526034501113e-05, |
| "loss": 0.0416, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.3399404067884441, |
| "grad_norm": 0.12687546014785767, |
| "learning_rate": 3.0736943079456554e-05, |
| "loss": 0.0164, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.34045860862806065, |
| "grad_norm": 0.10315964370965958, |
| "learning_rate": 3.070859554248719e-05, |
| "loss": 0.0155, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.34097681046767714, |
| "grad_norm": 0.1738579124212265, |
| "learning_rate": 3.0680217814025216e-05, |
| "loss": 0.0211, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.3414950123072937, |
| "grad_norm": 0.13814416527748108, |
| "learning_rate": 3.065180997407795e-05, |
| "loss": 0.0136, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3420132141469102, |
| "grad_norm": 0.20698578655719757, |
| "learning_rate": 3.062337210273761e-05, |
| "loss": 0.0288, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.34253141598652675, |
| "grad_norm": 0.14585328102111816, |
| "learning_rate": 3.059490428018106e-05, |
| "loss": 0.0189, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.3430496178261433, |
| "grad_norm": 0.15746049582958221, |
| "learning_rate": 3.056640658666965e-05, |
| "loss": 0.0222, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.34356781966575983, |
| "grad_norm": 0.14348691701889038, |
| "learning_rate": 3.0537879102548904e-05, |
| "loss": 0.0173, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.34408602150537637, |
| "grad_norm": 0.14485906064510345, |
| "learning_rate": 3.0509321908248347e-05, |
| "loss": 0.0167, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.34460422334499285, |
| "grad_norm": 0.21492420136928558, |
| "learning_rate": 3.0480735084281286e-05, |
| "loss": 0.0217, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.3451224251846094, |
| "grad_norm": 0.1390220671892166, |
| "learning_rate": 3.0452118711244555e-05, |
| "loss": 0.0155, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.34564062702422593, |
| "grad_norm": 0.38242873549461365, |
| "learning_rate": 3.042347286981829e-05, |
| "loss": 0.0359, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.34615882886384247, |
| "grad_norm": 0.1517501026391983, |
| "learning_rate": 3.039479764076572e-05, |
| "loss": 0.0181, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.346677030703459, |
| "grad_norm": 0.30889183282852173, |
| "learning_rate": 3.036609310493292e-05, |
| "loss": 0.036, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.34719523254307555, |
| "grad_norm": 0.24580207467079163, |
| "learning_rate": 3.0337359343248606e-05, |
| "loss": 0.034, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.34771343438269203, |
| "grad_norm": 0.24991121888160706, |
| "learning_rate": 3.030859643672387e-05, |
| "loss": 0.0227, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.34823163622230857, |
| "grad_norm": 0.13242225348949432, |
| "learning_rate": 3.0279804466451996e-05, |
| "loss": 0.0125, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3487498380619251, |
| "grad_norm": 0.12930886447429657, |
| "learning_rate": 3.0250983513608198e-05, |
| "loss": 0.0211, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.34926803990154165, |
| "grad_norm": 0.14146579802036285, |
| "learning_rate": 3.0222133659449404e-05, |
| "loss": 0.0117, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.3497862417411582, |
| "grad_norm": 0.1282532513141632, |
| "learning_rate": 3.0193254985314015e-05, |
| "loss": 0.0169, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3503044435807747, |
| "grad_norm": 0.17600007355213165, |
| "learning_rate": 3.0164347572621717e-05, |
| "loss": 0.0196, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.35082264542039127, |
| "grad_norm": 0.13536961376667023, |
| "learning_rate": 3.0135411502873188e-05, |
| "loss": 0.0131, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.35134084726000775, |
| "grad_norm": 0.33690956234931946, |
| "learning_rate": 3.0106446857649922e-05, |
| "loss": 0.0243, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.3518590490996243, |
| "grad_norm": 0.3075578808784485, |
| "learning_rate": 3.007745371861396e-05, |
| "loss": 0.0241, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3523772509392408, |
| "grad_norm": 0.1268237829208374, |
| "learning_rate": 3.00484321675077e-05, |
| "loss": 0.0104, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.35289545277885737, |
| "grad_norm": 0.15431272983551025, |
| "learning_rate": 3.0019382286153625e-05, |
| "loss": 0.0139, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.3534136546184739, |
| "grad_norm": 0.1659412533044815, |
| "learning_rate": 2.9990304156454088e-05, |
| "loss": 0.0148, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.35393185645809044, |
| "grad_norm": 0.258486270904541, |
| "learning_rate": 2.996119786039111e-05, |
| "loss": 0.0226, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.354450058297707, |
| "grad_norm": 0.12462588399648666, |
| "learning_rate": 2.99320634800261e-05, |
| "loss": 0.0106, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.35496826013732347, |
| "grad_norm": 0.09379307180643082, |
| "learning_rate": 2.9902901097499656e-05, |
| "loss": 0.0057, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.35548646197694, |
| "grad_norm": 0.25576797127723694, |
| "learning_rate": 2.9873710795031324e-05, |
| "loss": 0.018, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.35600466381655654, |
| "grad_norm": 0.14848865568637848, |
| "learning_rate": 2.984449265491937e-05, |
| "loss": 0.0104, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.3565228656561731, |
| "grad_norm": 0.2981567680835724, |
| "learning_rate": 2.9815246759540523e-05, |
| "loss": 0.0229, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.3570410674957896, |
| "grad_norm": 0.15566019713878632, |
| "learning_rate": 2.9785973191349807e-05, |
| "loss": 0.0138, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.35755926933540616, |
| "grad_norm": 0.3972475826740265, |
| "learning_rate": 2.9756672032880226e-05, |
| "loss": 0.0202, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3580774711750227, |
| "grad_norm": 0.18364618718624115, |
| "learning_rate": 2.9727343366742593e-05, |
| "loss": 0.0143, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3585956730146392, |
| "grad_norm": 0.6787497401237488, |
| "learning_rate": 2.969798727562526e-05, |
| "loss": 0.0212, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.3591138748542557, |
| "grad_norm": 0.19514262676239014, |
| "learning_rate": 2.9668603842293914e-05, |
| "loss": 0.0151, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.35963207669387226, |
| "grad_norm": 0.17772996425628662, |
| "learning_rate": 2.9639193149591317e-05, |
| "loss": 0.0171, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.3601502785334888, |
| "grad_norm": 0.1942756623029709, |
| "learning_rate": 2.9609755280437107e-05, |
| "loss": 0.0191, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.36066848037310534, |
| "grad_norm": 0.1511867791414261, |
| "learning_rate": 2.9580290317827512e-05, |
| "loss": 0.004, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.3611866822127219, |
| "grad_norm": 0.35033461451530457, |
| "learning_rate": 2.9550798344835176e-05, |
| "loss": 0.0239, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.36170488405233836, |
| "grad_norm": 0.2694636881351471, |
| "learning_rate": 2.9521279444608875e-05, |
| "loss": 0.0276, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3622230858919549, |
| "grad_norm": 0.2565995752811432, |
| "learning_rate": 2.9491733700373307e-05, |
| "loss": 0.0313, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.36274128773157144, |
| "grad_norm": 0.42613688111305237, |
| "learning_rate": 2.9462161195428862e-05, |
| "loss": 0.0224, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.363259489571188, |
| "grad_norm": 0.2020256370306015, |
| "learning_rate": 2.9432562013151372e-05, |
| "loss": 0.0222, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.3637776914108045, |
| "grad_norm": 0.35377201437950134, |
| "learning_rate": 2.940293623699187e-05, |
| "loss": 0.0265, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.36429589325042105, |
| "grad_norm": 0.42614316940307617, |
| "learning_rate": 2.93732839504764e-05, |
| "loss": 0.0334, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.3648140950900376, |
| "grad_norm": 0.26751890778541565, |
| "learning_rate": 2.934360523720571e-05, |
| "loss": 0.0112, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.3653322969296541, |
| "grad_norm": 0.1458798050880432, |
| "learning_rate": 2.9313900180855087e-05, |
| "loss": 0.0113, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.3658504987692706, |
| "grad_norm": 0.20325633883476257, |
| "learning_rate": 2.9284168865174058e-05, |
| "loss": 0.0162, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.36636870060888715, |
| "grad_norm": 0.3195553123950958, |
| "learning_rate": 2.9254411373986218e-05, |
| "loss": 0.018, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.3668869024485037, |
| "grad_norm": 0.14034762978553772, |
| "learning_rate": 2.922462779118894e-05, |
| "loss": 0.0124, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.36740510428812023, |
| "grad_norm": 0.12469898164272308, |
| "learning_rate": 2.9194818200753157e-05, |
| "loss": 0.0122, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.36792330612773677, |
| "grad_norm": 0.15942120552062988, |
| "learning_rate": 2.916498268672313e-05, |
| "loss": 0.0159, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3684415079673533, |
| "grad_norm": 0.33030030131340027, |
| "learning_rate": 2.9135121333216218e-05, |
| "loss": 0.0164, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.3689597098069698, |
| "grad_norm": 0.4804418087005615, |
| "learning_rate": 2.910523422442262e-05, |
| "loss": 0.0457, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.36947791164658633, |
| "grad_norm": 0.14172638952732086, |
| "learning_rate": 2.907532144460516e-05, |
| "loss": 0.0131, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.36999611348620287, |
| "grad_norm": 0.14897310733795166, |
| "learning_rate": 2.9045383078099018e-05, |
| "loss": 0.0091, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3705143153258194, |
| "grad_norm": 0.3274158239364624, |
| "learning_rate": 2.901541920931154e-05, |
| "loss": 0.0211, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.37103251716543595, |
| "grad_norm": 0.367117702960968, |
| "learning_rate": 2.8985429922721953e-05, |
| "loss": 0.0283, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.3715507190050525, |
| "grad_norm": 0.2538760304450989, |
| "learning_rate": 2.895541530288115e-05, |
| "loss": 0.0162, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.37206892084466897, |
| "grad_norm": 0.16373780369758606, |
| "learning_rate": 2.892537543441144e-05, |
| "loss": 0.0129, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.3725871226842855, |
| "grad_norm": 0.21004892885684967, |
| "learning_rate": 2.889531040200633e-05, |
| "loss": 0.0172, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.37310532452390205, |
| "grad_norm": 0.15073063969612122, |
| "learning_rate": 2.8865220290430275e-05, |
| "loss": 0.018, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.3736235263635186, |
| "grad_norm": 0.3549087345600128, |
| "learning_rate": 2.8835105184518438e-05, |
| "loss": 0.025, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.3741417282031351, |
| "grad_norm": 0.3787144720554352, |
| "learning_rate": 2.880496516917642e-05, |
| "loss": 0.0116, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.37465993004275167, |
| "grad_norm": 0.25196373462677, |
| "learning_rate": 2.8774800329380103e-05, |
| "loss": 0.014, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.3751781318823682, |
| "grad_norm": 0.24565114080905914, |
| "learning_rate": 2.874461075017531e-05, |
| "loss": 0.0183, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.3756963337219847, |
| "grad_norm": 0.11303957551717758, |
| "learning_rate": 2.871439651667764e-05, |
| "loss": 0.0088, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.3762145355616012, |
| "grad_norm": 0.21302036941051483, |
| "learning_rate": 2.86841577140722e-05, |
| "loss": 0.0176, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.37673273740121777, |
| "grad_norm": 0.31156080961227417, |
| "learning_rate": 2.865389442761336e-05, |
| "loss": 0.0213, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.3772509392408343, |
| "grad_norm": 0.1727733612060547, |
| "learning_rate": 2.8623606742624513e-05, |
| "loss": 0.0161, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.37776914108045084, |
| "grad_norm": 0.3240056037902832, |
| "learning_rate": 2.8593294744497857e-05, |
| "loss": 0.0202, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.3782873429200674, |
| "grad_norm": 0.2706719636917114, |
| "learning_rate": 2.8562958518694123e-05, |
| "loss": 0.0228, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3788055447596839, |
| "grad_norm": 0.22442671656608582, |
| "learning_rate": 2.8532598150742364e-05, |
| "loss": 0.029, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.3793237465993004, |
| "grad_norm": 0.15026988089084625, |
| "learning_rate": 2.8502213726239678e-05, |
| "loss": 0.0143, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.37984194843891694, |
| "grad_norm": 0.3866124749183655, |
| "learning_rate": 2.8471805330851006e-05, |
| "loss": 0.0396, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.3803601502785335, |
| "grad_norm": 0.1631077378988266, |
| "learning_rate": 2.844137305030886e-05, |
| "loss": 0.0144, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.38087835211815, |
| "grad_norm": 0.258539080619812, |
| "learning_rate": 2.84109169704131e-05, |
| "loss": 0.0221, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.38139655395776656, |
| "grad_norm": 0.09007499366998672, |
| "learning_rate": 2.8380437177030687e-05, |
| "loss": 0.0068, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.3819147557973831, |
| "grad_norm": 0.1124981939792633, |
| "learning_rate": 2.8349933756095427e-05, |
| "loss": 0.0086, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.38243295763699964, |
| "grad_norm": 0.21551914513111115, |
| "learning_rate": 2.8319406793607752e-05, |
| "loss": 0.0114, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.3829511594766161, |
| "grad_norm": 0.1445673406124115, |
| "learning_rate": 2.828885637563447e-05, |
| "loss": 0.0079, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.38346936131623266, |
| "grad_norm": 0.25108802318573, |
| "learning_rate": 2.82582825883085e-05, |
| "loss": 0.0225, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.3839875631558492, |
| "grad_norm": 0.1586422175168991, |
| "learning_rate": 2.8227685517828662e-05, |
| "loss": 0.0131, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.38450576499546574, |
| "grad_norm": 0.1375976800918579, |
| "learning_rate": 2.8197065250459422e-05, |
| "loss": 0.0133, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.3850239668350823, |
| "grad_norm": 0.08903449028730392, |
| "learning_rate": 2.8166421872530647e-05, |
| "loss": 0.008, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.3855421686746988, |
| "grad_norm": 0.21467937529087067, |
| "learning_rate": 2.813575547043734e-05, |
| "loss": 0.0219, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.3860603705143153, |
| "grad_norm": 0.2640451192855835, |
| "learning_rate": 2.8105066130639457e-05, |
| "loss": 0.0155, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.38657857235393184, |
| "grad_norm": 0.11930115520954132, |
| "learning_rate": 2.8074353939661575e-05, |
| "loss": 0.008, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.3870967741935484, |
| "grad_norm": 0.21493662893772125, |
| "learning_rate": 2.804361898409274e-05, |
| "loss": 0.0091, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.3876149760331649, |
| "grad_norm": 0.18691658973693848, |
| "learning_rate": 2.8012861350586155e-05, |
| "loss": 0.0147, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.38813317787278145, |
| "grad_norm": 0.19181039929389954, |
| "learning_rate": 2.798208112585897e-05, |
| "loss": 0.027, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.388651379712398, |
| "grad_norm": 0.2299458533525467, |
| "learning_rate": 2.795127839669203e-05, |
| "loss": 0.0079, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.38916958155201453, |
| "grad_norm": 0.1063414141535759, |
| "learning_rate": 2.792045324992962e-05, |
| "loss": 0.004, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.389687783391631, |
| "grad_norm": 0.22242259979248047, |
| "learning_rate": 2.7889605772479233e-05, |
| "loss": 0.0254, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.39020598523124755, |
| "grad_norm": 0.25379589200019836, |
| "learning_rate": 2.7858736051311325e-05, |
| "loss": 0.0221, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.3907241870708641, |
| "grad_norm": 0.0877365842461586, |
| "learning_rate": 2.782784417345905e-05, |
| "loss": 0.0034, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.39124238891048063, |
| "grad_norm": 0.3404882848262787, |
| "learning_rate": 2.779693022601805e-05, |
| "loss": 0.0268, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.39176059075009717, |
| "grad_norm": 0.39878952503204346, |
| "learning_rate": 2.7765994296146176e-05, |
| "loss": 0.0248, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.3922787925897137, |
| "grad_norm": 0.6084442138671875, |
| "learning_rate": 2.7735036471063265e-05, |
| "loss": 0.0392, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.39279699442933025, |
| "grad_norm": 0.35335928201675415, |
| "learning_rate": 2.770405683805087e-05, |
| "loss": 0.0192, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.39331519626894673, |
| "grad_norm": 0.34677132964134216, |
| "learning_rate": 2.7673055484452042e-05, |
| "loss": 0.0205, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.39383339810856327, |
| "grad_norm": 0.3506069481372833, |
| "learning_rate": 2.7642032497671065e-05, |
| "loss": 0.0216, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.3943515999481798, |
| "grad_norm": 0.0815180316567421, |
| "learning_rate": 2.761098796517322e-05, |
| "loss": 0.0061, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.39486980178779635, |
| "grad_norm": 0.15629258751869202, |
| "learning_rate": 2.7579921974484532e-05, |
| "loss": 0.0132, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.3953880036274129, |
| "grad_norm": 0.06827951967716217, |
| "learning_rate": 2.754883461319151e-05, |
| "loss": 0.0065, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.3959062054670294, |
| "grad_norm": 0.23429635167121887, |
| "learning_rate": 2.7517725968940935e-05, |
| "loss": 0.015, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.39642440730664597, |
| "grad_norm": 0.40686559677124023, |
| "learning_rate": 2.748659612943958e-05, |
| "loss": 0.0325, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.39694260914626245, |
| "grad_norm": 0.34594210982322693, |
| "learning_rate": 2.745544518245398e-05, |
| "loss": 0.0323, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.397460810985879, |
| "grad_norm": 0.19717127084732056, |
| "learning_rate": 2.7424273215810192e-05, |
| "loss": 0.0093, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.3979790128254955, |
| "grad_norm": 0.2936684191226959, |
| "learning_rate": 2.73930803173935e-05, |
| "loss": 0.0187, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.39849721466511207, |
| "grad_norm": 0.25458282232284546, |
| "learning_rate": 2.7361866575148243e-05, |
| "loss": 0.03, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.3990154165047286, |
| "grad_norm": 0.19077961146831512, |
| "learning_rate": 2.7330632077077497e-05, |
| "loss": 0.0164, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.39953361834434514, |
| "grad_norm": 0.1585199385881424, |
| "learning_rate": 2.729937691124288e-05, |
| "loss": 0.014, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.4000518201839616, |
| "grad_norm": 0.0987677276134491, |
| "learning_rate": 2.7268101165764256e-05, |
| "loss": 0.0085, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.40057002202357817, |
| "grad_norm": 0.13913258910179138, |
| "learning_rate": 2.7236804928819525e-05, |
| "loss": 0.0128, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.4010882238631947, |
| "grad_norm": 0.1244295984506607, |
| "learning_rate": 2.720548828864436e-05, |
| "loss": 0.0092, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.40160642570281124, |
| "grad_norm": 0.461353063583374, |
| "learning_rate": 2.7174151333531953e-05, |
| "loss": 0.0264, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.4021246275424278, |
| "grad_norm": 0.23609624803066254, |
| "learning_rate": 2.714279415183277e-05, |
| "loss": 0.0269, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.4026428293820443, |
| "grad_norm": 0.1776999682188034, |
| "learning_rate": 2.7111416831954324e-05, |
| "loss": 0.0173, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.40316103122166086, |
| "grad_norm": 0.15312516689300537, |
| "learning_rate": 2.708001946236087e-05, |
| "loss": 0.012, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.40367923306127734, |
| "grad_norm": 0.2107289731502533, |
| "learning_rate": 2.7048602131573215e-05, |
| "loss": 0.0114, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.4041974349008939, |
| "grad_norm": 0.34796831011772156, |
| "learning_rate": 2.7017164928168442e-05, |
| "loss": 0.0275, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.4047156367405104, |
| "grad_norm": 0.22923240065574646, |
| "learning_rate": 2.698570794077965e-05, |
| "loss": 0.0077, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.40523383858012696, |
| "grad_norm": 0.15492555499076843, |
| "learning_rate": 2.6954231258095728e-05, |
| "loss": 0.0237, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.4057520404197435, |
| "grad_norm": 0.20873203873634338, |
| "learning_rate": 2.6922734968861095e-05, |
| "loss": 0.0115, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.40627024225936004, |
| "grad_norm": 0.3824748396873474, |
| "learning_rate": 2.689121916187543e-05, |
| "loss": 0.0291, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.4067884440989766, |
| "grad_norm": 0.2510291337966919, |
| "learning_rate": 2.6859683925993463e-05, |
| "loss": 0.0294, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.40730664593859306, |
| "grad_norm": 0.14085540175437927, |
| "learning_rate": 2.6828129350124688e-05, |
| "loss": 0.0114, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.4078248477782096, |
| "grad_norm": 0.17253312468528748, |
| "learning_rate": 2.679655552323313e-05, |
| "loss": 0.0104, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.40834304961782614, |
| "grad_norm": 0.21511492133140564, |
| "learning_rate": 2.6764962534337082e-05, |
| "loss": 0.0165, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.4088612514574427, |
| "grad_norm": 0.26060453057289124, |
| "learning_rate": 2.673335047250888e-05, |
| "loss": 0.0173, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.4093794532970592, |
| "grad_norm": 0.07147006690502167, |
| "learning_rate": 2.670171942687461e-05, |
| "loss": 0.0041, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.40989765513667575, |
| "grad_norm": 0.12942592799663544, |
| "learning_rate": 2.66700694866139e-05, |
| "loss": 0.0106, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.4104158569762923, |
| "grad_norm": 0.1451411247253418, |
| "learning_rate": 2.663840074095963e-05, |
| "loss": 0.0105, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.4109340588159088, |
| "grad_norm": 0.18926863372325897, |
| "learning_rate": 2.6606713279197722e-05, |
| "loss": 0.0227, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.4114522606555253, |
| "grad_norm": 0.4707081913948059, |
| "learning_rate": 2.6575007190666843e-05, |
| "loss": 0.0452, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.41197046249514185, |
| "grad_norm": 0.21161231398582458, |
| "learning_rate": 2.65432825647582e-05, |
| "loss": 0.016, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.4124886643347584, |
| "grad_norm": 0.1153176873922348, |
| "learning_rate": 2.6511539490915233e-05, |
| "loss": 0.0049, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.41300686617437493, |
| "grad_norm": 0.15783904492855072, |
| "learning_rate": 2.6479778058633426e-05, |
| "loss": 0.0132, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.41352506801399147, |
| "grad_norm": 0.46011635661125183, |
| "learning_rate": 2.6447998357460006e-05, |
| "loss": 0.025, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.41404326985360795, |
| "grad_norm": 0.31267356872558594, |
| "learning_rate": 2.6416200476993702e-05, |
| "loss": 0.0225, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.4145614716932245, |
| "grad_norm": 0.17100736498832703, |
| "learning_rate": 2.6384384506884496e-05, |
| "loss": 0.0093, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.41507967353284103, |
| "grad_norm": 0.48631441593170166, |
| "learning_rate": 2.6352550536833397e-05, |
| "loss": 0.0264, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.41559787537245757, |
| "grad_norm": 0.1943615972995758, |
| "learning_rate": 2.6320698656592126e-05, |
| "loss": 0.0157, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.4161160772120741, |
| "grad_norm": 0.2109791487455368, |
| "learning_rate": 2.6288828955962934e-05, |
| "loss": 0.0202, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.41663427905169065, |
| "grad_norm": 0.2340904027223587, |
| "learning_rate": 2.6256941524798278e-05, |
| "loss": 0.028, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.4171524808913072, |
| "grad_norm": 0.2932374179363251, |
| "learning_rate": 2.6225036453000643e-05, |
| "loss": 0.0189, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.41767068273092367, |
| "grad_norm": 0.1333383321762085, |
| "learning_rate": 2.619311383052222e-05, |
| "loss": 0.0089, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.4181888845705402, |
| "grad_norm": 0.1482066810131073, |
| "learning_rate": 2.6161173747364694e-05, |
| "loss": 0.0073, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.41870708641015675, |
| "grad_norm": 0.16888538002967834, |
| "learning_rate": 2.6129216293578978e-05, |
| "loss": 0.0133, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.4192252882497733, |
| "grad_norm": 0.32865652441978455, |
| "learning_rate": 2.609724155926495e-05, |
| "loss": 0.0335, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.4197434900893898, |
| "grad_norm": 0.18831047415733337, |
| "learning_rate": 2.6065249634571215e-05, |
| "loss": 0.0128, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.42026169192900636, |
| "grad_norm": 0.1461619734764099, |
| "learning_rate": 2.6033240609694864e-05, |
| "loss": 0.0159, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.4207798937686229, |
| "grad_norm": 0.13002753257751465, |
| "learning_rate": 2.600121457488116e-05, |
| "loss": 0.0153, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.4212980956082394, |
| "grad_norm": 0.25124236941337585, |
| "learning_rate": 2.596917162042335e-05, |
| "loss": 0.0219, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.4218162974478559, |
| "grad_norm": 0.12394192814826965, |
| "learning_rate": 2.593711183666237e-05, |
| "loss": 0.0144, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.42233449928747246, |
| "grad_norm": 0.3708241283893585, |
| "learning_rate": 2.5905035313986622e-05, |
| "loss": 0.0425, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.422852701127089, |
| "grad_norm": 0.2006586492061615, |
| "learning_rate": 2.5872942142831692e-05, |
| "loss": 0.0299, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.42337090296670554, |
| "grad_norm": 0.2943853735923767, |
| "learning_rate": 2.5840832413680096e-05, |
| "loss": 0.0456, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.4238891048063221, |
| "grad_norm": 0.1277809590101242, |
| "learning_rate": 2.5808706217061038e-05, |
| "loss": 0.0115, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.4244073066459386, |
| "grad_norm": 0.16021910309791565, |
| "learning_rate": 2.5776563643550153e-05, |
| "loss": 0.0186, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.4249255084855551, |
| "grad_norm": 0.13433189690113068, |
| "learning_rate": 2.574440478376925e-05, |
| "loss": 0.0206, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.42544371032517164, |
| "grad_norm": 0.19457033276557922, |
| "learning_rate": 2.5712229728386054e-05, |
| "loss": 0.0106, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.4259619121647882, |
| "grad_norm": 0.3658684194087982, |
| "learning_rate": 2.5680038568113944e-05, |
| "loss": 0.03, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.4264801140044047, |
| "grad_norm": 0.23988570272922516, |
| "learning_rate": 2.564783139371172e-05, |
| "loss": 0.0288, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.42699831584402126, |
| "grad_norm": 0.16135792434215546, |
| "learning_rate": 2.5615608295983308e-05, |
| "loss": 0.0181, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.4275165176836378, |
| "grad_norm": 0.22570829093456268, |
| "learning_rate": 2.5583369365777556e-05, |
| "loss": 0.0112, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.4280347195232543, |
| "grad_norm": 0.18531742691993713, |
| "learning_rate": 2.555111469398793e-05, |
| "loss": 0.0145, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.4285529213628708, |
| "grad_norm": 0.13606177270412445, |
| "learning_rate": 2.551884437155228e-05, |
| "loss": 0.0085, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.42907112320248736, |
| "grad_norm": 0.10950173437595367, |
| "learning_rate": 2.548655848945258e-05, |
| "loss": 0.0094, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.4295893250421039, |
| "grad_norm": 0.2311939150094986, |
| "learning_rate": 2.5454257138714686e-05, |
| "loss": 0.0131, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.43010752688172044, |
| "grad_norm": 0.14860382676124573, |
| "learning_rate": 2.5421940410408047e-05, |
| "loss": 0.017, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.430625728721337, |
| "grad_norm": 0.10067587345838547, |
| "learning_rate": 2.5389608395645485e-05, |
| "loss": 0.0129, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.4311439305609535, |
| "grad_norm": 0.210439532995224, |
| "learning_rate": 2.5357261185582905e-05, |
| "loss": 0.0206, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.43166213240057, |
| "grad_norm": 0.161096453666687, |
| "learning_rate": 2.532489887141906e-05, |
| "loss": 0.0152, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.43218033424018654, |
| "grad_norm": 0.3122105896472931, |
| "learning_rate": 2.5292521544395295e-05, |
| "loss": 0.0086, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.4326985360798031, |
| "grad_norm": 0.13776956498622894, |
| "learning_rate": 2.5260129295795257e-05, |
| "loss": 0.0183, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.4332167379194196, |
| "grad_norm": 0.13521309196949005, |
| "learning_rate": 2.522772221694469e-05, |
| "loss": 0.0128, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.43373493975903615, |
| "grad_norm": 0.14316008985042572, |
| "learning_rate": 2.5195300399211137e-05, |
| "loss": 0.0108, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.4342531415986527, |
| "grad_norm": 0.2449924498796463, |
| "learning_rate": 2.5162863934003693e-05, |
| "loss": 0.0181, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.43477134343826923, |
| "grad_norm": 0.263105571269989, |
| "learning_rate": 2.5130412912772762e-05, |
| "loss": 0.03, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.4352895452778857, |
| "grad_norm": 0.33116382360458374, |
| "learning_rate": 2.5097947427009767e-05, |
| "loss": 0.0178, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.43580774711750225, |
| "grad_norm": 0.24194487929344177, |
| "learning_rate": 2.506546756824694e-05, |
| "loss": 0.0125, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.4363259489571188, |
| "grad_norm": 0.3121300935745239, |
| "learning_rate": 2.5032973428057e-05, |
| "loss": 0.0128, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.43684415079673533, |
| "grad_norm": 0.2334538698196411, |
| "learning_rate": 2.500046509805296e-05, |
| "loss": 0.0167, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.43736235263635187, |
| "grad_norm": 0.1214141920208931, |
| "learning_rate": 2.496794266988783e-05, |
| "loss": 0.0083, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.4378805544759684, |
| "grad_norm": 0.4726041555404663, |
| "learning_rate": 2.4935406235254367e-05, |
| "loss": 0.0291, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.43839875631558495, |
| "grad_norm": 0.10701345652341843, |
| "learning_rate": 2.490285588588481e-05, |
| "loss": 0.0064, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.43891695815520143, |
| "grad_norm": 0.4372049868106842, |
| "learning_rate": 2.4870291713550648e-05, |
| "loss": 0.0211, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.43943515999481797, |
| "grad_norm": 0.1732262223958969, |
| "learning_rate": 2.4837713810062317e-05, |
| "loss": 0.0149, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.4399533618344345, |
| "grad_norm": 0.32579711079597473, |
| "learning_rate": 2.480512226726899e-05, |
| "loss": 0.0154, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.44047156367405105, |
| "grad_norm": 0.2965090572834015, |
| "learning_rate": 2.477251717705828e-05, |
| "loss": 0.0325, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.4409897655136676, |
| "grad_norm": 0.418514609336853, |
| "learning_rate": 2.4739898631355995e-05, |
| "loss": 0.0164, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.4415079673532841, |
| "grad_norm": 0.2681904733181, |
| "learning_rate": 2.4707266722125888e-05, |
| "loss": 0.0115, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.4420261691929006, |
| "grad_norm": 0.40225011110305786, |
| "learning_rate": 2.4674621541369392e-05, |
| "loss": 0.0143, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.44254437103251715, |
| "grad_norm": 0.24150243401527405, |
| "learning_rate": 2.4641963181125332e-05, |
| "loss": 0.021, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.4430625728721337, |
| "grad_norm": 0.23854626715183258, |
| "learning_rate": 2.460929173346972e-05, |
| "loss": 0.0231, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.4435807747117502, |
| "grad_norm": 0.18777628242969513, |
| "learning_rate": 2.4576607290515445e-05, |
| "loss": 0.0114, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.44409897655136676, |
| "grad_norm": 0.12525376677513123, |
| "learning_rate": 2.4543909944412048e-05, |
| "loss": 0.0095, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.4446171783909833, |
| "grad_norm": 0.24290180206298828, |
| "learning_rate": 2.4511199787345447e-05, |
| "loss": 0.0189, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.44513538023059984, |
| "grad_norm": 0.26251229643821716, |
| "learning_rate": 2.447847691153767e-05, |
| "loss": 0.0202, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.4456535820702163, |
| "grad_norm": 0.35255348682403564, |
| "learning_rate": 2.4445741409246616e-05, |
| "loss": 0.0187, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.44617178390983286, |
| "grad_norm": 0.15728411078453064, |
| "learning_rate": 2.4412993372765782e-05, |
| "loss": 0.0095, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.4466899857494494, |
| "grad_norm": 0.33080241084098816, |
| "learning_rate": 2.438023289442399e-05, |
| "loss": 0.0203, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.44720818758906594, |
| "grad_norm": 0.3215973973274231, |
| "learning_rate": 2.4347460066585146e-05, |
| "loss": 0.037, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.4477263894286825, |
| "grad_norm": 0.25812244415283203, |
| "learning_rate": 2.4314674981647982e-05, |
| "loss": 0.0169, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.448244591268299, |
| "grad_norm": 0.21332046389579773, |
| "learning_rate": 2.4281877732045778e-05, |
| "loss": 0.0195, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.44876279310791556, |
| "grad_norm": 0.42107102274894714, |
| "learning_rate": 2.4249068410246114e-05, |
| "loss": 0.0157, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.44928099494753204, |
| "grad_norm": 0.1260032057762146, |
| "learning_rate": 2.4216247108750613e-05, |
| "loss": 0.0156, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.4497991967871486, |
| "grad_norm": 0.19122031331062317, |
| "learning_rate": 2.4183413920094657e-05, |
| "loss": 0.0083, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.4503173986267651, |
| "grad_norm": 0.18674705922603607, |
| "learning_rate": 2.4150568936847166e-05, |
| "loss": 0.023, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.45083560046638166, |
| "grad_norm": 0.33427202701568604, |
| "learning_rate": 2.4117712251610287e-05, |
| "loss": 0.026, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.4513538023059982, |
| "grad_norm": 0.2598010003566742, |
| "learning_rate": 2.4084843957019174e-05, |
| "loss": 0.0188, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.45187200414561474, |
| "grad_norm": 0.21907593309879303, |
| "learning_rate": 2.405196414574171e-05, |
| "loss": 0.0165, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.4523902059852313, |
| "grad_norm": 0.20734968781471252, |
| "learning_rate": 2.4019072910478248e-05, |
| "loss": 0.0133, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.45290840782484776, |
| "grad_norm": 0.18882878124713898, |
| "learning_rate": 2.3986170343961342e-05, |
| "loss": 0.0115, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.4534266096644643, |
| "grad_norm": 0.2494608759880066, |
| "learning_rate": 2.3953256538955517e-05, |
| "loss": 0.0146, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.45394481150408084, |
| "grad_norm": 0.22158919274806976, |
| "learning_rate": 2.3920331588256945e-05, |
| "loss": 0.0138, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.4544630133436974, |
| "grad_norm": 0.3354347050189972, |
| "learning_rate": 2.3887395584693258e-05, |
| "loss": 0.0276, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.4549812151833139, |
| "grad_norm": 0.20476201176643372, |
| "learning_rate": 2.3854448621123227e-05, |
| "loss": 0.0121, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.45549941702293045, |
| "grad_norm": 0.36594030261039734, |
| "learning_rate": 2.382149079043654e-05, |
| "loss": 0.0271, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.45601761886254694, |
| "grad_norm": 0.1319293975830078, |
| "learning_rate": 2.3788522185553505e-05, |
| "loss": 0.0088, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4565358207021635, |
| "grad_norm": 0.2355639934539795, |
| "learning_rate": 2.375554289942483e-05, |
| "loss": 0.0196, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.45705402254178, |
| "grad_norm": 0.09457190334796906, |
| "learning_rate": 2.3722553025031304e-05, |
| "loss": 0.0071, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.45757222438139655, |
| "grad_norm": 0.2910573184490204, |
| "learning_rate": 2.36895526553836e-05, |
| "loss": 0.0191, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.4580904262210131, |
| "grad_norm": 0.36989885568618774, |
| "learning_rate": 2.3656541883521963e-05, |
| "loss": 0.0255, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.45860862806062963, |
| "grad_norm": 0.29597464203834534, |
| "learning_rate": 2.362352080251598e-05, |
| "loss": 0.0198, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.45912682990024617, |
| "grad_norm": 0.32694950699806213, |
| "learning_rate": 2.359048950546429e-05, |
| "loss": 0.019, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.45964503173986265, |
| "grad_norm": 0.2195930778980255, |
| "learning_rate": 2.3557448085494343e-05, |
| "loss": 0.0279, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.4601632335794792, |
| "grad_norm": 0.20476271212100983, |
| "learning_rate": 2.352439663576212e-05, |
| "loss": 0.0137, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.46068143541909573, |
| "grad_norm": 0.3710532784461975, |
| "learning_rate": 2.349133524945189e-05, |
| "loss": 0.0216, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.46119963725871227, |
| "grad_norm": 0.15248271822929382, |
| "learning_rate": 2.3458264019775933e-05, |
| "loss": 0.0161, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.4617178390983288, |
| "grad_norm": 0.2434094101190567, |
| "learning_rate": 2.3425183039974284e-05, |
| "loss": 0.0175, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.46223604093794535, |
| "grad_norm": 0.33260586857795715, |
| "learning_rate": 2.3392092403314447e-05, |
| "loss": 0.0206, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4627542427775619, |
| "grad_norm": 0.10504089295864105, |
| "learning_rate": 2.3358992203091185e-05, |
| "loss": 0.0059, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.46327244461717837, |
| "grad_norm": 0.08952392637729645, |
| "learning_rate": 2.3325882532626203e-05, |
| "loss": 0.011, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.4637906464567949, |
| "grad_norm": 0.1848842054605484, |
| "learning_rate": 2.3292763485267917e-05, |
| "loss": 0.015, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.46430884829641145, |
| "grad_norm": 0.1017458513379097, |
| "learning_rate": 2.325963515439116e-05, |
| "loss": 0.0131, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.464827050136028, |
| "grad_norm": 0.1866416186094284, |
| "learning_rate": 2.3226497633396978e-05, |
| "loss": 0.0142, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.4653452519756445, |
| "grad_norm": 0.19123072922229767, |
| "learning_rate": 2.3193351015712278e-05, |
| "loss": 0.0155, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.46586345381526106, |
| "grad_norm": 0.1844363510608673, |
| "learning_rate": 2.3160195394789648e-05, |
| "loss": 0.0142, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.46638165565487755, |
| "grad_norm": 0.12927821278572083, |
| "learning_rate": 2.3127030864107044e-05, |
| "loss": 0.008, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4668998574944941, |
| "grad_norm": 0.2421107441186905, |
| "learning_rate": 2.3093857517167555e-05, |
| "loss": 0.0189, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.4674180593341106, |
| "grad_norm": 0.12359347939491272, |
| "learning_rate": 2.3060675447499116e-05, |
| "loss": 0.0081, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.46793626117372716, |
| "grad_norm": 0.13112476468086243, |
| "learning_rate": 2.302748474865426e-05, |
| "loss": 0.0166, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.4684544630133437, |
| "grad_norm": 0.2681474983692169, |
| "learning_rate": 2.299428551420984e-05, |
| "loss": 0.0217, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.46897266485296024, |
| "grad_norm": 0.12790656089782715, |
| "learning_rate": 2.296107783776679e-05, |
| "loss": 0.0084, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.4694908666925768, |
| "grad_norm": 0.21175391972064972, |
| "learning_rate": 2.2927861812949823e-05, |
| "loss": 0.0123, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.47000906853219326, |
| "grad_norm": 0.14393571019172668, |
| "learning_rate": 2.2894637533407212e-05, |
| "loss": 0.0097, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.4705272703718098, |
| "grad_norm": 0.21133379638195038, |
| "learning_rate": 2.2861405092810492e-05, |
| "loss": 0.0175, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.47104547221142634, |
| "grad_norm": 0.12299802154302597, |
| "learning_rate": 2.2828164584854204e-05, |
| "loss": 0.0091, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.4715636740510429, |
| "grad_norm": 0.29604965448379517, |
| "learning_rate": 2.2794916103255632e-05, |
| "loss": 0.0153, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.4720818758906594, |
| "grad_norm": 0.3092305362224579, |
| "learning_rate": 2.2761659741754562e-05, |
| "loss": 0.0142, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.47260007773027596, |
| "grad_norm": 0.3267214000225067, |
| "learning_rate": 2.2728395594112965e-05, |
| "loss": 0.036, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.4731182795698925, |
| "grad_norm": 0.13050074875354767, |
| "learning_rate": 2.269512375411479e-05, |
| "loss": 0.0096, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.473636481409509, |
| "grad_norm": 0.38647061586380005, |
| "learning_rate": 2.266184431556566e-05, |
| "loss": 0.0173, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.4741546832491255, |
| "grad_norm": 0.1383042186498642, |
| "learning_rate": 2.2628557372292618e-05, |
| "loss": 0.0133, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.47467288508874206, |
| "grad_norm": 0.19705870747566223, |
| "learning_rate": 2.2595263018143885e-05, |
| "loss": 0.0126, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.4751910869283586, |
| "grad_norm": 0.19639675319194794, |
| "learning_rate": 2.2561961346988553e-05, |
| "loss": 0.0104, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.47570928876797514, |
| "grad_norm": 0.28665390610694885, |
| "learning_rate": 2.2528652452716354e-05, |
| "loss": 0.0236, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.4762274906075917, |
| "grad_norm": 0.15427164733409882, |
| "learning_rate": 2.2495336429237388e-05, |
| "loss": 0.0151, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.4767456924472082, |
| "grad_norm": 0.36349889636039734, |
| "learning_rate": 2.246201337048185e-05, |
| "loss": 0.0209, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.4772638942868247, |
| "grad_norm": 0.21488307416439056, |
| "learning_rate": 2.2428683370399767e-05, |
| "loss": 0.0197, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.47778209612644124, |
| "grad_norm": 0.27596259117126465, |
| "learning_rate": 2.2395346522960742e-05, |
| "loss": 0.0231, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.4783002979660578, |
| "grad_norm": 0.21963416039943695, |
| "learning_rate": 2.2362002922153686e-05, |
| "loss": 0.0193, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.4788184998056743, |
| "grad_norm": 0.25122183561325073, |
| "learning_rate": 2.232865266198654e-05, |
| "loss": 0.0182, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.47933670164529085, |
| "grad_norm": 0.2015954703092575, |
| "learning_rate": 2.2295295836486028e-05, |
| "loss": 0.0127, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.4798549034849074, |
| "grad_norm": 0.1243843361735344, |
| "learning_rate": 2.226193253969738e-05, |
| "loss": 0.0078, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.4803731053245239, |
| "grad_norm": 0.5754460096359253, |
| "learning_rate": 2.222856286568408e-05, |
| "loss": 0.0329, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.4808913071641404, |
| "grad_norm": 0.1487659215927124, |
| "learning_rate": 2.219518690852757e-05, |
| "loss": 0.0069, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.48140950900375695, |
| "grad_norm": 0.1283818930387497, |
| "learning_rate": 2.2161804762327036e-05, |
| "loss": 0.0036, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.4819277108433735, |
| "grad_norm": 0.26554226875305176, |
| "learning_rate": 2.2128416521199085e-05, |
| "loss": 0.0109, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.48244591268299003, |
| "grad_norm": 0.2600436508655548, |
| "learning_rate": 2.2095022279277535e-05, |
| "loss": 0.0229, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.48296411452260657, |
| "grad_norm": 0.5942787528038025, |
| "learning_rate": 2.2061622130713097e-05, |
| "loss": 0.0356, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.4834823163622231, |
| "grad_norm": 0.30313023924827576, |
| "learning_rate": 2.2028216169673155e-05, |
| "loss": 0.0229, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.4840005182018396, |
| "grad_norm": 0.06324416399002075, |
| "learning_rate": 2.1994804490341467e-05, |
| "loss": 0.0019, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.48451872004145613, |
| "grad_norm": 0.3620297312736511, |
| "learning_rate": 2.1961387186917926e-05, |
| "loss": 0.0128, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.48503692188107267, |
| "grad_norm": 0.5341509580612183, |
| "learning_rate": 2.1927964353618253e-05, |
| "loss": 0.0253, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.4855551237206892, |
| "grad_norm": 0.1990864872932434, |
| "learning_rate": 2.18945360846738e-05, |
| "loss": 0.0103, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.48607332556030575, |
| "grad_norm": 0.05690270662307739, |
| "learning_rate": 2.186110247433122e-05, |
| "loss": 0.0058, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.4865915273999223, |
| "grad_norm": 0.2440994381904602, |
| "learning_rate": 2.1827663616852228e-05, |
| "loss": 0.0094, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.4871097292395388, |
| "grad_norm": 0.16593553125858307, |
| "learning_rate": 2.1794219606513335e-05, |
| "loss": 0.0097, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4876279310791553, |
| "grad_norm": 0.24518586695194244, |
| "learning_rate": 2.176077053760558e-05, |
| "loss": 0.0313, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.48814613291877185, |
| "grad_norm": 0.19542941451072693, |
| "learning_rate": 2.172731650443425e-05, |
| "loss": 0.023, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.4886643347583884, |
| "grad_norm": 0.2354109138250351, |
| "learning_rate": 2.1693857601318665e-05, |
| "loss": 0.0114, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.4891825365980049, |
| "grad_norm": 0.3304251730442047, |
| "learning_rate": 2.166039392259184e-05, |
| "loss": 0.0118, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.48970073843762146, |
| "grad_norm": 0.21442767977714539, |
| "learning_rate": 2.1626925562600266e-05, |
| "loss": 0.0143, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.490218940277238, |
| "grad_norm": 0.1758863776922226, |
| "learning_rate": 2.159345261570362e-05, |
| "loss": 0.0145, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.49073714211685454, |
| "grad_norm": 0.3084407150745392, |
| "learning_rate": 2.1559975176274545e-05, |
| "loss": 0.015, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.491255343956471, |
| "grad_norm": 0.26537203788757324, |
| "learning_rate": 2.152649333869831e-05, |
| "loss": 0.0112, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.49177354579608756, |
| "grad_norm": 0.21452540159225464, |
| "learning_rate": 2.1493007197372614e-05, |
| "loss": 0.0125, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.4922917476357041, |
| "grad_norm": 0.23216988146305084, |
| "learning_rate": 2.1459516846707267e-05, |
| "loss": 0.0198, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.49280994947532064, |
| "grad_norm": 0.2911728024482727, |
| "learning_rate": 2.1426022381123967e-05, |
| "loss": 0.0165, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.4933281513149372, |
| "grad_norm": 0.25865858793258667, |
| "learning_rate": 2.1392523895056e-05, |
| "loss": 0.0177, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.4938463531545537, |
| "grad_norm": 0.2825429439544678, |
| "learning_rate": 2.1359021482947996e-05, |
| "loss": 0.0276, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.4943645549941702, |
| "grad_norm": 0.09475862979888916, |
| "learning_rate": 2.1325515239255642e-05, |
| "loss": 0.0062, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.49488275683378674, |
| "grad_norm": 0.29453566670417786, |
| "learning_rate": 2.1292005258445442e-05, |
| "loss": 0.0216, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.4954009586734033, |
| "grad_norm": 0.15916626155376434, |
| "learning_rate": 2.1258491634994422e-05, |
| "loss": 0.0087, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.4959191605130198, |
| "grad_norm": 0.29559406638145447, |
| "learning_rate": 2.1224974463389892e-05, |
| "loss": 0.0111, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.49643736235263636, |
| "grad_norm": 0.19165553152561188, |
| "learning_rate": 2.119145383812915e-05, |
| "loss": 0.0189, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.4969555641922529, |
| "grad_norm": 0.2623184025287628, |
| "learning_rate": 2.1157929853719255e-05, |
| "loss": 0.0228, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.49747376603186944, |
| "grad_norm": 0.6063542366027832, |
| "learning_rate": 2.11244026046767e-05, |
| "loss": 0.0241, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.4979919678714859, |
| "grad_norm": 0.6173031330108643, |
| "learning_rate": 2.109087218552723e-05, |
| "loss": 0.0243, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.49851016971110246, |
| "grad_norm": 0.22902703285217285, |
| "learning_rate": 2.1057338690805485e-05, |
| "loss": 0.0218, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.499028371550719, |
| "grad_norm": 0.2686314582824707, |
| "learning_rate": 2.1023802215054797e-05, |
| "loss": 0.022, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.49954657339033554, |
| "grad_norm": 0.1550043672323227, |
| "learning_rate": 2.099026285282689e-05, |
| "loss": 0.0093, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.5000647752299521, |
| "grad_norm": 0.0828891471028328, |
| "learning_rate": 2.095672069868165e-05, |
| "loss": 0.0054, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.5005829770695686, |
| "grad_norm": 0.2590828239917755, |
| "learning_rate": 2.0923175847186797e-05, |
| "loss": 0.0138, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.5011011789091852, |
| "grad_norm": 0.26930099725723267, |
| "learning_rate": 2.08896283929177e-05, |
| "loss": 0.0233, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.5016193807488016, |
| "grad_norm": 0.23394693434238434, |
| "learning_rate": 2.0856078430457033e-05, |
| "loss": 0.0151, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.5021375825884182, |
| "grad_norm": 0.18417996168136597, |
| "learning_rate": 2.0822526054394562e-05, |
| "loss": 0.013, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.5026557844280347, |
| "grad_norm": 0.16068002581596375, |
| "learning_rate": 2.0788971359326827e-05, |
| "loss": 0.0128, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5031739862676512, |
| "grad_norm": 0.29491886496543884, |
| "learning_rate": 2.0755414439856948e-05, |
| "loss": 0.0224, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.5036921881072678, |
| "grad_norm": 0.16440480947494507, |
| "learning_rate": 2.0721855390594294e-05, |
| "loss": 0.0135, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.5042103899468843, |
| "grad_norm": 0.1847657710313797, |
| "learning_rate": 2.068829430615423e-05, |
| "loss": 0.0121, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.5047285917865009, |
| "grad_norm": 0.31765875220298767, |
| "learning_rate": 2.0654731281157874e-05, |
| "loss": 0.0332, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.5052467936261174, |
| "grad_norm": 0.27984386682510376, |
| "learning_rate": 2.062116641023181e-05, |
| "loss": 0.0122, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.505764995465734, |
| "grad_norm": 0.24907967448234558, |
| "learning_rate": 2.0587599788007825e-05, |
| "loss": 0.018, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.5062831973053504, |
| "grad_norm": 0.18288204073905945, |
| "learning_rate": 2.0554031509122656e-05, |
| "loss": 0.0122, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.5068013991449669, |
| "grad_norm": 0.13284114003181458, |
| "learning_rate": 2.0520461668217683e-05, |
| "loss": 0.0083, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.5073196009845835, |
| "grad_norm": 0.20933015644550323, |
| "learning_rate": 2.0486890359938724e-05, |
| "loss": 0.0162, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.5078378028242, |
| "grad_norm": 0.23051418364048004, |
| "learning_rate": 2.045331767893571e-05, |
| "loss": 0.0186, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5083560046638166, |
| "grad_norm": 0.23583345115184784, |
| "learning_rate": 2.0419743719862447e-05, |
| "loss": 0.0188, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.5088742065034331, |
| "grad_norm": 0.2092517614364624, |
| "learning_rate": 2.0386168577376346e-05, |
| "loss": 0.0202, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.5093924083430497, |
| "grad_norm": 0.2943207025527954, |
| "learning_rate": 2.035259234613816e-05, |
| "loss": 0.0287, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.5099106101826661, |
| "grad_norm": 0.18729962408542633, |
| "learning_rate": 2.03190151208117e-05, |
| "loss": 0.0166, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.5104288120222826, |
| "grad_norm": 0.19974197447299957, |
| "learning_rate": 2.0285436996063596e-05, |
| "loss": 0.0148, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.5109470138618992, |
| "grad_norm": 0.09993212670087814, |
| "learning_rate": 2.0251858066562997e-05, |
| "loss": 0.0042, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.5114652157015157, |
| "grad_norm": 0.17130297422409058, |
| "learning_rate": 2.0218278426981332e-05, |
| "loss": 0.0117, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.5119834175411323, |
| "grad_norm": 0.0700158104300499, |
| "learning_rate": 2.0184698171992023e-05, |
| "loss": 0.0046, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.5125016193807488, |
| "grad_norm": 0.24379920959472656, |
| "learning_rate": 2.0151117396270243e-05, |
| "loss": 0.0132, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.5130198212203654, |
| "grad_norm": 0.1190122663974762, |
| "learning_rate": 2.0117536194492616e-05, |
| "loss": 0.0107, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5135380230599819, |
| "grad_norm": 0.3585830628871918, |
| "learning_rate": 2.008395466133697e-05, |
| "loss": 0.022, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.5140562248995983, |
| "grad_norm": 0.1896945983171463, |
| "learning_rate": 2.0050372891482065e-05, |
| "loss": 0.0169, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.5145744267392149, |
| "grad_norm": 0.27251240611076355, |
| "learning_rate": 2.0016790979607353e-05, |
| "loss": 0.0197, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.5150926285788314, |
| "grad_norm": 0.2470308244228363, |
| "learning_rate": 1.9983209020392657e-05, |
| "loss": 0.0107, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.515610830418448, |
| "grad_norm": 0.1589849293231964, |
| "learning_rate": 1.9949627108517938e-05, |
| "loss": 0.0075, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.5161290322580645, |
| "grad_norm": 0.283698171377182, |
| "learning_rate": 1.991604533866304e-05, |
| "loss": 0.0148, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.5166472340976811, |
| "grad_norm": 0.2342384308576584, |
| "learning_rate": 1.988246380550739e-05, |
| "loss": 0.0427, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.5171654359372976, |
| "grad_norm": 0.30012941360473633, |
| "learning_rate": 1.9848882603729763e-05, |
| "loss": 0.0139, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.5176836377769141, |
| "grad_norm": 0.1278998851776123, |
| "learning_rate": 1.981530182800798e-05, |
| "loss": 0.0117, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.5182018396165307, |
| "grad_norm": 0.31228339672088623, |
| "learning_rate": 1.978172157301867e-05, |
| "loss": 0.0191, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5187200414561471, |
| "grad_norm": 0.13579922914505005, |
| "learning_rate": 1.9748141933437003e-05, |
| "loss": 0.0052, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.5192382432957637, |
| "grad_norm": 0.29426661133766174, |
| "learning_rate": 1.9714563003936414e-05, |
| "loss": 0.0255, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.5197564451353802, |
| "grad_norm": 0.15366186201572418, |
| "learning_rate": 1.9680984879188306e-05, |
| "loss": 0.0109, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.5202746469749968, |
| "grad_norm": 0.2718539834022522, |
| "learning_rate": 1.9647407653861846e-05, |
| "loss": 0.0136, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.5207928488146133, |
| "grad_norm": 0.307129830121994, |
| "learning_rate": 1.9613831422623657e-05, |
| "loss": 0.0193, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.5213110506542298, |
| "grad_norm": 0.21338942646980286, |
| "learning_rate": 1.9580256280137563e-05, |
| "loss": 0.0152, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.5218292524938464, |
| "grad_norm": 0.3313189446926117, |
| "learning_rate": 1.9546682321064296e-05, |
| "loss": 0.0202, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.5223474543334629, |
| "grad_norm": 0.17557553946971893, |
| "learning_rate": 1.951310964006128e-05, |
| "loss": 0.0094, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.5228656561730795, |
| "grad_norm": 0.22019444406032562, |
| "learning_rate": 1.9479538331782317e-05, |
| "loss": 0.0097, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.5233838580126959, |
| "grad_norm": 0.15170864760875702, |
| "learning_rate": 1.9445968490877357e-05, |
| "loss": 0.0075, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.5239020598523125, |
| "grad_norm": 0.5065960884094238, |
| "learning_rate": 1.9412400211992178e-05, |
| "loss": 0.0285, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.524420261691929, |
| "grad_norm": 0.14924491941928864, |
| "learning_rate": 1.9378833589768196e-05, |
| "loss": 0.0158, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.5249384635315455, |
| "grad_norm": 0.2105136215686798, |
| "learning_rate": 1.934526871884213e-05, |
| "loss": 0.0107, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.5254566653711621, |
| "grad_norm": 0.21951298415660858, |
| "learning_rate": 1.931170569384578e-05, |
| "loss": 0.0125, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.5259748672107786, |
| "grad_norm": 0.31380099058151245, |
| "learning_rate": 1.9278144609405712e-05, |
| "loss": 0.0218, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.5264930690503952, |
| "grad_norm": 0.10603537410497665, |
| "learning_rate": 1.9244585560143055e-05, |
| "loss": 0.007, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.5270112708900117, |
| "grad_norm": 0.2262280434370041, |
| "learning_rate": 1.9211028640673173e-05, |
| "loss": 0.0117, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.5275294727296281, |
| "grad_norm": 0.1582176387310028, |
| "learning_rate": 1.917747394560545e-05, |
| "loss": 0.0051, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.5280476745692447, |
| "grad_norm": 0.17075197398662567, |
| "learning_rate": 1.914392156954297e-05, |
| "loss": 0.0147, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.5285658764088612, |
| "grad_norm": 0.3450043499469757, |
| "learning_rate": 1.9110371607082303e-05, |
| "loss": 0.0201, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.5290840782484778, |
| "grad_norm": 0.11497087776660919, |
| "learning_rate": 1.9076824152813203e-05, |
| "loss": 0.0048, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.5296022800880943, |
| "grad_norm": 0.25621140003204346, |
| "learning_rate": 1.9043279301318365e-05, |
| "loss": 0.0117, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.5301204819277109, |
| "grad_norm": 0.18441203236579895, |
| "learning_rate": 1.9009737147173116e-05, |
| "loss": 0.0094, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.5306386837673274, |
| "grad_norm": 0.18254609405994415, |
| "learning_rate": 1.897619778494521e-05, |
| "loss": 0.0115, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.5311568856069439, |
| "grad_norm": 0.336505264043808, |
| "learning_rate": 1.894266130919452e-05, |
| "loss": 0.0287, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.5316750874465604, |
| "grad_norm": 0.2124987095594406, |
| "learning_rate": 1.8909127814472776e-05, |
| "loss": 0.0139, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.5321932892861769, |
| "grad_norm": 0.30894720554351807, |
| "learning_rate": 1.88755973953233e-05, |
| "loss": 0.0175, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.5327114911257935, |
| "grad_norm": 0.16775032877922058, |
| "learning_rate": 1.8842070146280752e-05, |
| "loss": 0.0101, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.53322969296541, |
| "grad_norm": 0.2674119472503662, |
| "learning_rate": 1.880854616187085e-05, |
| "loss": 0.0136, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.5337478948050266, |
| "grad_norm": 0.31328970193862915, |
| "learning_rate": 1.8775025536610118e-05, |
| "loss": 0.0193, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.5342660966446431, |
| "grad_norm": 0.18065671622753143, |
| "learning_rate": 1.8741508365005584e-05, |
| "loss": 0.0079, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.5347842984842596, |
| "grad_norm": 0.1782873570919037, |
| "learning_rate": 1.8707994741554565e-05, |
| "loss": 0.0073, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.5353025003238762, |
| "grad_norm": 0.20292030274868011, |
| "learning_rate": 1.867448476074436e-05, |
| "loss": 0.0115, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.5358207021634926, |
| "grad_norm": 0.19517368078231812, |
| "learning_rate": 1.8640978517052014e-05, |
| "loss": 0.0187, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.5363389040031092, |
| "grad_norm": 0.20960001647472382, |
| "learning_rate": 1.8607476104944006e-05, |
| "loss": 0.0142, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.5368571058427257, |
| "grad_norm": 0.5607621073722839, |
| "learning_rate": 1.8573977618876036e-05, |
| "loss": 0.0207, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.5373753076823423, |
| "grad_norm": 0.21276597678661346, |
| "learning_rate": 1.8540483153292736e-05, |
| "loss": 0.0137, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.5378935095219588, |
| "grad_norm": 0.11740380525588989, |
| "learning_rate": 1.85069928026274e-05, |
| "loss": 0.0082, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.5384117113615753, |
| "grad_norm": 0.33550262451171875, |
| "learning_rate": 1.8473506661301697e-05, |
| "loss": 0.0194, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.5389299132011919, |
| "grad_norm": 0.33623600006103516, |
| "learning_rate": 1.8440024823725462e-05, |
| "loss": 0.0336, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.5394481150408084, |
| "grad_norm": 0.2196187973022461, |
| "learning_rate": 1.8406547384296377e-05, |
| "loss": 0.0172, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.539966316880425, |
| "grad_norm": 0.17992262542247772, |
| "learning_rate": 1.8373074437399744e-05, |
| "loss": 0.0115, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.5404845187200414, |
| "grad_norm": 0.19862842559814453, |
| "learning_rate": 1.8339606077408165e-05, |
| "loss": 0.0238, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.541002720559658, |
| "grad_norm": 0.30180686712265015, |
| "learning_rate": 1.8306142398681338e-05, |
| "loss": 0.0115, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.5415209223992745, |
| "grad_norm": 0.24334149062633514, |
| "learning_rate": 1.827268349556575e-05, |
| "loss": 0.0172, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.542039124238891, |
| "grad_norm": 0.17345716059207916, |
| "learning_rate": 1.8239229462394435e-05, |
| "loss": 0.0109, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.5425573260785076, |
| "grad_norm": 0.25446006655693054, |
| "learning_rate": 1.8205780393486675e-05, |
| "loss": 0.0113, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.5430755279181241, |
| "grad_norm": 0.319873571395874, |
| "learning_rate": 1.817233638314778e-05, |
| "loss": 0.0232, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.5435937297577407, |
| "grad_norm": 0.42063799500465393, |
| "learning_rate": 1.813889752566878e-05, |
| "loss": 0.026, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.5441119315973572, |
| "grad_norm": 0.33134227991104126, |
| "learning_rate": 1.8105463915326205e-05, |
| "loss": 0.0196, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5446301334369738, |
| "grad_norm": 0.26480934023857117, |
| "learning_rate": 1.807203564638175e-05, |
| "loss": 0.0267, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.5451483352765902, |
| "grad_norm": 0.18234863877296448, |
| "learning_rate": 1.8038612813082084e-05, |
| "loss": 0.0137, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.5456665371162067, |
| "grad_norm": 0.15523044764995575, |
| "learning_rate": 1.8005195509658536e-05, |
| "loss": 0.0111, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.5461847389558233, |
| "grad_norm": 0.3344533145427704, |
| "learning_rate": 1.797178383032685e-05, |
| "loss": 0.0168, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.5467029407954398, |
| "grad_norm": 0.12319575250148773, |
| "learning_rate": 1.793837786928691e-05, |
| "loss": 0.0053, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.5472211426350564, |
| "grad_norm": 0.13881735503673553, |
| "learning_rate": 1.790497772072247e-05, |
| "loss": 0.0114, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.5477393444746729, |
| "grad_norm": 0.19909988343715668, |
| "learning_rate": 1.7871583478800915e-05, |
| "loss": 0.0138, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.5482575463142895, |
| "grad_norm": 0.08285260200500488, |
| "learning_rate": 1.7838195237672974e-05, |
| "loss": 0.0034, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.548775748153906, |
| "grad_norm": 0.5260575413703918, |
| "learning_rate": 1.7804813091472435e-05, |
| "loss": 0.0355, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.5492939499935224, |
| "grad_norm": 0.13995790481567383, |
| "learning_rate": 1.7771437134315927e-05, |
| "loss": 0.0102, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.549812151833139, |
| "grad_norm": 0.3183762729167938, |
| "learning_rate": 1.7738067460302625e-05, |
| "loss": 0.0168, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.5503303536727555, |
| "grad_norm": 0.19481916725635529, |
| "learning_rate": 1.770470416351398e-05, |
| "loss": 0.0129, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.5508485555123721, |
| "grad_norm": 0.18306854367256165, |
| "learning_rate": 1.7671347338013468e-05, |
| "loss": 0.0224, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.5513667573519886, |
| "grad_norm": 0.22059708833694458, |
| "learning_rate": 1.763799707784632e-05, |
| "loss": 0.0117, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.5518849591916052, |
| "grad_norm": 0.4268891513347626, |
| "learning_rate": 1.7604653477039258e-05, |
| "loss": 0.0138, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.5524031610312217, |
| "grad_norm": 0.10974422097206116, |
| "learning_rate": 1.7571316629600243e-05, |
| "loss": 0.0057, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.5529213628708382, |
| "grad_norm": 0.09162997454404831, |
| "learning_rate": 1.753798662951816e-05, |
| "loss": 0.0061, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.5534395647104547, |
| "grad_norm": 0.12637154757976532, |
| "learning_rate": 1.7504663570762615e-05, |
| "loss": 0.0085, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.5539577665500712, |
| "grad_norm": 0.1373872607946396, |
| "learning_rate": 1.747134754728365e-05, |
| "loss": 0.0097, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.5544759683896878, |
| "grad_norm": 0.23044104874134064, |
| "learning_rate": 1.7438038653011457e-05, |
| "loss": 0.0148, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.5549941702293043, |
| "grad_norm": 0.22062622010707855, |
| "learning_rate": 1.740473698185612e-05, |
| "loss": 0.0199, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.5555123720689208, |
| "grad_norm": 0.32974135875701904, |
| "learning_rate": 1.7371442627707385e-05, |
| "loss": 0.0099, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.5560305739085374, |
| "grad_norm": 0.18324632942676544, |
| "learning_rate": 1.7338155684434344e-05, |
| "loss": 0.0103, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.5565487757481539, |
| "grad_norm": 0.1241641715168953, |
| "learning_rate": 1.730487624588522e-05, |
| "loss": 0.0079, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.5570669775877705, |
| "grad_norm": 0.22878427803516388, |
| "learning_rate": 1.727160440588704e-05, |
| "loss": 0.0148, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.557585179427387, |
| "grad_norm": 0.549785315990448, |
| "learning_rate": 1.723834025824544e-05, |
| "loss": 0.0457, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.5581033812670035, |
| "grad_norm": 0.5593258142471313, |
| "learning_rate": 1.7205083896744365e-05, |
| "loss": 0.0259, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.55862158310662, |
| "grad_norm": 0.1253921538591385, |
| "learning_rate": 1.7171835415145806e-05, |
| "loss": 0.0117, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.5591397849462365, |
| "grad_norm": 0.15490829944610596, |
| "learning_rate": 1.713859490718951e-05, |
| "loss": 0.0103, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.5596579867858531, |
| "grad_norm": 0.19711671769618988, |
| "learning_rate": 1.710536246659279e-05, |
| "loss": 0.0094, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5601761886254696, |
| "grad_norm": 0.24218136072158813, |
| "learning_rate": 1.7072138187050177e-05, |
| "loss": 0.0223, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.5606943904650862, |
| "grad_norm": 0.43247753381729126, |
| "learning_rate": 1.7038922162233224e-05, |
| "loss": 0.0437, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.5612125923047027, |
| "grad_norm": 0.3048844039440155, |
| "learning_rate": 1.7005714485790167e-05, |
| "loss": 0.0164, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.5617307941443193, |
| "grad_norm": 0.269682377576828, |
| "learning_rate": 1.6972515251345746e-05, |
| "loss": 0.0153, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.5622489959839357, |
| "grad_norm": 0.22269891202449799, |
| "learning_rate": 1.6939324552500887e-05, |
| "loss": 0.0158, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.5627671978235522, |
| "grad_norm": 0.11371760815382004, |
| "learning_rate": 1.690614248283245e-05, |
| "loss": 0.0072, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.5632853996631688, |
| "grad_norm": 0.2383551448583603, |
| "learning_rate": 1.6872969135892963e-05, |
| "loss": 0.0207, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.5638036015027853, |
| "grad_norm": 0.27557238936424255, |
| "learning_rate": 1.683980460521036e-05, |
| "loss": 0.0137, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.5643218033424019, |
| "grad_norm": 0.16742661595344543, |
| "learning_rate": 1.680664898428773e-05, |
| "loss": 0.0085, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.5648400051820184, |
| "grad_norm": 0.17626698315143585, |
| "learning_rate": 1.6773502366603032e-05, |
| "loss": 0.0079, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.565358207021635, |
| "grad_norm": 0.27058324217796326, |
| "learning_rate": 1.6740364845608842e-05, |
| "loss": 0.0169, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.5658764088612515, |
| "grad_norm": 0.2151581048965454, |
| "learning_rate": 1.670723651473209e-05, |
| "loss": 0.0185, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.5663946107008679, |
| "grad_norm": 0.20677979290485382, |
| "learning_rate": 1.6674117467373797e-05, |
| "loss": 0.0114, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.5669128125404845, |
| "grad_norm": 0.23222865164279938, |
| "learning_rate": 1.6641007796908822e-05, |
| "loss": 0.0212, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.567431014380101, |
| "grad_norm": 0.4040347635746002, |
| "learning_rate": 1.6607907596685556e-05, |
| "loss": 0.0174, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.5679492162197176, |
| "grad_norm": 0.15651348233222961, |
| "learning_rate": 1.6574816960025722e-05, |
| "loss": 0.0096, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.5684674180593341, |
| "grad_norm": 0.2294890284538269, |
| "learning_rate": 1.654173598022407e-05, |
| "loss": 0.0184, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.5689856198989507, |
| "grad_norm": 0.23169976472854614, |
| "learning_rate": 1.6508664750548114e-05, |
| "loss": 0.0101, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.5695038217385672, |
| "grad_norm": 0.16020269691944122, |
| "learning_rate": 1.6475603364237886e-05, |
| "loss": 0.0108, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.5700220235781837, |
| "grad_norm": 0.27258041501045227, |
| "learning_rate": 1.644255191450566e-05, |
| "loss": 0.0172, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5705402254178003, |
| "grad_norm": 0.25970420241355896, |
| "learning_rate": 1.640951049453571e-05, |
| "loss": 0.015, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.5710584272574167, |
| "grad_norm": 0.15470710396766663, |
| "learning_rate": 1.6376479197484028e-05, |
| "loss": 0.0108, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.5715766290970333, |
| "grad_norm": 0.15609830617904663, |
| "learning_rate": 1.634345811647804e-05, |
| "loss": 0.0179, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.5720948309366498, |
| "grad_norm": 0.24201931059360504, |
| "learning_rate": 1.6310447344616406e-05, |
| "loss": 0.0152, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.5726130327762664, |
| "grad_norm": 0.2512819468975067, |
| "learning_rate": 1.62774469749687e-05, |
| "loss": 0.018, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.5731312346158829, |
| "grad_norm": 0.13237565755844116, |
| "learning_rate": 1.6244457100575184e-05, |
| "loss": 0.0102, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.5736494364554994, |
| "grad_norm": 0.17929814755916595, |
| "learning_rate": 1.6211477814446498e-05, |
| "loss": 0.0147, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.574167638295116, |
| "grad_norm": 0.26788702607154846, |
| "learning_rate": 1.6178509209563465e-05, |
| "loss": 0.0326, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.5746858401347325, |
| "grad_norm": 0.3604764938354492, |
| "learning_rate": 1.6145551378876773e-05, |
| "loss": 0.0134, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.575204041974349, |
| "grad_norm": 0.1976727545261383, |
| "learning_rate": 1.611260441530675e-05, |
| "loss": 0.0138, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5757222438139655, |
| "grad_norm": 0.24070017039775848, |
| "learning_rate": 1.6079668411743058e-05, |
| "loss": 0.0204, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.5762404456535821, |
| "grad_norm": 0.3317483365535736, |
| "learning_rate": 1.604674346104449e-05, |
| "loss": 0.0183, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.5767586474931986, |
| "grad_norm": 0.18072566390037537, |
| "learning_rate": 1.6013829656038654e-05, |
| "loss": 0.0181, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.5772768493328151, |
| "grad_norm": 0.21476292610168457, |
| "learning_rate": 1.5980927089521762e-05, |
| "loss": 0.0124, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.5777950511724317, |
| "grad_norm": 0.1312938779592514, |
| "learning_rate": 1.5948035854258294e-05, |
| "loss": 0.0071, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.5783132530120482, |
| "grad_norm": 0.08147620409727097, |
| "learning_rate": 1.5915156042980833e-05, |
| "loss": 0.0028, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.5788314548516648, |
| "grad_norm": 0.06438157707452774, |
| "learning_rate": 1.5882287748389716e-05, |
| "loss": 0.0031, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.5793496566912812, |
| "grad_norm": 0.23561817407608032, |
| "learning_rate": 1.584943106315284e-05, |
| "loss": 0.0308, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.5798678585308978, |
| "grad_norm": 0.13540908694267273, |
| "learning_rate": 1.5816586079905346e-05, |
| "loss": 0.0135, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.5803860603705143, |
| "grad_norm": 0.10627519339323044, |
| "learning_rate": 1.578375289124939e-05, |
| "loss": 0.0075, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5809042622101308, |
| "grad_norm": 0.15574534237384796, |
| "learning_rate": 1.575093158975389e-05, |
| "loss": 0.0073, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.5814224640497474, |
| "grad_norm": 0.22351405024528503, |
| "learning_rate": 1.5718122267954232e-05, |
| "loss": 0.0161, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.5819406658893639, |
| "grad_norm": 0.3184528648853302, |
| "learning_rate": 1.5685325018352028e-05, |
| "loss": 0.0326, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.5824588677289805, |
| "grad_norm": 0.1077670082449913, |
| "learning_rate": 1.5652539933414858e-05, |
| "loss": 0.0037, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.582977069568597, |
| "grad_norm": 0.16742312908172607, |
| "learning_rate": 1.561976710557602e-05, |
| "loss": 0.0118, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.5834952714082134, |
| "grad_norm": 0.17053693532943726, |
| "learning_rate": 1.5587006627234225e-05, |
| "loss": 0.0151, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.58401347324783, |
| "grad_norm": 0.26882749795913696, |
| "learning_rate": 1.5554258590753388e-05, |
| "loss": 0.0153, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.5845316750874465, |
| "grad_norm": 0.17190419137477875, |
| "learning_rate": 1.5521523088462332e-05, |
| "loss": 0.0104, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.5850498769270631, |
| "grad_norm": 0.2322155237197876, |
| "learning_rate": 1.5488800212654557e-05, |
| "loss": 0.0248, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.5855680787666796, |
| "grad_norm": 0.3169136047363281, |
| "learning_rate": 1.545609005558796e-05, |
| "loss": 0.0321, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5860862806062962, |
| "grad_norm": 0.35867586731910706, |
| "learning_rate": 1.5423392709484565e-05, |
| "loss": 0.0089, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.5866044824459127, |
| "grad_norm": 0.17669306695461273, |
| "learning_rate": 1.5390708266530288e-05, |
| "loss": 0.0166, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.5871226842855292, |
| "grad_norm": 0.3532368540763855, |
| "learning_rate": 1.5358036818874668e-05, |
| "loss": 0.0289, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.5876408861251458, |
| "grad_norm": 0.16300423443317413, |
| "learning_rate": 1.5325378458630615e-05, |
| "loss": 0.0084, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.5881590879647622, |
| "grad_norm": 0.29001057147979736, |
| "learning_rate": 1.5292733277874116e-05, |
| "loss": 0.0244, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.5886772898043788, |
| "grad_norm": 0.1256478726863861, |
| "learning_rate": 1.5260101368644008e-05, |
| "loss": 0.0075, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.5891954916439953, |
| "grad_norm": 0.19914460182189941, |
| "learning_rate": 1.5227482822941725e-05, |
| "loss": 0.0143, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.5897136934836119, |
| "grad_norm": 0.2361249178647995, |
| "learning_rate": 1.519487773273102e-05, |
| "loss": 0.0169, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.5902318953232284, |
| "grad_norm": 0.19691789150238037, |
| "learning_rate": 1.516228618993769e-05, |
| "loss": 0.0108, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.5907500971628449, |
| "grad_norm": 0.3483859896659851, |
| "learning_rate": 1.5129708286449359e-05, |
| "loss": 0.0218, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5912682990024615, |
| "grad_norm": 0.3023143708705902, |
| "learning_rate": 1.509714411411519e-05, |
| "loss": 0.0143, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.591786500842078, |
| "grad_norm": 0.17124785482883453, |
| "learning_rate": 1.506459376474564e-05, |
| "loss": 0.0095, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.5923047026816946, |
| "grad_norm": 0.19885291159152985, |
| "learning_rate": 1.5032057330112174e-05, |
| "loss": 0.0091, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.592822904521311, |
| "grad_norm": 0.6884151697158813, |
| "learning_rate": 1.4999534901947046e-05, |
| "loss": 0.0288, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.5933411063609276, |
| "grad_norm": 0.3492492139339447, |
| "learning_rate": 1.4967026571943004e-05, |
| "loss": 0.0219, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.5938593082005441, |
| "grad_norm": 0.2615423798561096, |
| "learning_rate": 1.4934532431753073e-05, |
| "loss": 0.021, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.5943775100401606, |
| "grad_norm": 0.23267151415348053, |
| "learning_rate": 1.4902052572990236e-05, |
| "loss": 0.0166, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.5948957118797772, |
| "grad_norm": 0.20014171302318573, |
| "learning_rate": 1.4869587087227241e-05, |
| "loss": 0.0098, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.5954139137193937, |
| "grad_norm": 0.13207559287548065, |
| "learning_rate": 1.4837136065996307e-05, |
| "loss": 0.0077, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.5959321155590103, |
| "grad_norm": 0.36822882294654846, |
| "learning_rate": 1.4804699600788872e-05, |
| "loss": 0.0262, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.5964503173986268, |
| "grad_norm": 0.1576855331659317, |
| "learning_rate": 1.4772277783055318e-05, |
| "loss": 0.0077, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.5969685192382433, |
| "grad_norm": 0.056045811623334885, |
| "learning_rate": 1.4739870704204746e-05, |
| "loss": 0.0034, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.5974867210778598, |
| "grad_norm": 0.164437398314476, |
| "learning_rate": 1.4707478455604713e-05, |
| "loss": 0.0047, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.5980049229174763, |
| "grad_norm": 0.1738058626651764, |
| "learning_rate": 1.4675101128580944e-05, |
| "loss": 0.0106, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.5985231247570929, |
| "grad_norm": 0.27060168981552124, |
| "learning_rate": 1.4642738814417104e-05, |
| "loss": 0.0233, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.5990413265967094, |
| "grad_norm": 0.3752515912055969, |
| "learning_rate": 1.4610391604354522e-05, |
| "loss": 0.0152, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.599559528436326, |
| "grad_norm": 0.32041823863983154, |
| "learning_rate": 1.4578059589591953e-05, |
| "loss": 0.0149, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.6000777302759425, |
| "grad_norm": 0.0778937041759491, |
| "learning_rate": 1.4545742861285325e-05, |
| "loss": 0.006, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.6005959321155591, |
| "grad_norm": 0.22142964601516724, |
| "learning_rate": 1.4513441510547428e-05, |
| "loss": 0.0119, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.6011141339551755, |
| "grad_norm": 0.3265921473503113, |
| "learning_rate": 1.4481155628447728e-05, |
| "loss": 0.0286, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.601632335794792, |
| "grad_norm": 0.22287465631961823, |
| "learning_rate": 1.4448885306012078e-05, |
| "loss": 0.0118, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.6021505376344086, |
| "grad_norm": 0.14569589495658875, |
| "learning_rate": 1.4416630634222449e-05, |
| "loss": 0.0106, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.6026687394740251, |
| "grad_norm": 0.1785486936569214, |
| "learning_rate": 1.4384391704016695e-05, |
| "loss": 0.0106, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.6031869413136417, |
| "grad_norm": 0.22368593513965607, |
| "learning_rate": 1.4352168606288285e-05, |
| "loss": 0.0145, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.6037051431532582, |
| "grad_norm": 0.18533925712108612, |
| "learning_rate": 1.4319961431886054e-05, |
| "loss": 0.0178, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.6042233449928748, |
| "grad_norm": 0.22897221148014069, |
| "learning_rate": 1.4287770271613952e-05, |
| "loss": 0.0156, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.6047415468324913, |
| "grad_norm": 0.28994596004486084, |
| "learning_rate": 1.4255595216230753e-05, |
| "loss": 0.0277, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.6052597486721077, |
| "grad_norm": 0.17252403497695923, |
| "learning_rate": 1.4223436356449852e-05, |
| "loss": 0.0094, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.6057779505117243, |
| "grad_norm": 0.17040708661079407, |
| "learning_rate": 1.4191293782938967e-05, |
| "loss": 0.0117, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.6062961523513408, |
| "grad_norm": 0.47751736640930176, |
| "learning_rate": 1.4159167586319916e-05, |
| "loss": 0.0296, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.6068143541909574, |
| "grad_norm": 0.19108998775482178, |
| "learning_rate": 1.4127057857168316e-05, |
| "loss": 0.0138, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.6073325560305739, |
| "grad_norm": 0.235890194773674, |
| "learning_rate": 1.4094964686013381e-05, |
| "loss": 0.0107, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.6078507578701905, |
| "grad_norm": 0.12942662835121155, |
| "learning_rate": 1.4062888163337633e-05, |
| "loss": 0.0059, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.608368959709807, |
| "grad_norm": 0.4324270486831665, |
| "learning_rate": 1.4030828379576664e-05, |
| "loss": 0.0296, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.6088871615494235, |
| "grad_norm": 0.11905232071876526, |
| "learning_rate": 1.3998785425118851e-05, |
| "loss": 0.012, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.6094053633890401, |
| "grad_norm": 0.20753410458564758, |
| "learning_rate": 1.3966759390305143e-05, |
| "loss": 0.0133, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.6099235652286565, |
| "grad_norm": 0.2876964211463928, |
| "learning_rate": 1.3934750365428783e-05, |
| "loss": 0.0315, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.6104417670682731, |
| "grad_norm": 0.20571279525756836, |
| "learning_rate": 1.3902758440735058e-05, |
| "loss": 0.0189, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.6109599689078896, |
| "grad_norm": 0.16015420854091644, |
| "learning_rate": 1.3870783706421032e-05, |
| "loss": 0.0191, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.6114781707475061, |
| "grad_norm": 0.23814105987548828, |
| "learning_rate": 1.3838826252635313e-05, |
| "loss": 0.033, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.6119963725871227, |
| "grad_norm": 0.2253902405500412, |
| "learning_rate": 1.3806886169477784e-05, |
| "loss": 0.0109, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.6125145744267392, |
| "grad_norm": 0.17431442439556122, |
| "learning_rate": 1.3774963546999364e-05, |
| "loss": 0.0113, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.6130327762663558, |
| "grad_norm": 0.24366681277751923, |
| "learning_rate": 1.3743058475201727e-05, |
| "loss": 0.019, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.6135509781059723, |
| "grad_norm": 0.20095647871494293, |
| "learning_rate": 1.3711171044037074e-05, |
| "loss": 0.0145, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.6140691799455888, |
| "grad_norm": 0.37862977385520935, |
| "learning_rate": 1.3679301343407874e-05, |
| "loss": 0.0226, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.6145873817852053, |
| "grad_norm": 0.23509663343429565, |
| "learning_rate": 1.3647449463166616e-05, |
| "loss": 0.0125, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.6151055836248218, |
| "grad_norm": 0.1528037041425705, |
| "learning_rate": 1.3615615493115509e-05, |
| "loss": 0.0136, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.6156237854644384, |
| "grad_norm": 0.24438060820102692, |
| "learning_rate": 1.3583799523006307e-05, |
| "loss": 0.0134, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.6161419873040549, |
| "grad_norm": 0.1713854968547821, |
| "learning_rate": 1.3552001642540003e-05, |
| "loss": 0.0085, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.6166601891436715, |
| "grad_norm": 0.4027296006679535, |
| "learning_rate": 1.3520221941366579e-05, |
| "loss": 0.0288, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.617178390983288, |
| "grad_norm": 0.25324851274490356, |
| "learning_rate": 1.3488460509084774e-05, |
| "loss": 0.0355, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.6176965928229046, |
| "grad_norm": 0.15679526329040527, |
| "learning_rate": 1.3456717435241808e-05, |
| "loss": 0.0141, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.618214794662521, |
| "grad_norm": 0.3524555563926697, |
| "learning_rate": 1.342499280933316e-05, |
| "loss": 0.0229, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.6187329965021375, |
| "grad_norm": 0.12044969946146011, |
| "learning_rate": 1.339328672080229e-05, |
| "loss": 0.01, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.6192511983417541, |
| "grad_norm": 0.20489737391471863, |
| "learning_rate": 1.3361599259040375e-05, |
| "loss": 0.0128, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.6197694001813706, |
| "grad_norm": 0.30765610933303833, |
| "learning_rate": 1.3329930513386105e-05, |
| "loss": 0.0166, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.6202876020209872, |
| "grad_norm": 0.15374019742012024, |
| "learning_rate": 1.3298280573125392e-05, |
| "loss": 0.0099, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.6208058038606037, |
| "grad_norm": 0.12795308232307434, |
| "learning_rate": 1.3266649527491127e-05, |
| "loss": 0.0037, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.6213240057002203, |
| "grad_norm": 0.0716242715716362, |
| "learning_rate": 1.3235037465662921e-05, |
| "loss": 0.0051, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.6218422075398368, |
| "grad_norm": 0.18337176740169525, |
| "learning_rate": 1.3203444476766874e-05, |
| "loss": 0.0113, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6223604093794532, |
| "grad_norm": 0.25055044889450073, |
| "learning_rate": 1.3171870649875313e-05, |
| "loss": 0.0111, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.6228786112190698, |
| "grad_norm": 0.28508034348487854, |
| "learning_rate": 1.3140316074006545e-05, |
| "loss": 0.0113, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.6233968130586863, |
| "grad_norm": 0.1235652044415474, |
| "learning_rate": 1.3108780838124577e-05, |
| "loss": 0.0101, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.6239150148983029, |
| "grad_norm": 0.18104441463947296, |
| "learning_rate": 1.3077265031138913e-05, |
| "loss": 0.0068, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.6244332167379194, |
| "grad_norm": 0.1736707240343094, |
| "learning_rate": 1.3045768741904272e-05, |
| "loss": 0.0255, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.624951418577536, |
| "grad_norm": 0.31340596079826355, |
| "learning_rate": 1.3014292059220357e-05, |
| "loss": 0.0154, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.6254696204171525, |
| "grad_norm": 0.3333686292171478, |
| "learning_rate": 1.2982835071831565e-05, |
| "loss": 0.0221, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.625987822256769, |
| "grad_norm": 0.09395372867584229, |
| "learning_rate": 1.295139786842679e-05, |
| "loss": 0.0028, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.6265060240963856, |
| "grad_norm": 0.19452796876430511, |
| "learning_rate": 1.2919980537639134e-05, |
| "loss": 0.0173, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.627024225936002, |
| "grad_norm": 0.16185300052165985, |
| "learning_rate": 1.2888583168045688e-05, |
| "loss": 0.0104, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.6275424277756186, |
| "grad_norm": 0.3420233726501465, |
| "learning_rate": 1.2857205848167231e-05, |
| "loss": 0.0224, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.6280606296152351, |
| "grad_norm": 0.18805178999900818, |
| "learning_rate": 1.2825848666468052e-05, |
| "loss": 0.0125, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.6285788314548517, |
| "grad_norm": 0.32541659474372864, |
| "learning_rate": 1.2794511711355644e-05, |
| "loss": 0.0225, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.6290970332944682, |
| "grad_norm": 0.6056791543960571, |
| "learning_rate": 1.2763195071180483e-05, |
| "loss": 0.029, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.6296152351340847, |
| "grad_norm": 0.06527632474899292, |
| "learning_rate": 1.2731898834235752e-05, |
| "loss": 0.0057, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.6301334369737013, |
| "grad_norm": 0.1526113748550415, |
| "learning_rate": 1.270062308875713e-05, |
| "loss": 0.0073, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.6306516388133178, |
| "grad_norm": 0.14983856678009033, |
| "learning_rate": 1.2669367922922504e-05, |
| "loss": 0.0051, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.6311698406529344, |
| "grad_norm": 0.17253729701042175, |
| "learning_rate": 1.2638133424851766e-05, |
| "loss": 0.0093, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.6316880424925508, |
| "grad_norm": 0.42797988653182983, |
| "learning_rate": 1.2606919682606505e-05, |
| "loss": 0.0314, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.6322062443321674, |
| "grad_norm": 0.22709913551807404, |
| "learning_rate": 1.2575726784189816e-05, |
| "loss": 0.0295, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.6327244461717839, |
| "grad_norm": 0.16602033376693726, |
| "learning_rate": 1.2544554817546017e-05, |
| "loss": 0.0139, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.6332426480114004, |
| "grad_norm": 0.31228166818618774, |
| "learning_rate": 1.2513403870560429e-05, |
| "loss": 0.0191, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.633760849851017, |
| "grad_norm": 0.1140090823173523, |
| "learning_rate": 1.2482274031059073e-05, |
| "loss": 0.0102, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.6342790516906335, |
| "grad_norm": 0.37553247809410095, |
| "learning_rate": 1.2451165386808496e-05, |
| "loss": 0.0115, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.6347972535302501, |
| "grad_norm": 0.16082100570201874, |
| "learning_rate": 1.2420078025515476e-05, |
| "loss": 0.0138, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.6353154553698666, |
| "grad_norm": 0.1942625790834427, |
| "learning_rate": 1.2389012034826787e-05, |
| "loss": 0.0114, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.635833657209483, |
| "grad_norm": 0.0908832773566246, |
| "learning_rate": 1.2357967502328943e-05, |
| "loss": 0.004, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.6363518590490996, |
| "grad_norm": 0.32251858711242676, |
| "learning_rate": 1.2326944515547963e-05, |
| "loss": 0.0133, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.6368700608887161, |
| "grad_norm": 0.42536473274230957, |
| "learning_rate": 1.2295943161949135e-05, |
| "loss": 0.0407, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.6373882627283327, |
| "grad_norm": 0.2966007888317108, |
| "learning_rate": 1.2264963528936747e-05, |
| "loss": 0.0168, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.6379064645679492, |
| "grad_norm": 0.30448687076568604, |
| "learning_rate": 1.2234005703853829e-05, |
| "loss": 0.0304, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.6384246664075658, |
| "grad_norm": 0.22583232820034027, |
| "learning_rate": 1.2203069773981953e-05, |
| "loss": 0.0199, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.6389428682471823, |
| "grad_norm": 0.21759001910686493, |
| "learning_rate": 1.2172155826540952e-05, |
| "loss": 0.0123, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.6394610700867988, |
| "grad_norm": 0.2320861667394638, |
| "learning_rate": 1.2141263948688687e-05, |
| "loss": 0.0127, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.6399792719264153, |
| "grad_norm": 0.3261067271232605, |
| "learning_rate": 1.211039422752077e-05, |
| "loss": 0.0145, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.6404974737660318, |
| "grad_norm": 0.15837307274341583, |
| "learning_rate": 1.2079546750070385e-05, |
| "loss": 0.0073, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.6410156756056484, |
| "grad_norm": 0.13467198610305786, |
| "learning_rate": 1.2048721603307971e-05, |
| "loss": 0.0073, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.6415338774452649, |
| "grad_norm": 0.10030899196863174, |
| "learning_rate": 1.2017918874141037e-05, |
| "loss": 0.0066, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.6420520792848815, |
| "grad_norm": 0.1476227343082428, |
| "learning_rate": 1.198713864941385e-05, |
| "loss": 0.0082, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.642570281124498, |
| "grad_norm": 0.15087570250034332, |
| "learning_rate": 1.1956381015907266e-05, |
| "loss": 0.0115, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.6430884829641145, |
| "grad_norm": 0.271362841129303, |
| "learning_rate": 1.1925646060338427e-05, |
| "loss": 0.0212, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.6436066848037311, |
| "grad_norm": 0.22935399413108826, |
| "learning_rate": 1.1894933869360555e-05, |
| "loss": 0.014, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.6441248866433475, |
| "grad_norm": 0.12292534857988358, |
| "learning_rate": 1.186424452956266e-05, |
| "loss": 0.0108, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.6446430884829641, |
| "grad_norm": 0.0863785669207573, |
| "learning_rate": 1.1833578127469361e-05, |
| "loss": 0.0026, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.6451612903225806, |
| "grad_norm": 0.4094049334526062, |
| "learning_rate": 1.1802934749540578e-05, |
| "loss": 0.0201, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.6456794921621972, |
| "grad_norm": 0.1941591501235962, |
| "learning_rate": 1.1772314482171346e-05, |
| "loss": 0.0103, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.6461976940018137, |
| "grad_norm": 0.05845485255122185, |
| "learning_rate": 1.1741717411691509e-05, |
| "loss": 0.0027, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.6467158958414302, |
| "grad_norm": 0.22471730411052704, |
| "learning_rate": 1.1711143624365537e-05, |
| "loss": 0.0181, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.6472340976810468, |
| "grad_norm": 0.22589747607707977, |
| "learning_rate": 1.1680593206392248e-05, |
| "loss": 0.016, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.6477522995206633, |
| "grad_norm": 0.1910380721092224, |
| "learning_rate": 1.1650066243904585e-05, |
| "loss": 0.0026, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.6482705013602799, |
| "grad_norm": 0.4072948396205902, |
| "learning_rate": 1.1619562822969323e-05, |
| "loss": 0.0121, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.6487887031998963, |
| "grad_norm": 0.1868102103471756, |
| "learning_rate": 1.1589083029586906e-05, |
| "loss": 0.0045, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.6493069050395129, |
| "grad_norm": 0.33840611577033997, |
| "learning_rate": 1.1558626949691142e-05, |
| "loss": 0.0317, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.6498251068791294, |
| "grad_norm": 0.1256544589996338, |
| "learning_rate": 1.1528194669149006e-05, |
| "loss": 0.0108, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.6503433087187459, |
| "grad_norm": 0.1379401683807373, |
| "learning_rate": 1.1497786273760329e-05, |
| "loss": 0.0073, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.6508615105583625, |
| "grad_norm": 0.21062274277210236, |
| "learning_rate": 1.1467401849257643e-05, |
| "loss": 0.0141, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.651379712397979, |
| "grad_norm": 0.20837582647800446, |
| "learning_rate": 1.1437041481305877e-05, |
| "loss": 0.0132, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.6518979142375956, |
| "grad_norm": 0.3153347074985504, |
| "learning_rate": 1.1406705255502151e-05, |
| "loss": 0.0095, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.6524161160772121, |
| "grad_norm": 0.18465188145637512, |
| "learning_rate": 1.137639325737549e-05, |
| "loss": 0.0068, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.6529343179168287, |
| "grad_norm": 0.1789904236793518, |
| "learning_rate": 1.1346105572386646e-05, |
| "loss": 0.0091, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.6534525197564451, |
| "grad_norm": 0.10834602266550064, |
| "learning_rate": 1.13158422859278e-05, |
| "loss": 0.0048, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.6539707215960616, |
| "grad_norm": 0.1637331247329712, |
| "learning_rate": 1.1285603483322362e-05, |
| "loss": 0.0077, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.6544889234356782, |
| "grad_norm": 0.3413477838039398, |
| "learning_rate": 1.1255389249824696e-05, |
| "loss": 0.0208, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.6550071252752947, |
| "grad_norm": 0.1726699322462082, |
| "learning_rate": 1.1225199670619899e-05, |
| "loss": 0.0051, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.6555253271149113, |
| "grad_norm": 0.38212427496910095, |
| "learning_rate": 1.1195034830823583e-05, |
| "loss": 0.0242, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.6560435289545278, |
| "grad_norm": 0.2110847383737564, |
| "learning_rate": 1.116489481548157e-05, |
| "loss": 0.0113, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.6565617307941444, |
| "grad_norm": 0.27684441208839417, |
| "learning_rate": 1.1134779709569732e-05, |
| "loss": 0.0153, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.6570799326337609, |
| "grad_norm": 0.7447668313980103, |
| "learning_rate": 1.1104689597993677e-05, |
| "loss": 0.0215, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.6575981344733773, |
| "grad_norm": 0.281093955039978, |
| "learning_rate": 1.107462456558857e-05, |
| "loss": 0.0203, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.6581163363129939, |
| "grad_norm": 0.39335325360298157, |
| "learning_rate": 1.1044584697118868e-05, |
| "loss": 0.0326, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.6586345381526104, |
| "grad_norm": 0.2588825523853302, |
| "learning_rate": 1.1014570077278057e-05, |
| "loss": 0.0261, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.659152739992227, |
| "grad_norm": 0.19893081486225128, |
| "learning_rate": 1.0984580790688463e-05, |
| "loss": 0.0097, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.6596709418318435, |
| "grad_norm": 0.19710324704647064, |
| "learning_rate": 1.0954616921900982e-05, |
| "loss": 0.0202, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.6601891436714601, |
| "grad_norm": 0.5276082158088684, |
| "learning_rate": 1.0924678555394852e-05, |
| "loss": 0.0363, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.6607073455110766, |
| "grad_norm": 0.11454619467258453, |
| "learning_rate": 1.0894765775577385e-05, |
| "loss": 0.0064, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.661225547350693, |
| "grad_norm": 0.20593585073947906, |
| "learning_rate": 1.0864878666783789e-05, |
| "loss": 0.009, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.6617437491903096, |
| "grad_norm": 0.23478110134601593, |
| "learning_rate": 1.0835017313276874e-05, |
| "loss": 0.0177, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.6622619510299261, |
| "grad_norm": 0.4277084171772003, |
| "learning_rate": 1.0805181799246856e-05, |
| "loss": 0.0237, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.6627801528695427, |
| "grad_norm": 0.3500283658504486, |
| "learning_rate": 1.0775372208811067e-05, |
| "loss": 0.0214, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.6632983547091592, |
| "grad_norm": 0.23808035254478455, |
| "learning_rate": 1.074558862601378e-05, |
| "loss": 0.024, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.6638165565487757, |
| "grad_norm": 0.2519163489341736, |
| "learning_rate": 1.0715831134825936e-05, |
| "loss": 0.0121, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.6643347583883923, |
| "grad_norm": 0.2995316982269287, |
| "learning_rate": 1.068609981914492e-05, |
| "loss": 0.0145, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.6648529602280088, |
| "grad_norm": 0.3232516050338745, |
| "learning_rate": 1.065639476279429e-05, |
| "loss": 0.0092, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.6653711620676254, |
| "grad_norm": 0.3077804744243622, |
| "learning_rate": 1.06267160495236e-05, |
| "loss": 0.0111, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.6658893639072418, |
| "grad_norm": 0.3714086413383484, |
| "learning_rate": 1.059706376300813e-05, |
| "loss": 0.0085, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.6664075657468584, |
| "grad_norm": 0.17870378494262695, |
| "learning_rate": 1.0567437986848635e-05, |
| "loss": 0.0105, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.6669257675864749, |
| "grad_norm": 0.15498405694961548, |
| "learning_rate": 1.0537838804571145e-05, |
| "loss": 0.0071, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.6674439694260914, |
| "grad_norm": 0.2603953778743744, |
| "learning_rate": 1.05082662996267e-05, |
| "loss": 0.0155, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.667962171265708, |
| "grad_norm": 0.2905248999595642, |
| "learning_rate": 1.0478720555391131e-05, |
| "loss": 0.0151, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.6684803731053245, |
| "grad_norm": 0.2210564762353897, |
| "learning_rate": 1.0449201655164836e-05, |
| "loss": 0.0097, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.6689985749449411, |
| "grad_norm": 0.2719863951206207, |
| "learning_rate": 1.0419709682172495e-05, |
| "loss": 0.0161, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.6695167767845576, |
| "grad_norm": 0.2112817019224167, |
| "learning_rate": 1.0390244719562901e-05, |
| "loss": 0.0126, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.6700349786241742, |
| "grad_norm": 0.12838172912597656, |
| "learning_rate": 1.0360806850408684e-05, |
| "loss": 0.0069, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.6705531804637906, |
| "grad_norm": 0.1306982785463333, |
| "learning_rate": 1.0331396157706098e-05, |
| "loss": 0.0044, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.6710713823034071, |
| "grad_norm": 0.25365006923675537, |
| "learning_rate": 1.0302012724374748e-05, |
| "loss": 0.0144, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.6715895841430237, |
| "grad_norm": 0.18223069608211517, |
| "learning_rate": 1.0272656633257412e-05, |
| "loss": 0.0105, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.6721077859826402, |
| "grad_norm": 0.3096601068973541, |
| "learning_rate": 1.0243327967119772e-05, |
| "loss": 0.0104, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.6726259878222568, |
| "grad_norm": 0.10256693512201309, |
| "learning_rate": 1.0214026808650197e-05, |
| "loss": 0.0048, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.6731441896618733, |
| "grad_norm": 0.2325192242860794, |
| "learning_rate": 1.0184753240459475e-05, |
| "loss": 0.02, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.6736623915014899, |
| "grad_norm": 0.1683931052684784, |
| "learning_rate": 1.0155507345080635e-05, |
| "loss": 0.0054, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.6741805933411064, |
| "grad_norm": 0.14104929566383362, |
| "learning_rate": 1.0126289204968673e-05, |
| "loss": 0.0085, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.6746987951807228, |
| "grad_norm": 0.18550588190555573, |
| "learning_rate": 1.0097098902500346e-05, |
| "loss": 0.0135, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.6752169970203394, |
| "grad_norm": 0.1749538630247116, |
| "learning_rate": 1.0067936519973901e-05, |
| "loss": 0.0151, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.6757351988599559, |
| "grad_norm": 0.12418388575315475, |
| "learning_rate": 1.0038802139608897e-05, |
| "loss": 0.0039, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.6762534006995725, |
| "grad_norm": 0.3040088713169098, |
| "learning_rate": 1.0009695843545914e-05, |
| "loss": 0.0169, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.676771602539189, |
| "grad_norm": 0.1392335146665573, |
| "learning_rate": 9.980617713846382e-06, |
| "loss": 0.0053, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.6772898043788056, |
| "grad_norm": 0.19270426034927368, |
| "learning_rate": 9.951567832492308e-06, |
| "loss": 0.0063, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.6778080062184221, |
| "grad_norm": 1.204883098602295, |
| "learning_rate": 9.922546281386044e-06, |
| "loss": 0.0185, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.6783262080580386, |
| "grad_norm": 0.30739647150039673, |
| "learning_rate": 9.893553142350083e-06, |
| "loss": 0.0091, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.6788444098976552, |
| "grad_norm": 0.32659727334976196, |
| "learning_rate": 9.864588497126812e-06, |
| "loss": 0.0201, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.6793626117372716, |
| "grad_norm": 0.4216388165950775, |
| "learning_rate": 9.83565242737829e-06, |
| "loss": 0.0188, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.6798808135768882, |
| "grad_norm": 0.4315127730369568, |
| "learning_rate": 9.80674501468599e-06, |
| "loss": 0.0079, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.6803990154165047, |
| "grad_norm": 0.7377837300300598, |
| "learning_rate": 9.777866340550606e-06, |
| "loss": 0.024, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.6809172172561213, |
| "grad_norm": 0.10035476833581924, |
| "learning_rate": 9.749016486391805e-06, |
| "loss": 0.0041, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.6814354190957378, |
| "grad_norm": 0.18348383903503418, |
| "learning_rate": 9.720195533548012e-06, |
| "loss": 0.0143, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.6819536209353543, |
| "grad_norm": 0.274054616689682, |
| "learning_rate": 9.691403563276132e-06, |
| "loss": 0.0084, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.6824718227749709, |
| "grad_norm": 0.3879346251487732, |
| "learning_rate": 9.662640656751396e-06, |
| "loss": 0.0154, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.6829900246145874, |
| "grad_norm": 0.6137412190437317, |
| "learning_rate": 9.633906895067075e-06, |
| "loss": 0.0083, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.683508226454204, |
| "grad_norm": 0.2440262883901596, |
| "learning_rate": 9.605202359234284e-06, |
| "loss": 0.0166, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.6840264282938204, |
| "grad_norm": 0.1609223335981369, |
| "learning_rate": 9.576527130181713e-06, |
| "loss": 0.0113, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.684544630133437, |
| "grad_norm": 0.37860408425331116, |
| "learning_rate": 9.547881288755455e-06, |
| "loss": 0.0137, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.6850628319730535, |
| "grad_norm": 0.0857042595744133, |
| "learning_rate": 9.519264915718717e-06, |
| "loss": 0.0083, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.68558103381267, |
| "grad_norm": 0.24749597907066345, |
| "learning_rate": 9.490678091751665e-06, |
| "loss": 0.0295, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.6860992356522866, |
| "grad_norm": 0.22808784246444702, |
| "learning_rate": 9.462120897451108e-06, |
| "loss": 0.0137, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.6866174374919031, |
| "grad_norm": 0.46637922525405884, |
| "learning_rate": 9.433593413330355e-06, |
| "loss": 0.0244, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.6871356393315197, |
| "grad_norm": 0.349376916885376, |
| "learning_rate": 9.405095719818938e-06, |
| "loss": 0.0121, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.6876538411711361, |
| "grad_norm": 0.5376272797584534, |
| "learning_rate": 9.3766278972624e-06, |
| "loss": 0.0246, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.6881720430107527, |
| "grad_norm": 0.47698912024497986, |
| "learning_rate": 9.348190025922055e-06, |
| "loss": 0.0269, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.6886902448503692, |
| "grad_norm": 0.24001258611679077, |
| "learning_rate": 9.31978218597479e-06, |
| "loss": 0.008, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.6892084466899857, |
| "grad_norm": 0.14646369218826294, |
| "learning_rate": 9.291404457512813e-06, |
| "loss": 0.0119, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.6897266485296023, |
| "grad_norm": 0.5990729331970215, |
| "learning_rate": 9.26305692054345e-06, |
| "loss": 0.0223, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.6902448503692188, |
| "grad_norm": 1.1851770877838135, |
| "learning_rate": 9.234739654988879e-06, |
| "loss": 0.0298, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.6907630522088354, |
| "grad_norm": 0.5354000329971313, |
| "learning_rate": 9.206452740685954e-06, |
| "loss": 0.0333, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.6912812540484519, |
| "grad_norm": 0.6018664240837097, |
| "learning_rate": 9.178196257385952e-06, |
| "loss": 0.0234, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.6917994558880683, |
| "grad_norm": 0.23644517362117767, |
| "learning_rate": 9.14997028475436e-06, |
| "loss": 0.0076, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.6923176577276849, |
| "grad_norm": 0.14124254882335663, |
| "learning_rate": 9.121774902370628e-06, |
| "loss": 0.0104, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.6928358595673014, |
| "grad_norm": 0.37195298075675964, |
| "learning_rate": 9.09361018972797e-06, |
| "loss": 0.0303, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.693354061406918, |
| "grad_norm": 0.24142932891845703, |
| "learning_rate": 9.065476226233143e-06, |
| "loss": 0.0097, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.6938722632465345, |
| "grad_norm": 0.17429199814796448, |
| "learning_rate": 9.037373091206182e-06, |
| "loss": 0.0134, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.6943904650861511, |
| "grad_norm": 0.17789122462272644, |
| "learning_rate": 9.009300863880236e-06, |
| "loss": 0.0131, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.6949086669257676, |
| "grad_norm": 0.1673128455877304, |
| "learning_rate": 8.981259623401288e-06, |
| "loss": 0.0164, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.6954268687653841, |
| "grad_norm": 0.20929870009422302, |
| "learning_rate": 8.953249448827971e-06, |
| "loss": 0.0098, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.6959450706050007, |
| "grad_norm": 0.1541186422109604, |
| "learning_rate": 8.925270419131342e-06, |
| "loss": 0.0134, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.6964632724446171, |
| "grad_norm": 0.2542741596698761, |
| "learning_rate": 8.89732261319462e-06, |
| "loss": 0.0255, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.6969814742842337, |
| "grad_norm": 0.20983968675136566, |
| "learning_rate": 8.869406109813017e-06, |
| "loss": 0.0146, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.6974996761238502, |
| "grad_norm": 0.3096301555633545, |
| "learning_rate": 8.841520987693486e-06, |
| "loss": 0.014, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.6980178779634668, |
| "grad_norm": 0.20430825650691986, |
| "learning_rate": 8.813667325454507e-06, |
| "loss": 0.0156, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.6985360798030833, |
| "grad_norm": 0.14121589064598083, |
| "learning_rate": 8.785845201625844e-06, |
| "loss": 0.0077, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.6990542816426998, |
| "grad_norm": 0.11843699216842651, |
| "learning_rate": 8.758054694648368e-06, |
| "loss": 0.0126, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.6995724834823164, |
| "grad_norm": 0.32668405771255493, |
| "learning_rate": 8.730295882873798e-06, |
| "loss": 0.012, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.7000906853219329, |
| "grad_norm": 0.2675720751285553, |
| "learning_rate": 8.702568844564498e-06, |
| "loss": 0.0244, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.7006088871615495, |
| "grad_norm": 0.15671458840370178, |
| "learning_rate": 8.674873657893236e-06, |
| "loss": 0.01, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.7011270890011659, |
| "grad_norm": 0.10088402777910233, |
| "learning_rate": 8.647210400942998e-06, |
| "loss": 0.0052, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.7016452908407825, |
| "grad_norm": 0.5230119824409485, |
| "learning_rate": 8.619579151706735e-06, |
| "loss": 0.0263, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.702163492680399, |
| "grad_norm": 0.1367189586162567, |
| "learning_rate": 8.591979988087173e-06, |
| "loss": 0.0047, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.7026816945200155, |
| "grad_norm": 0.2677091658115387, |
| "learning_rate": 8.564412987896549e-06, |
| "loss": 0.0257, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.7031998963596321, |
| "grad_norm": 0.5703830122947693, |
| "learning_rate": 8.536878228856447e-06, |
| "loss": 0.0145, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.7037180981992486, |
| "grad_norm": 0.2206091731786728, |
| "learning_rate": 8.509375788597528e-06, |
| "loss": 0.0072, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.7042363000388652, |
| "grad_norm": 0.19818182289600372, |
| "learning_rate": 8.48190574465936e-06, |
| "loss": 0.0111, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.7047545018784817, |
| "grad_norm": 0.10023718327283859, |
| "learning_rate": 8.454468174490145e-06, |
| "loss": 0.0115, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.7052727037180982, |
| "grad_norm": 0.14288294315338135, |
| "learning_rate": 8.42706315544655e-06, |
| "loss": 0.0099, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.7057909055577147, |
| "grad_norm": 0.2115078568458557, |
| "learning_rate": 8.399690764793464e-06, |
| "loss": 0.0116, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.7063091073973312, |
| "grad_norm": 0.17733272910118103, |
| "learning_rate": 8.372351079703788e-06, |
| "loss": 0.0157, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.7068273092369478, |
| "grad_norm": 0.08629205077886581, |
| "learning_rate": 8.3450441772582e-06, |
| "loss": 0.0033, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.7073455110765643, |
| "grad_norm": 0.1436304897069931, |
| "learning_rate": 8.317770134444962e-06, |
| "loss": 0.018, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.7078637129161809, |
| "grad_norm": 0.27016061544418335, |
| "learning_rate": 8.290529028159696e-06, |
| "loss": 0.0254, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.7083819147557974, |
| "grad_norm": 0.2512115240097046, |
| "learning_rate": 8.26332093520516e-06, |
| "loss": 0.0224, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.708900116595414, |
| "grad_norm": 0.057625047862529755, |
| "learning_rate": 8.236145932291022e-06, |
| "loss": 0.0022, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.7094183184350304, |
| "grad_norm": 0.10072803497314453, |
| "learning_rate": 8.20900409603368e-06, |
| "loss": 0.0097, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.7099365202746469, |
| "grad_norm": 0.1774268001317978, |
| "learning_rate": 8.181895502956006e-06, |
| "loss": 0.0207, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.7104547221142635, |
| "grad_norm": 0.2120877206325531, |
| "learning_rate": 8.154820229487163e-06, |
| "loss": 0.0182, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.71097292395388, |
| "grad_norm": 0.12090042233467102, |
| "learning_rate": 8.127778351962349e-06, |
| "loss": 0.0047, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.7114911257934966, |
| "grad_norm": 0.2732641100883484, |
| "learning_rate": 8.100769946622632e-06, |
| "loss": 0.0178, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.7120093276331131, |
| "grad_norm": 0.12582547962665558, |
| "learning_rate": 8.0737950896147e-06, |
| "loss": 0.0053, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.7125275294727297, |
| "grad_norm": 0.10372617095708847, |
| "learning_rate": 8.046853856990643e-06, |
| "loss": 0.0083, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.7130457313123462, |
| "grad_norm": 0.1782783567905426, |
| "learning_rate": 8.019946324707776e-06, |
| "loss": 0.0167, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.7135639331519626, |
| "grad_norm": 0.2992905378341675, |
| "learning_rate": 7.993072568628378e-06, |
| "loss": 0.0197, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.7140821349915792, |
| "grad_norm": 0.3926917016506195, |
| "learning_rate": 7.966232664519508e-06, |
| "loss": 0.0256, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.7146003368311957, |
| "grad_norm": 0.1238306313753128, |
| "learning_rate": 7.9394266880528e-06, |
| "loss": 0.0075, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.7151185386708123, |
| "grad_norm": 0.1858736127614975, |
| "learning_rate": 7.912654714804201e-06, |
| "loss": 0.0112, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.7156367405104288, |
| "grad_norm": 0.08168400824069977, |
| "learning_rate": 7.885916820253818e-06, |
| "loss": 0.005, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.7161549423500454, |
| "grad_norm": 0.1177031546831131, |
| "learning_rate": 7.859213079785665e-06, |
| "loss": 0.0069, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.7166731441896619, |
| "grad_norm": 0.2270517796278, |
| "learning_rate": 7.832543568687474e-06, |
| "loss": 0.0127, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.7171913460292784, |
| "grad_norm": 0.12286113202571869, |
| "learning_rate": 7.805908362150449e-06, |
| "loss": 0.007, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.717709547868895, |
| "grad_norm": 0.2528562843799591, |
| "learning_rate": 7.779307535269099e-06, |
| "loss": 0.0126, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.7182277497085114, |
| "grad_norm": 0.289711594581604, |
| "learning_rate": 7.752741163040996e-06, |
| "loss": 0.0229, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.718745951548128, |
| "grad_norm": 0.24089190363883972, |
| "learning_rate": 7.726209320366575e-06, |
| "loss": 0.009, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.7192641533877445, |
| "grad_norm": 0.24581708014011383, |
| "learning_rate": 7.699712082048908e-06, |
| "loss": 0.013, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.719782355227361, |
| "grad_norm": 0.19005197286605835, |
| "learning_rate": 7.67324952279352e-06, |
| "loss": 0.0153, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.7203005570669776, |
| "grad_norm": 0.19025154411792755, |
| "learning_rate": 7.646821717208153e-06, |
| "loss": 0.0094, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.7208187589065941, |
| "grad_norm": 0.325736403465271, |
| "learning_rate": 7.620428739802575e-06, |
| "loss": 0.0228, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.7213369607462107, |
| "grad_norm": 0.1527811884880066, |
| "learning_rate": 7.594070664988338e-06, |
| "loss": 0.0101, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.7218551625858272, |
| "grad_norm": 0.2867499589920044, |
| "learning_rate": 7.567747567078625e-06, |
| "loss": 0.0218, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.7223733644254438, |
| "grad_norm": 0.09495671093463898, |
| "learning_rate": 7.54145952028797e-06, |
| "loss": 0.0051, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.7228915662650602, |
| "grad_norm": 0.376874178647995, |
| "learning_rate": 7.515206598732119e-06, |
| "loss": 0.0132, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.7234097681046767, |
| "grad_norm": 0.3421163558959961, |
| "learning_rate": 7.488988876427754e-06, |
| "loss": 0.0243, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.7239279699442933, |
| "grad_norm": 0.27136725187301636, |
| "learning_rate": 7.462806427292342e-06, |
| "loss": 0.0154, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.7244461717839098, |
| "grad_norm": 0.4345359802246094, |
| "learning_rate": 7.436659325143893e-06, |
| "loss": 0.0247, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.7249643736235264, |
| "grad_norm": 0.13415028154850006, |
| "learning_rate": 7.41054764370077e-06, |
| "loss": 0.0079, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.7254825754631429, |
| "grad_norm": 0.3060521185398102, |
| "learning_rate": 7.384471456581448e-06, |
| "loss": 0.0308, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7260007773027595, |
| "grad_norm": 0.13133402168750763, |
| "learning_rate": 7.358430837304353e-06, |
| "loss": 0.0083, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.726518979142376, |
| "grad_norm": 0.168381467461586, |
| "learning_rate": 7.332425859287624e-06, |
| "loss": 0.0075, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.7270371809819924, |
| "grad_norm": 0.12512297928333282, |
| "learning_rate": 7.306456595848923e-06, |
| "loss": 0.0066, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.727555382821609, |
| "grad_norm": 0.1732812225818634, |
| "learning_rate": 7.280523120205196e-06, |
| "loss": 0.0146, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.7280735846612255, |
| "grad_norm": 0.27395644783973694, |
| "learning_rate": 7.254625505472513e-06, |
| "loss": 0.0174, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.7285917865008421, |
| "grad_norm": 0.24181146919727325, |
| "learning_rate": 7.228763824665832e-06, |
| "loss": 0.0203, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.7291099883404586, |
| "grad_norm": 0.32806146144866943, |
| "learning_rate": 7.202938150698804e-06, |
| "loss": 0.0181, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.7296281901800752, |
| "grad_norm": 0.12564238905906677, |
| "learning_rate": 7.1771485563835465e-06, |
| "loss": 0.0094, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.7301463920196917, |
| "grad_norm": 0.3716842532157898, |
| "learning_rate": 7.151395114430473e-06, |
| "loss": 0.025, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.7306645938593082, |
| "grad_norm": 0.06230960413813591, |
| "learning_rate": 7.1256778974480686e-06, |
| "loss": 0.0041, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.7311827956989247, |
| "grad_norm": 0.32549139857292175, |
| "learning_rate": 7.099996977942673e-06, |
| "loss": 0.0137, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.7317009975385412, |
| "grad_norm": 0.13841503858566284, |
| "learning_rate": 7.074352428318312e-06, |
| "loss": 0.0106, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.7322191993781578, |
| "grad_norm": 0.12878160178661346, |
| "learning_rate": 7.048744320876444e-06, |
| "loss": 0.0053, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.7327374012177743, |
| "grad_norm": 0.14967595040798187, |
| "learning_rate": 7.0231727278158035e-06, |
| "loss": 0.0051, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.7332556030573909, |
| "grad_norm": 0.16011378169059753, |
| "learning_rate": 6.99763772123218e-06, |
| "loss": 0.0055, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.7337738048970074, |
| "grad_norm": 0.29788485169410706, |
| "learning_rate": 6.97213937311819e-06, |
| "loss": 0.0221, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.7342920067366239, |
| "grad_norm": 0.33558785915374756, |
| "learning_rate": 6.946677755363116e-06, |
| "loss": 0.0087, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.7348102085762405, |
| "grad_norm": 0.22806207835674286, |
| "learning_rate": 6.921252939752681e-06, |
| "loss": 0.0257, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.735328410415857, |
| "grad_norm": 0.24824737012386322, |
| "learning_rate": 6.895864997968851e-06, |
| "loss": 0.0143, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.7358466122554735, |
| "grad_norm": 0.1711535006761551, |
| "learning_rate": 6.8705140015896145e-06, |
| "loss": 0.0099, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.73636481409509, |
| "grad_norm": 0.19238120317459106, |
| "learning_rate": 6.8452000220888135e-06, |
| "loss": 0.0104, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.7368830159347066, |
| "grad_norm": 0.21341420710086823, |
| "learning_rate": 6.819923130835926e-06, |
| "loss": 0.0266, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.7374012177743231, |
| "grad_norm": 0.23991352319717407, |
| "learning_rate": 6.79468339909586e-06, |
| "loss": 0.0159, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.7379194196139396, |
| "grad_norm": 0.15172316133975983, |
| "learning_rate": 6.769480898028751e-06, |
| "loss": 0.0123, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.7384376214535562, |
| "grad_norm": 0.11418157070875168, |
| "learning_rate": 6.744315698689774e-06, |
| "loss": 0.0089, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.7389558232931727, |
| "grad_norm": 0.1747230738401413, |
| "learning_rate": 6.719187872028938e-06, |
| "loss": 0.0148, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.7394740251327893, |
| "grad_norm": 0.6717311143875122, |
| "learning_rate": 6.694097488890889e-06, |
| "loss": 0.0244, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.7399922269724057, |
| "grad_norm": 0.27945640683174133, |
| "learning_rate": 6.669044620014691e-06, |
| "loss": 0.0115, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.7405104288120223, |
| "grad_norm": 0.13768765330314636, |
| "learning_rate": 6.644029336033657e-06, |
| "loss": 0.0123, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.7410286306516388, |
| "grad_norm": 0.19821152091026306, |
| "learning_rate": 6.6190517074751145e-06, |
| "loss": 0.0162, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.7415468324912553, |
| "grad_norm": 0.2635100781917572, |
| "learning_rate": 6.594111804760257e-06, |
| "loss": 0.0164, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.7420650343308719, |
| "grad_norm": 0.16833581030368805, |
| "learning_rate": 6.56920969820388e-06, |
| "loss": 0.0117, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.7425832361704884, |
| "grad_norm": 0.12360576540231705, |
| "learning_rate": 6.5443454580142425e-06, |
| "loss": 0.0069, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.743101438010105, |
| "grad_norm": 0.1418430656194687, |
| "learning_rate": 6.519519154292837e-06, |
| "loss": 0.0077, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.7436196398497215, |
| "grad_norm": 0.21706590056419373, |
| "learning_rate": 6.494730857034202e-06, |
| "loss": 0.0152, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.7441378416893379, |
| "grad_norm": 0.2138921618461609, |
| "learning_rate": 6.46998063612571e-06, |
| "loss": 0.0142, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.7446560435289545, |
| "grad_norm": 0.37199369072914124, |
| "learning_rate": 6.445268561347393e-06, |
| "loss": 0.0298, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.745174245368571, |
| "grad_norm": 0.13695186376571655, |
| "learning_rate": 6.4205947023717344e-06, |
| "loss": 0.0057, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.7456924472081876, |
| "grad_norm": 0.19427013397216797, |
| "learning_rate": 6.395959128763476e-06, |
| "loss": 0.0125, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.7462106490478041, |
| "grad_norm": 0.3446883261203766, |
| "learning_rate": 6.371361909979401e-06, |
| "loss": 0.0246, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.7467288508874207, |
| "grad_norm": 0.14575010538101196, |
| "learning_rate": 6.3468031153681785e-06, |
| "loss": 0.0164, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.7472470527270372, |
| "grad_norm": 0.4180688261985779, |
| "learning_rate": 6.3222828141701335e-06, |
| "loss": 0.0192, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.7477652545666537, |
| "grad_norm": 0.3175865709781647, |
| "learning_rate": 6.297801075517074e-06, |
| "loss": 0.0189, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.7482834564062703, |
| "grad_norm": 0.16741381585597992, |
| "learning_rate": 6.2733579684320655e-06, |
| "loss": 0.0105, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.7488016582458867, |
| "grad_norm": 0.344389408826828, |
| "learning_rate": 6.248953561829276e-06, |
| "loss": 0.0225, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.7493198600855033, |
| "grad_norm": 0.31048911809921265, |
| "learning_rate": 6.224587924513763e-06, |
| "loss": 0.0107, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.7498380619251198, |
| "grad_norm": 0.3725675344467163, |
| "learning_rate": 6.200261125181262e-06, |
| "loss": 0.023, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.7503562637647364, |
| "grad_norm": 0.19193294644355774, |
| "learning_rate": 6.175973232418029e-06, |
| "loss": 0.0183, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.7508744656043529, |
| "grad_norm": 0.28560853004455566, |
| "learning_rate": 6.151724314700609e-06, |
| "loss": 0.0095, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.7513926674439694, |
| "grad_norm": 0.18966034054756165, |
| "learning_rate": 6.127514440395676e-06, |
| "loss": 0.0159, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.751910869283586, |
| "grad_norm": 0.13293716311454773, |
| "learning_rate": 6.103343677759834e-06, |
| "loss": 0.005, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.7524290711232025, |
| "grad_norm": 0.47814297676086426, |
| "learning_rate": 6.0792120949393885e-06, |
| "loss": 0.0251, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.752947272962819, |
| "grad_norm": 0.2816180884838104, |
| "learning_rate": 6.055119759970205e-06, |
| "loss": 0.0161, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.7534654748024355, |
| "grad_norm": 0.45219457149505615, |
| "learning_rate": 6.031066740777491e-06, |
| "loss": 0.0177, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.7539836766420521, |
| "grad_norm": 0.14777222275733948, |
| "learning_rate": 6.007053105175613e-06, |
| "loss": 0.0073, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.7545018784816686, |
| "grad_norm": 0.21083438396453857, |
| "learning_rate": 5.983078920867879e-06, |
| "loss": 0.0141, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.7550200803212851, |
| "grad_norm": 0.22181129455566406, |
| "learning_rate": 5.959144255446392e-06, |
| "loss": 0.013, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.7555382821609017, |
| "grad_norm": 0.11170242726802826, |
| "learning_rate": 5.935249176391828e-06, |
| "loss": 0.0052, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.7560564840005182, |
| "grad_norm": 0.21665945649147034, |
| "learning_rate": 5.911393751073262e-06, |
| "loss": 0.0115, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.7565746858401348, |
| "grad_norm": 0.22410617768764496, |
| "learning_rate": 5.8875780467479525e-06, |
| "loss": 0.0074, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.7570928876797512, |
| "grad_norm": 0.16454528272151947, |
| "learning_rate": 5.863802130561183e-06, |
| "loss": 0.0078, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.7576110895193678, |
| "grad_norm": 0.1663055717945099, |
| "learning_rate": 5.840066069546062e-06, |
| "loss": 0.0119, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.7581292913589843, |
| "grad_norm": 0.3612779378890991, |
| "learning_rate": 5.816369930623329e-06, |
| "loss": 0.0238, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.7586474931986008, |
| "grad_norm": 0.17329257726669312, |
| "learning_rate": 5.792713780601158e-06, |
| "loss": 0.0107, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.7591656950382174, |
| "grad_norm": 0.201755091547966, |
| "learning_rate": 5.769097686174989e-06, |
| "loss": 0.0097, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.7596838968778339, |
| "grad_norm": 0.26469019055366516, |
| "learning_rate": 5.74552171392734e-06, |
| "loss": 0.0111, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.7602020987174505, |
| "grad_norm": 0.08461549133062363, |
| "learning_rate": 5.721985930327585e-06, |
| "loss": 0.0038, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.760720300557067, |
| "grad_norm": 0.09042751044034958, |
| "learning_rate": 5.698490401731818e-06, |
| "loss": 0.0037, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.7612385023966836, |
| "grad_norm": 0.2408987283706665, |
| "learning_rate": 5.675035194382614e-06, |
| "loss": 0.0135, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.7617567042363, |
| "grad_norm": 0.1702120453119278, |
| "learning_rate": 5.651620374408886e-06, |
| "loss": 0.0184, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.7622749060759165, |
| "grad_norm": 0.25853922963142395, |
| "learning_rate": 5.6282460078256795e-06, |
| "loss": 0.0135, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.7627931079155331, |
| "grad_norm": 0.40889886021614075, |
| "learning_rate": 5.604912160533968e-06, |
| "loss": 0.0143, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.7633113097551496, |
| "grad_norm": 0.35814812779426575, |
| "learning_rate": 5.581618898320511e-06, |
| "loss": 0.0228, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.7638295115947662, |
| "grad_norm": 0.15147720277309418, |
| "learning_rate": 5.558366286857628e-06, |
| "loss": 0.0106, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.7643477134343827, |
| "grad_norm": 0.35429468750953674, |
| "learning_rate": 5.535154391703039e-06, |
| "loss": 0.0231, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.7648659152739993, |
| "grad_norm": 0.28859689831733704, |
| "learning_rate": 5.511983278299655e-06, |
| "loss": 0.011, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.7653841171136158, |
| "grad_norm": 0.22457551956176758, |
| "learning_rate": 5.488853011975421e-06, |
| "loss": 0.0123, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.7659023189532322, |
| "grad_norm": 0.29618382453918457, |
| "learning_rate": 5.465763657943115e-06, |
| "loss": 0.0175, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.7664205207928488, |
| "grad_norm": 0.28490161895751953, |
| "learning_rate": 5.442715281300175e-06, |
| "loss": 0.0155, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.7669387226324653, |
| "grad_norm": 0.2284500151872635, |
| "learning_rate": 5.41970794702849e-06, |
| "loss": 0.0145, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.7674569244720819, |
| "grad_norm": 0.25994613766670227, |
| "learning_rate": 5.396741719994252e-06, |
| "loss": 0.0146, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.7679751263116984, |
| "grad_norm": 0.1707499772310257, |
| "learning_rate": 5.373816664947753e-06, |
| "loss": 0.0062, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.768493328151315, |
| "grad_norm": 0.17810523509979248, |
| "learning_rate": 5.3509328465232095e-06, |
| "loss": 0.0183, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.7690115299909315, |
| "grad_norm": 0.1567215621471405, |
| "learning_rate": 5.32809032923856e-06, |
| "loss": 0.0086, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.769529731830548, |
| "grad_norm": 0.11671585589647293, |
| "learning_rate": 5.305289177495323e-06, |
| "loss": 0.0065, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.7700479336701646, |
| "grad_norm": 0.16693814098834991, |
| "learning_rate": 5.282529455578369e-06, |
| "loss": 0.0067, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.770566135509781, |
| "grad_norm": 0.15044854581356049, |
| "learning_rate": 5.259811227655789e-06, |
| "loss": 0.0061, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.7710843373493976, |
| "grad_norm": 0.2685668170452118, |
| "learning_rate": 5.2371345577786555e-06, |
| "loss": 0.0145, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.7716025391890141, |
| "grad_norm": 0.24585674703121185, |
| "learning_rate": 5.214499509880901e-06, |
| "loss": 0.009, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.7721207410286306, |
| "grad_norm": 0.48471522331237793, |
| "learning_rate": 5.191906147779098e-06, |
| "loss": 0.0177, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.7726389428682472, |
| "grad_norm": 0.27829957008361816, |
| "learning_rate": 5.1693545351722974e-06, |
| "loss": 0.0313, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.7731571447078637, |
| "grad_norm": 0.1682707667350769, |
| "learning_rate": 5.146844735641827e-06, |
| "loss": 0.0137, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.7736753465474803, |
| "grad_norm": 0.12730175256729126, |
| "learning_rate": 5.124376812651149e-06, |
| "loss": 0.0079, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.7741935483870968, |
| "grad_norm": 0.2443925142288208, |
| "learning_rate": 5.101950829545646e-06, |
| "loss": 0.0138, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.7747117502267133, |
| "grad_norm": 0.2247922420501709, |
| "learning_rate": 5.079566849552466e-06, |
| "loss": 0.0103, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.7752299520663298, |
| "grad_norm": 0.38228839635849, |
| "learning_rate": 5.057224935780325e-06, |
| "loss": 0.0147, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.7757481539059463, |
| "grad_norm": 0.425642192363739, |
| "learning_rate": 5.034925151219343e-06, |
| "loss": 0.0204, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.7762663557455629, |
| "grad_norm": 0.16054239869117737, |
| "learning_rate": 5.012667558740865e-06, |
| "loss": 0.007, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.7767845575851794, |
| "grad_norm": 0.13907340168952942, |
| "learning_rate": 4.9904522210972836e-06, |
| "loss": 0.0033, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.777302759424796, |
| "grad_norm": 0.32811591029167175, |
| "learning_rate": 4.968279200921844e-06, |
| "loss": 0.01, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.7778209612644125, |
| "grad_norm": 0.31322336196899414, |
| "learning_rate": 4.946148560728501e-06, |
| "loss": 0.0197, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.7783391631040291, |
| "grad_norm": 0.25639650225639343, |
| "learning_rate": 4.9240603629117175e-06, |
| "loss": 0.0124, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.7788573649436455, |
| "grad_norm": 0.31371498107910156, |
| "learning_rate": 4.902014669746286e-06, |
| "loss": 0.0152, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.779375566783262, |
| "grad_norm": 0.15151162445545197, |
| "learning_rate": 4.880011543387186e-06, |
| "loss": 0.011, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.7798937686228786, |
| "grad_norm": 0.2916856110095978, |
| "learning_rate": 4.858051045869354e-06, |
| "loss": 0.0136, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.7804119704624951, |
| "grad_norm": 0.25480547547340393, |
| "learning_rate": 4.836133239107567e-06, |
| "loss": 0.0234, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.7809301723021117, |
| "grad_norm": 0.18069693446159363, |
| "learning_rate": 4.814258184896234e-06, |
| "loss": 0.0098, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.7814483741417282, |
| "grad_norm": 0.1400531381368637, |
| "learning_rate": 4.792425944909218e-06, |
| "loss": 0.0077, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.7819665759813448, |
| "grad_norm": 0.27684223651885986, |
| "learning_rate": 4.770636580699681e-06, |
| "loss": 0.0176, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.7824847778209613, |
| "grad_norm": 0.12374227494001389, |
| "learning_rate": 4.748890153699907e-06, |
| "loss": 0.009, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.7830029796605777, |
| "grad_norm": 0.06070549041032791, |
| "learning_rate": 4.7271867252211224e-06, |
| "loss": 0.0047, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.7835211815001943, |
| "grad_norm": 0.2304593175649643, |
| "learning_rate": 4.7055263564533096e-06, |
| "loss": 0.0131, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.7840393833398108, |
| "grad_norm": 0.24694843590259552, |
| "learning_rate": 4.683909108465068e-06, |
| "loss": 0.0104, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.7845575851794274, |
| "grad_norm": 0.22078731656074524, |
| "learning_rate": 4.662335042203417e-06, |
| "loss": 0.0157, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.7850757870190439, |
| "grad_norm": 0.13630007207393646, |
| "learning_rate": 4.640804218493638e-06, |
| "loss": 0.0028, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.7855939888586605, |
| "grad_norm": 0.5278586149215698, |
| "learning_rate": 4.6193166980390755e-06, |
| "loss": 0.0158, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.786112190698277, |
| "grad_norm": 0.08716629445552826, |
| "learning_rate": 4.597872541421007e-06, |
| "loss": 0.0089, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.7866303925378935, |
| "grad_norm": 0.18373537063598633, |
| "learning_rate": 4.5764718090984395e-06, |
| "loss": 0.0065, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.7871485943775101, |
| "grad_norm": 0.3253873586654663, |
| "learning_rate": 4.555114561407965e-06, |
| "loss": 0.0167, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.7876667962171265, |
| "grad_norm": 0.22763191163539886, |
| "learning_rate": 4.533800858563553e-06, |
| "loss": 0.0191, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.7881849980567431, |
| "grad_norm": 0.09696512669324875, |
| "learning_rate": 4.512530760656429e-06, |
| "loss": 0.007, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.7887031998963596, |
| "grad_norm": 0.2907656133174896, |
| "learning_rate": 4.491304327654855e-06, |
| "loss": 0.0091, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.7892214017359762, |
| "grad_norm": 0.3057742714881897, |
| "learning_rate": 4.4701216194040105e-06, |
| "loss": 0.0132, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.7897396035755927, |
| "grad_norm": 0.374776154756546, |
| "learning_rate": 4.448982695625778e-06, |
| "loss": 0.0206, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.7902578054152092, |
| "grad_norm": 0.14000919461250305, |
| "learning_rate": 4.4278876159186064e-06, |
| "loss": 0.013, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.7907760072548258, |
| "grad_norm": 0.2944836914539337, |
| "learning_rate": 4.4068364397573335e-06, |
| "loss": 0.0087, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.7912942090944423, |
| "grad_norm": 0.5514625310897827, |
| "learning_rate": 4.385829226493015e-06, |
| "loss": 0.0183, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.7918124109340589, |
| "grad_norm": 0.29109495878219604, |
| "learning_rate": 4.364866035352749e-06, |
| "loss": 0.0127, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.7923306127736753, |
| "grad_norm": 0.5156393051147461, |
| "learning_rate": 4.343946925439529e-06, |
| "loss": 0.0207, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.7928488146132919, |
| "grad_norm": 0.2541627883911133, |
| "learning_rate": 4.323071955732065e-06, |
| "loss": 0.0173, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.7933670164529084, |
| "grad_norm": 0.2753004729747772, |
| "learning_rate": 4.30224118508463e-06, |
| "loss": 0.018, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.7938852182925249, |
| "grad_norm": 0.15237921476364136, |
| "learning_rate": 4.2814546722268595e-06, |
| "loss": 0.0178, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.7944034201321415, |
| "grad_norm": 0.0733996033668518, |
| "learning_rate": 4.260712475763627e-06, |
| "loss": 0.0069, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.794921621971758, |
| "grad_norm": 0.159640833735466, |
| "learning_rate": 4.240014654174857e-06, |
| "loss": 0.0123, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.7954398238113746, |
| "grad_norm": 0.26743224263191223, |
| "learning_rate": 4.219361265815378e-06, |
| "loss": 0.0073, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.795958025650991, |
| "grad_norm": 0.15401487052440643, |
| "learning_rate": 4.1987523689147155e-06, |
| "loss": 0.0079, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.7964762274906076, |
| "grad_norm": 0.22780843079090118, |
| "learning_rate": 4.178188021576983e-06, |
| "loss": 0.0079, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.7969944293302241, |
| "grad_norm": 0.3683816194534302, |
| "learning_rate": 4.157668281780689e-06, |
| "loss": 0.0155, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.7975126311698406, |
| "grad_norm": 0.27427056431770325, |
| "learning_rate": 4.137193207378561e-06, |
| "loss": 0.0213, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.7980308330094572, |
| "grad_norm": 0.1827021986246109, |
| "learning_rate": 4.116762856097416e-06, |
| "loss": 0.0092, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.7985490348490737, |
| "grad_norm": 0.08515940606594086, |
| "learning_rate": 4.096377285537965e-06, |
| "loss": 0.0036, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.7990672366886903, |
| "grad_norm": 0.23422442376613617, |
| "learning_rate": 4.076036553174678e-06, |
| "loss": 0.0271, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.7995854385283068, |
| "grad_norm": 0.21451188623905182, |
| "learning_rate": 4.05574071635561e-06, |
| "loss": 0.0212, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.8001036403679233, |
| "grad_norm": 0.08211307972669601, |
| "learning_rate": 4.035489832302224e-06, |
| "loss": 0.0045, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.8006218422075398, |
| "grad_norm": 0.2552872896194458, |
| "learning_rate": 4.01528395810926e-06, |
| "loss": 0.0147, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.8011400440471563, |
| "grad_norm": 0.2829188406467438, |
| "learning_rate": 3.995123150744548e-06, |
| "loss": 0.021, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.8016582458867729, |
| "grad_norm": 0.33412739634513855, |
| "learning_rate": 3.975007467048875e-06, |
| "loss": 0.0249, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.8021764477263894, |
| "grad_norm": 0.05663935840129852, |
| "learning_rate": 3.954936963735778e-06, |
| "loss": 0.0023, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.802694649566006, |
| "grad_norm": 0.15037386119365692, |
| "learning_rate": 3.934911697391442e-06, |
| "loss": 0.0047, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.8032128514056225, |
| "grad_norm": 0.09823647886514664, |
| "learning_rate": 3.9149317244745e-06, |
| "loss": 0.0043, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.803731053245239, |
| "grad_norm": 0.1772301197052002, |
| "learning_rate": 3.8949971013158894e-06, |
| "loss": 0.0197, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.8042492550848556, |
| "grad_norm": 0.2572917342185974, |
| "learning_rate": 3.875107884118681e-06, |
| "loss": 0.0237, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.804767456924472, |
| "grad_norm": 0.1350460946559906, |
| "learning_rate": 3.8552641289579405e-06, |
| "loss": 0.0099, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.8052856587640886, |
| "grad_norm": 0.2017035335302353, |
| "learning_rate": 3.835465891780554e-06, |
| "loss": 0.0105, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.8058038606037051, |
| "grad_norm": 0.19643430411815643, |
| "learning_rate": 3.815713228405085e-06, |
| "loss": 0.0115, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.8063220624433217, |
| "grad_norm": 0.1383439600467682, |
| "learning_rate": 3.7960061945215863e-06, |
| "loss": 0.0089, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.8068402642829382, |
| "grad_norm": 0.16496983170509338, |
| "learning_rate": 3.7763448456914886e-06, |
| "loss": 0.0076, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.8073584661225547, |
| "grad_norm": 0.27516958117485046, |
| "learning_rate": 3.756729237347396e-06, |
| "loss": 0.0111, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.8078766679621713, |
| "grad_norm": 0.7920728325843811, |
| "learning_rate": 3.7371594247929776e-06, |
| "loss": 0.0204, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.8083948698017878, |
| "grad_norm": 0.21858440339565277, |
| "learning_rate": 3.7176354632027668e-06, |
| "loss": 0.019, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.8089130716414044, |
| "grad_norm": 0.3572098910808563, |
| "learning_rate": 3.698157407622034e-06, |
| "loss": 0.0384, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.8094312734810208, |
| "grad_norm": 0.23645535111427307, |
| "learning_rate": 3.678725312966629e-06, |
| "loss": 0.01, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.8099494753206374, |
| "grad_norm": 0.222391739487648, |
| "learning_rate": 3.6593392340228183e-06, |
| "loss": 0.0173, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.8104676771602539, |
| "grad_norm": 0.1789429634809494, |
| "learning_rate": 3.6399992254471215e-06, |
| "loss": 0.0097, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.8109858789998704, |
| "grad_norm": 0.37951716780662537, |
| "learning_rate": 3.620705341766184e-06, |
| "loss": 0.0233, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.811504080839487, |
| "grad_norm": 0.17974528670310974, |
| "learning_rate": 3.6014576373765973e-06, |
| "loss": 0.013, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.8120222826791035, |
| "grad_norm": 0.12391126900911331, |
| "learning_rate": 3.582256166544773e-06, |
| "loss": 0.0084, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.8125404845187201, |
| "grad_norm": 0.0891820415854454, |
| "learning_rate": 3.563100983406744e-06, |
| "loss": 0.0033, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.8130586863583366, |
| "grad_norm": 0.06003464758396149, |
| "learning_rate": 3.543992141968069e-06, |
| "loss": 0.0019, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.8135768881979532, |
| "grad_norm": 0.21977417171001434, |
| "learning_rate": 3.5249296961036384e-06, |
| "loss": 0.0143, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.8140950900375696, |
| "grad_norm": 0.12480836361646652, |
| "learning_rate": 3.5059136995575438e-06, |
| "loss": 0.0115, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.8146132918771861, |
| "grad_norm": 0.09526639431715012, |
| "learning_rate": 3.4869442059429084e-06, |
| "loss": 0.0101, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.8151314937168027, |
| "grad_norm": 0.20937477052211761, |
| "learning_rate": 3.4680212687417524e-06, |
| "loss": 0.0083, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.8156496955564192, |
| "grad_norm": 0.16887736320495605, |
| "learning_rate": 3.449144941304847e-06, |
| "loss": 0.0073, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.8161678973960358, |
| "grad_norm": 0.35441237688064575, |
| "learning_rate": 3.43031527685153e-06, |
| "loss": 0.0281, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.8166860992356523, |
| "grad_norm": 0.07251258194446564, |
| "learning_rate": 3.411532328469602e-06, |
| "loss": 0.007, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.8172043010752689, |
| "grad_norm": 0.1627056747674942, |
| "learning_rate": 3.392796149115132e-06, |
| "loss": 0.0053, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.8177225029148854, |
| "grad_norm": 0.19298546016216278, |
| "learning_rate": 3.3741067916123504e-06, |
| "loss": 0.0122, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.8182407047545018, |
| "grad_norm": 0.1412855088710785, |
| "learning_rate": 3.355464308653471e-06, |
| "loss": 0.0062, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.8187589065941184, |
| "grad_norm": 0.461653470993042, |
| "learning_rate": 3.336868752798541e-06, |
| "loss": 0.0097, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.8192771084337349, |
| "grad_norm": 0.22594031691551208, |
| "learning_rate": 3.31832017647532e-06, |
| "loss": 0.0142, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.8197953102733515, |
| "grad_norm": 0.23084904253482819, |
| "learning_rate": 3.2998186319791037e-06, |
| "loss": 0.0169, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.820313512112968, |
| "grad_norm": 0.30051642656326294, |
| "learning_rate": 3.2813641714725963e-06, |
| "loss": 0.0183, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.8208317139525846, |
| "grad_norm": 0.184076189994812, |
| "learning_rate": 3.262956846985741e-06, |
| "loss": 0.0139, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.8213499157922011, |
| "grad_norm": 0.14037007093429565, |
| "learning_rate": 3.2445967104156018e-06, |
| "loss": 0.0055, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.8218681176318176, |
| "grad_norm": 0.1952822506427765, |
| "learning_rate": 3.226283813526192e-06, |
| "loss": 0.0115, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.8223863194714341, |
| "grad_norm": 0.2174348533153534, |
| "learning_rate": 3.2080182079483492e-06, |
| "loss": 0.0116, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.8229045213110506, |
| "grad_norm": 0.4994555711746216, |
| "learning_rate": 3.189799945179566e-06, |
| "loss": 0.0226, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.8234227231506672, |
| "grad_norm": 0.30759531259536743, |
| "learning_rate": 3.1716290765838664e-06, |
| "loss": 0.0173, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.8239409249902837, |
| "grad_norm": 0.22034572064876556, |
| "learning_rate": 3.1535056533916554e-06, |
| "loss": 0.0061, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.8244591268299002, |
| "grad_norm": 0.3770354390144348, |
| "learning_rate": 3.13542972669957e-06, |
| "loss": 0.0193, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.8249773286695168, |
| "grad_norm": 0.16963061690330505, |
| "learning_rate": 3.1174013474703255e-06, |
| "loss": 0.0065, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.8254955305091333, |
| "grad_norm": 0.16006392240524292, |
| "learning_rate": 3.0994205665326048e-06, |
| "loss": 0.0169, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.8260137323487499, |
| "grad_norm": 0.2839926481246948, |
| "learning_rate": 3.0814874345808677e-06, |
| "loss": 0.0151, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.8265319341883663, |
| "grad_norm": 0.24182650446891785, |
| "learning_rate": 3.063602002175261e-06, |
| "loss": 0.0152, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.8270501360279829, |
| "grad_norm": 0.291611909866333, |
| "learning_rate": 3.0457643197414244e-06, |
| "loss": 0.0181, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.8275683378675994, |
| "grad_norm": 0.1036018580198288, |
| "learning_rate": 3.027974437570389e-06, |
| "loss": 0.012, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.8280865397072159, |
| "grad_norm": 0.06551776081323624, |
| "learning_rate": 3.010232405818414e-06, |
| "loss": 0.002, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.8286047415468325, |
| "grad_norm": 0.5260184407234192, |
| "learning_rate": 2.9925382745068576e-06, |
| "loss": 0.026, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.829122943386449, |
| "grad_norm": 0.29389244318008423, |
| "learning_rate": 2.9748920935220083e-06, |
| "loss": 0.0122, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.8296411452260656, |
| "grad_norm": 0.2470783144235611, |
| "learning_rate": 2.957293912614987e-06, |
| "loss": 0.0063, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.8301593470656821, |
| "grad_norm": 0.2586616277694702, |
| "learning_rate": 2.939743781401576e-06, |
| "loss": 0.0073, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.8306775489052987, |
| "grad_norm": 0.17473287880420685, |
| "learning_rate": 2.9222417493620912e-06, |
| "loss": 0.0159, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.8311957507449151, |
| "grad_norm": 0.10318975895643234, |
| "learning_rate": 2.9047878658412274e-06, |
| "loss": 0.0047, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.8317139525845316, |
| "grad_norm": 0.1832498013973236, |
| "learning_rate": 2.887382180047946e-06, |
| "loss": 0.0069, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.8322321544241482, |
| "grad_norm": 0.5013145208358765, |
| "learning_rate": 2.8700247410553107e-06, |
| "loss": 0.0176, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.8327503562637647, |
| "grad_norm": 0.20341339707374573, |
| "learning_rate": 2.852715597800373e-06, |
| "loss": 0.0095, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.8332685581033813, |
| "grad_norm": 0.15497015416622162, |
| "learning_rate": 2.8354547990839963e-06, |
| "loss": 0.0035, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.8337867599429978, |
| "grad_norm": 0.19624440371990204, |
| "learning_rate": 2.8182423935707692e-06, |
| "loss": 0.0147, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.8343049617826144, |
| "grad_norm": 0.1570410281419754, |
| "learning_rate": 2.8010784297888305e-06, |
| "loss": 0.0096, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.8348231636222309, |
| "grad_norm": 0.34960222244262695, |
| "learning_rate": 2.7839629561297377e-06, |
| "loss": 0.0252, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.8353413654618473, |
| "grad_norm": 0.1798889935016632, |
| "learning_rate": 2.766896020848351e-06, |
| "loss": 0.0065, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.8358595673014639, |
| "grad_norm": 0.17661862075328827, |
| "learning_rate": 2.749877672062664e-06, |
| "loss": 0.0098, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.8363777691410804, |
| "grad_norm": 0.45938044786453247, |
| "learning_rate": 2.732907957753708e-06, |
| "loss": 0.0106, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.836895970980697, |
| "grad_norm": 0.18319405615329742, |
| "learning_rate": 2.715986925765386e-06, |
| "loss": 0.0092, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.8374141728203135, |
| "grad_norm": 0.2548009157180786, |
| "learning_rate": 2.69911462380434e-06, |
| "loss": 0.0113, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.8379323746599301, |
| "grad_norm": 0.27619224786758423, |
| "learning_rate": 2.6822910994398334e-06, |
| "loss": 0.0246, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.8384505764995466, |
| "grad_norm": 0.2859005331993103, |
| "learning_rate": 2.6655164001036073e-06, |
| "loss": 0.0123, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.8389687783391631, |
| "grad_norm": 0.28470805287361145, |
| "learning_rate": 2.6487905730897524e-06, |
| "loss": 0.0129, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.8394869801787797, |
| "grad_norm": 0.3091945946216583, |
| "learning_rate": 2.6321136655545475e-06, |
| "loss": 0.0112, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.8400051820183961, |
| "grad_norm": 0.26233717799186707, |
| "learning_rate": 2.6154857245163755e-06, |
| "loss": 0.014, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.8405233838580127, |
| "grad_norm": 0.255073606967926, |
| "learning_rate": 2.5989067968555514e-06, |
| "loss": 0.0133, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.8410415856976292, |
| "grad_norm": 0.07703150063753128, |
| "learning_rate": 2.5823769293142074e-06, |
| "loss": 0.0047, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.8415597875372458, |
| "grad_norm": 0.23450684547424316, |
| "learning_rate": 2.5658961684961537e-06, |
| "loss": 0.009, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.8420779893768623, |
| "grad_norm": 0.4751213788986206, |
| "learning_rate": 2.5494645608667503e-06, |
| "loss": 0.0219, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.8425961912164788, |
| "grad_norm": 0.2951497733592987, |
| "learning_rate": 2.5330821527527793e-06, |
| "loss": 0.0145, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.8431143930560954, |
| "grad_norm": 0.20706944167613983, |
| "learning_rate": 2.516748990342317e-06, |
| "loss": 0.0064, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.8436325948957119, |
| "grad_norm": 0.3090721666812897, |
| "learning_rate": 2.5004651196845786e-06, |
| "loss": 0.0122, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.8441507967353284, |
| "grad_norm": 0.5408398509025574, |
| "learning_rate": 2.484230586689833e-06, |
| "loss": 0.0273, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.8446689985749449, |
| "grad_norm": 0.12723809480667114, |
| "learning_rate": 2.468045437129223e-06, |
| "loss": 0.0088, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.8451872004145615, |
| "grad_norm": 0.18674789369106293, |
| "learning_rate": 2.4519097166346816e-06, |
| "loss": 0.0092, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.845705402254178, |
| "grad_norm": 0.2017349749803543, |
| "learning_rate": 2.435823470698768e-06, |
| "loss": 0.0109, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.8462236040937945, |
| "grad_norm": 0.17251279950141907, |
| "learning_rate": 2.419786744674564e-06, |
| "loss": 0.0195, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.8467418059334111, |
| "grad_norm": 0.28121671080589294, |
| "learning_rate": 2.4037995837755324e-06, |
| "loss": 0.0079, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.8472600077730276, |
| "grad_norm": 0.403934508562088, |
| "learning_rate": 2.387862033075399e-06, |
| "loss": 0.0136, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.8477782096126442, |
| "grad_norm": 0.28479158878326416, |
| "learning_rate": 2.3719741375080084e-06, |
| "loss": 0.0114, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.8482964114522606, |
| "grad_norm": 0.4967851936817169, |
| "learning_rate": 2.356135941867217e-06, |
| "loss": 0.0238, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.8488146132918772, |
| "grad_norm": 0.10027792304754257, |
| "learning_rate": 2.3403474908067624e-06, |
| "loss": 0.0028, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.8493328151314937, |
| "grad_norm": 0.20121395587921143, |
| "learning_rate": 2.3246088288401313e-06, |
| "loss": 0.0112, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.8498510169711102, |
| "grad_norm": 0.2108752280473709, |
| "learning_rate": 2.3089200003404265e-06, |
| "loss": 0.0071, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.8503692188107268, |
| "grad_norm": 0.1123914048075676, |
| "learning_rate": 2.2932810495402612e-06, |
| "loss": 0.0064, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.8508874206503433, |
| "grad_norm": 0.24224601686000824, |
| "learning_rate": 2.2776920205316276e-06, |
| "loss": 0.0244, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.8514056224899599, |
| "grad_norm": 0.40744614601135254, |
| "learning_rate": 2.2621529572657685e-06, |
| "loss": 0.0208, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.8519238243295764, |
| "grad_norm": 0.28944340348243713, |
| "learning_rate": 2.2466639035530435e-06, |
| "loss": 0.0105, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.8524420261691928, |
| "grad_norm": 0.19422157108783722, |
| "learning_rate": 2.2312249030628297e-06, |
| "loss": 0.0066, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.8529602280088094, |
| "grad_norm": 0.09577490389347076, |
| "learning_rate": 2.2158359993233814e-06, |
| "loss": 0.0051, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.8534784298484259, |
| "grad_norm": 0.16629934310913086, |
| "learning_rate": 2.2004972357217146e-06, |
| "loss": 0.0044, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.8539966316880425, |
| "grad_norm": 0.1245030090212822, |
| "learning_rate": 2.1852086555034747e-06, |
| "loss": 0.0055, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.854514833527659, |
| "grad_norm": 0.35298582911491394, |
| "learning_rate": 2.1699703017728237e-06, |
| "loss": 0.0174, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.8550330353672756, |
| "grad_norm": 0.17353323101997375, |
| "learning_rate": 2.1547822174923196e-06, |
| "loss": 0.0065, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.8555512372068921, |
| "grad_norm": 0.2672171890735626, |
| "learning_rate": 2.1396444454827956e-06, |
| "loss": 0.0131, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.8560694390465086, |
| "grad_norm": 0.1073671206831932, |
| "learning_rate": 2.1245570284232263e-06, |
| "loss": 0.0041, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.8565876408861252, |
| "grad_norm": 0.11136187613010406, |
| "learning_rate": 2.1095200088506227e-06, |
| "loss": 0.0143, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.8571058427257416, |
| "grad_norm": 0.15751762688159943, |
| "learning_rate": 2.0945334291599084e-06, |
| "loss": 0.0062, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.8576240445653582, |
| "grad_norm": 0.5879334211349487, |
| "learning_rate": 2.079597331603798e-06, |
| "loss": 0.0172, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.8581422464049747, |
| "grad_norm": 0.24367937445640564, |
| "learning_rate": 2.064711758292668e-06, |
| "loss": 0.0142, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.8586604482445913, |
| "grad_norm": 0.14427150785923004, |
| "learning_rate": 2.049876751194464e-06, |
| "loss": 0.0114, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.8591786500842078, |
| "grad_norm": 0.355831116437912, |
| "learning_rate": 2.035092352134558e-06, |
| "loss": 0.0218, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.8596968519238243, |
| "grad_norm": 0.24205715954303741, |
| "learning_rate": 2.0203586027956423e-06, |
| "loss": 0.0144, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.8602150537634409, |
| "grad_norm": 0.16865667700767517, |
| "learning_rate": 2.0056755447176003e-06, |
| "loss": 0.008, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.8607332556030574, |
| "grad_norm": 0.35851263999938965, |
| "learning_rate": 1.9910432192974105e-06, |
| "loss": 0.0191, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.861251457442674, |
| "grad_norm": 0.3120400905609131, |
| "learning_rate": 1.976461667789009e-06, |
| "loss": 0.0169, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.8617696592822904, |
| "grad_norm": 0.23192231357097626, |
| "learning_rate": 1.961930931303191e-06, |
| "loss": 0.013, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.862287861121907, |
| "grad_norm": 0.2518763840198517, |
| "learning_rate": 1.9474510508074694e-06, |
| "loss": 0.0118, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.8628060629615235, |
| "grad_norm": 0.17504754662513733, |
| "learning_rate": 1.933022067125987e-06, |
| "loss": 0.0097, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.86332426480114, |
| "grad_norm": 0.2840370535850525, |
| "learning_rate": 1.918644020939393e-06, |
| "loss": 0.0141, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.8638424666407566, |
| "grad_norm": 0.4161594808101654, |
| "learning_rate": 1.9043169527847106e-06, |
| "loss": 0.0228, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.8643606684803731, |
| "grad_norm": 0.216422900557518, |
| "learning_rate": 1.8900409030552502e-06, |
| "loss": 0.0195, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.8648788703199897, |
| "grad_norm": 0.27762821316719055, |
| "learning_rate": 1.8758159120004737e-06, |
| "loss": 0.0069, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.8653970721596062, |
| "grad_norm": 0.17240680754184723, |
| "learning_rate": 1.8616420197258977e-06, |
| "loss": 0.0051, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.8659152739992227, |
| "grad_norm": 0.23954899609088898, |
| "learning_rate": 1.8475192661929676e-06, |
| "loss": 0.0126, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.8664334758388392, |
| "grad_norm": 0.4306492507457733, |
| "learning_rate": 1.833447691218948e-06, |
| "loss": 0.0106, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.8669516776784557, |
| "grad_norm": 0.31067901849746704, |
| "learning_rate": 1.8194273344768153e-06, |
| "loss": 0.006, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.8674698795180723, |
| "grad_norm": 0.25931859016418457, |
| "learning_rate": 1.8054582354951434e-06, |
| "loss": 0.0171, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.8679880813576888, |
| "grad_norm": 0.27569490671157837, |
| "learning_rate": 1.7915404336579923e-06, |
| "loss": 0.0106, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.8685062831973054, |
| "grad_norm": 0.15181864798069, |
| "learning_rate": 1.7776739682047894e-06, |
| "loss": 0.0108, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.8690244850369219, |
| "grad_norm": 0.1656786948442459, |
| "learning_rate": 1.7638588782302336e-06, |
| "loss": 0.0075, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.8695426868765385, |
| "grad_norm": 0.2415611892938614, |
| "learning_rate": 1.7500952026841766e-06, |
| "loss": 0.0173, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.870060888716155, |
| "grad_norm": 0.13132847845554352, |
| "learning_rate": 1.7363829803715115e-06, |
| "loss": 0.0061, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.8705790905557714, |
| "grad_norm": 0.2118501365184784, |
| "learning_rate": 1.7227222499520601e-06, |
| "loss": 0.008, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.871097292395388, |
| "grad_norm": 0.25103017687797546, |
| "learning_rate": 1.7091130499404807e-06, |
| "loss": 0.0086, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.8716154942350045, |
| "grad_norm": 0.08915001153945923, |
| "learning_rate": 1.6955554187061406e-06, |
| "loss": 0.004, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.8721336960746211, |
| "grad_norm": 0.08841594308614731, |
| "learning_rate": 1.6820493944730266e-06, |
| "loss": 0.0032, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.8726518979142376, |
| "grad_norm": 0.40854230523109436, |
| "learning_rate": 1.668595015319603e-06, |
| "loss": 0.0191, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.8731700997538542, |
| "grad_norm": 0.27266111969947815, |
| "learning_rate": 1.6551923191787555e-06, |
| "loss": 0.0121, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.8736883015934707, |
| "grad_norm": 0.2558603584766388, |
| "learning_rate": 1.6418413438376335e-06, |
| "loss": 0.0098, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.8742065034330871, |
| "grad_norm": 0.14733348786830902, |
| "learning_rate": 1.6285421269375823e-06, |
| "loss": 0.0102, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.8747247052727037, |
| "grad_norm": 0.29257291555404663, |
| "learning_rate": 1.6152947059740087e-06, |
| "loss": 0.0175, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.8752429071123202, |
| "grad_norm": 0.40963825583457947, |
| "learning_rate": 1.6020991182962964e-06, |
| "loss": 0.0256, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.8757611089519368, |
| "grad_norm": 0.2824581265449524, |
| "learning_rate": 1.5889554011076901e-06, |
| "loss": 0.0146, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.8762793107915533, |
| "grad_norm": 0.28801703453063965, |
| "learning_rate": 1.5758635914651922e-06, |
| "loss": 0.016, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.8767975126311699, |
| "grad_norm": 0.17242257297039032, |
| "learning_rate": 1.5628237262794544e-06, |
| "loss": 0.0104, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.8773157144707864, |
| "grad_norm": 0.28166109323501587, |
| "learning_rate": 1.5498358423146797e-06, |
| "loss": 0.0292, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.8778339163104029, |
| "grad_norm": 0.19362136721611023, |
| "learning_rate": 1.5368999761885218e-06, |
| "loss": 0.0158, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.8783521181500195, |
| "grad_norm": 0.19558751583099365, |
| "learning_rate": 1.5240161643719753e-06, |
| "loss": 0.0064, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.8788703199896359, |
| "grad_norm": 0.14374202489852905, |
| "learning_rate": 1.5111844431892643e-06, |
| "loss": 0.0131, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.8793885218292525, |
| "grad_norm": 0.24548299610614777, |
| "learning_rate": 1.4984048488177628e-06, |
| "loss": 0.0218, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.879906723668869, |
| "grad_norm": 0.2641260325908661, |
| "learning_rate": 1.485677417287874e-06, |
| "loss": 0.0085, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.8804249255084855, |
| "grad_norm": 0.24745555222034454, |
| "learning_rate": 1.4730021844829411e-06, |
| "loss": 0.0098, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.8809431273481021, |
| "grad_norm": 0.3807303309440613, |
| "learning_rate": 1.460379186139127e-06, |
| "loss": 0.0263, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.8814613291877186, |
| "grad_norm": 0.3894922733306885, |
| "learning_rate": 1.4478084578453389e-06, |
| "loss": 0.0162, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.8819795310273352, |
| "grad_norm": 0.2722943127155304, |
| "learning_rate": 1.435290035043111e-06, |
| "loss": 0.0172, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.8824977328669517, |
| "grad_norm": 0.23959621787071228, |
| "learning_rate": 1.4228239530265042e-06, |
| "loss": 0.0105, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.8830159347065683, |
| "grad_norm": 0.20592434704303741, |
| "learning_rate": 1.4104102469420222e-06, |
| "loss": 0.0077, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.8835341365461847, |
| "grad_norm": 0.37758317589759827, |
| "learning_rate": 1.3980489517884865e-06, |
| "loss": 0.0138, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.8840523383858012, |
| "grad_norm": 0.047654855996370316, |
| "learning_rate": 1.3857401024169682e-06, |
| "loss": 0.0022, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.8845705402254178, |
| "grad_norm": 0.16144023835659027, |
| "learning_rate": 1.373483733530665e-06, |
| "loss": 0.0121, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.8850887420650343, |
| "grad_norm": 0.059519317001104355, |
| "learning_rate": 1.361279879684816e-06, |
| "loss": 0.0018, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.8856069439046509, |
| "grad_norm": 0.36740997433662415, |
| "learning_rate": 1.3491285752865956e-06, |
| "loss": 0.021, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.8861251457442674, |
| "grad_norm": 0.0914311334490776, |
| "learning_rate": 1.3370298545950334e-06, |
| "loss": 0.003, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.886643347583884, |
| "grad_norm": 0.5390928387641907, |
| "learning_rate": 1.3249837517208986e-06, |
| "loss": 0.0099, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.8871615494235005, |
| "grad_norm": 0.1984144002199173, |
| "learning_rate": 1.3129903006266065e-06, |
| "loss": 0.0057, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.8876797512631169, |
| "grad_norm": 0.19295454025268555, |
| "learning_rate": 1.3010495351261333e-06, |
| "loss": 0.0177, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.8881979531027335, |
| "grad_norm": 0.2037198692560196, |
| "learning_rate": 1.2891614888849202e-06, |
| "loss": 0.013, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.88871615494235, |
| "grad_norm": 0.2238919585943222, |
| "learning_rate": 1.2773261954197657e-06, |
| "loss": 0.0143, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.8892343567819666, |
| "grad_norm": 0.6479920148849487, |
| "learning_rate": 1.265543688098736e-06, |
| "loss": 0.0361, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.8897525586215831, |
| "grad_norm": 0.5114318132400513, |
| "learning_rate": 1.2538140001410826e-06, |
| "loss": 0.024, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.8902707604611997, |
| "grad_norm": 0.132893905043602, |
| "learning_rate": 1.2421371646171343e-06, |
| "loss": 0.0051, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.8907889623008162, |
| "grad_norm": 0.18512026965618134, |
| "learning_rate": 1.2305132144482123e-06, |
| "loss": 0.0117, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.8913071641404327, |
| "grad_norm": 0.20125305652618408, |
| "learning_rate": 1.2189421824065284e-06, |
| "loss": 0.0136, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.8918253659800492, |
| "grad_norm": 0.6059772372245789, |
| "learning_rate": 1.207424101115109e-06, |
| "loss": 0.0167, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.8923435678196657, |
| "grad_norm": 0.4089493155479431, |
| "learning_rate": 1.1959590030476798e-06, |
| "loss": 0.0134, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.8928617696592823, |
| "grad_norm": 0.31541603803634644, |
| "learning_rate": 1.1845469205285998e-06, |
| "loss": 0.0299, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.8933799714988988, |
| "grad_norm": 0.5524953603744507, |
| "learning_rate": 1.173187885732745e-06, |
| "loss": 0.0274, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.8938981733385154, |
| "grad_norm": 0.09349332749843597, |
| "learning_rate": 1.1618819306854402e-06, |
| "loss": 0.0079, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.8944163751781319, |
| "grad_norm": 0.25360822677612305, |
| "learning_rate": 1.150629087262356e-06, |
| "loss": 0.0072, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.8949345770177484, |
| "grad_norm": 0.36719512939453125, |
| "learning_rate": 1.1394293871894258e-06, |
| "loss": 0.0104, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.895452778857365, |
| "grad_norm": 0.17874731123447418, |
| "learning_rate": 1.1282828620427378e-06, |
| "loss": 0.0091, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.8959709806969814, |
| "grad_norm": 0.29093101620674133, |
| "learning_rate": 1.1171895432484758e-06, |
| "loss": 0.0199, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.896489182536598, |
| "grad_norm": 0.35000985860824585, |
| "learning_rate": 1.1061494620828084e-06, |
| "loss": 0.017, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.8970073843762145, |
| "grad_norm": 0.4346601068973541, |
| "learning_rate": 1.0951626496718126e-06, |
| "loss": 0.0314, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.8975255862158311, |
| "grad_norm": 0.21371477842330933, |
| "learning_rate": 1.0842291369913705e-06, |
| "loss": 0.0105, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.8980437880554476, |
| "grad_norm": 0.11501491814851761, |
| "learning_rate": 1.073348954867104e-06, |
| "loss": 0.0101, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.8985619898950641, |
| "grad_norm": 0.23166029155254364, |
| "learning_rate": 1.0625221339742708e-06, |
| "loss": 0.0103, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.8990801917346807, |
| "grad_norm": 0.3538880944252014, |
| "learning_rate": 1.0517487048376852e-06, |
| "loss": 0.0238, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.8995983935742972, |
| "grad_norm": 0.21843011677265167, |
| "learning_rate": 1.0410286978316276e-06, |
| "loss": 0.0203, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.9001165954139138, |
| "grad_norm": 0.14948545396327972, |
| "learning_rate": 1.0303621431797638e-06, |
| "loss": 0.0044, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.9006347972535302, |
| "grad_norm": 0.2392454892396927, |
| "learning_rate": 1.0197490709550606e-06, |
| "loss": 0.0094, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.9011529990931468, |
| "grad_norm": 0.06702300161123276, |
| "learning_rate": 1.0091895110796935e-06, |
| "loss": 0.0031, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.9016712009327633, |
| "grad_norm": 0.13145937025547028, |
| "learning_rate": 9.986834933249678e-07, |
| "loss": 0.01, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.9021894027723798, |
| "grad_norm": 0.237742617726326, |
| "learning_rate": 9.882310473112323e-07, |
| "loss": 0.0243, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.9027076046119964, |
| "grad_norm": 0.0943235382437706, |
| "learning_rate": 9.778322025078025e-07, |
| "loss": 0.0043, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.9032258064516129, |
| "grad_norm": 0.2087947428226471, |
| "learning_rate": 9.674869882328708e-07, |
| "loss": 0.0065, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.9037440082912295, |
| "grad_norm": 0.22540369629859924, |
| "learning_rate": 9.571954336534194e-07, |
| "loss": 0.007, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.904262210130846, |
| "grad_norm": 0.7248985171318054, |
| "learning_rate": 9.469575677851473e-07, |
| "loss": 0.0261, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.9047804119704626, |
| "grad_norm": 0.14132840931415558, |
| "learning_rate": 9.367734194923895e-07, |
| "loss": 0.008, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.905298613810079, |
| "grad_norm": 0.2634159028530121, |
| "learning_rate": 9.266430174880314e-07, |
| "loss": 0.0166, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.9058168156496955, |
| "grad_norm": 0.3727540373802185, |
| "learning_rate": 9.165663903334132e-07, |
| "loss": 0.0325, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.9063350174893121, |
| "grad_norm": 0.406827449798584, |
| "learning_rate": 9.065435664382827e-07, |
| "loss": 0.0264, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.9068532193289286, |
| "grad_norm": 0.2695000469684601, |
| "learning_rate": 8.965745740606868e-07, |
| "loss": 0.0103, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.9073714211685452, |
| "grad_norm": 0.3922761082649231, |
| "learning_rate": 8.866594413069074e-07, |
| "loss": 0.0165, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.9078896230081617, |
| "grad_norm": 0.23204711079597473, |
| "learning_rate": 8.767981961313632e-07, |
| "loss": 0.0069, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.9084078248477782, |
| "grad_norm": 0.2488652914762497, |
| "learning_rate": 8.669908663365567e-07, |
| "loss": 0.0122, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.9089260266873948, |
| "grad_norm": 0.13076753914356232, |
| "learning_rate": 8.57237479572981e-07, |
| "loss": 0.0096, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.9094442285270112, |
| "grad_norm": 0.2364853173494339, |
| "learning_rate": 8.475380633390417e-07, |
| "loss": 0.0125, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.9099624303666278, |
| "grad_norm": 0.23385170102119446, |
| "learning_rate": 8.378926449809799e-07, |
| "loss": 0.0139, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.9104806322062443, |
| "grad_norm": 0.18230576813220978, |
| "learning_rate": 8.283012516927979e-07, |
| "loss": 0.0054, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.9109988340458609, |
| "grad_norm": 0.2961260676383972, |
| "learning_rate": 8.187639105161805e-07, |
| "loss": 0.0204, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.9115170358854774, |
| "grad_norm": 0.2320140302181244, |
| "learning_rate": 8.09280648340427e-07, |
| "loss": 0.0103, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.9120352377250939, |
| "grad_norm": 0.3515540361404419, |
| "learning_rate": 7.998514919023547e-07, |
| "loss": 0.0222, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.9125534395647105, |
| "grad_norm": 0.460862398147583, |
| "learning_rate": 7.904764677862475e-07, |
| "loss": 0.0279, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.913071641404327, |
| "grad_norm": 0.3304995596408844, |
| "learning_rate": 7.811556024237644e-07, |
| "loss": 0.0221, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.9135898432439435, |
| "grad_norm": 0.49983087182044983, |
| "learning_rate": 7.718889220938796e-07, |
| "loss": 0.0214, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.91410804508356, |
| "grad_norm": 0.20241613686084747, |
| "learning_rate": 7.626764529227859e-07, |
| "loss": 0.0159, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.9146262469231766, |
| "grad_norm": 0.20853309333324432, |
| "learning_rate": 7.535182208838465e-07, |
| "loss": 0.0192, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.9151444487627931, |
| "grad_norm": 0.19687706232070923, |
| "learning_rate": 7.444142517975028e-07, |
| "loss": 0.0087, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.9156626506024096, |
| "grad_norm": 0.08084183931350708, |
| "learning_rate": 7.353645713312164e-07, |
| "loss": 0.0037, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.9161808524420262, |
| "grad_norm": 0.18536792695522308, |
| "learning_rate": 7.263692049993798e-07, |
| "loss": 0.0102, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.9166990542816427, |
| "grad_norm": 0.1274387687444687, |
| "learning_rate": 7.174281781632597e-07, |
| "loss": 0.0043, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.9172172561212593, |
| "grad_norm": 0.42621079087257385, |
| "learning_rate": 7.085415160309184e-07, |
| "loss": 0.0232, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.9177354579608757, |
| "grad_norm": 0.3298850357532501, |
| "learning_rate": 6.997092436571473e-07, |
| "loss": 0.013, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.9182536598004923, |
| "grad_norm": 0.2310313880443573, |
| "learning_rate": 6.909313859433874e-07, |
| "loss": 0.0125, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.9187718616401088, |
| "grad_norm": 0.36734509468078613, |
| "learning_rate": 6.822079676376647e-07, |
| "loss": 0.0155, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.9192900634797253, |
| "grad_norm": 0.15562260150909424, |
| "learning_rate": 6.735390133345299e-07, |
| "loss": 0.0063, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.9198082653193419, |
| "grad_norm": 0.11486683785915375, |
| "learning_rate": 6.649245474749632e-07, |
| "loss": 0.0081, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.9203264671589584, |
| "grad_norm": 0.13218870759010315, |
| "learning_rate": 6.563645943463392e-07, |
| "loss": 0.0078, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.920844668998575, |
| "grad_norm": 0.14428293704986572, |
| "learning_rate": 6.478591780823262e-07, |
| "loss": 0.008, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.9213628708381915, |
| "grad_norm": 0.48469075560569763, |
| "learning_rate": 6.394083226628422e-07, |
| "loss": 0.0217, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.9218810726778081, |
| "grad_norm": 0.2574191689491272, |
| "learning_rate": 6.310120519139773e-07, |
| "loss": 0.0155, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.9223992745174245, |
| "grad_norm": 0.09832003712654114, |
| "learning_rate": 6.226703895079201e-07, |
| "loss": 0.0041, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.922917476357041, |
| "grad_norm": 0.15627124905586243, |
| "learning_rate": 6.143833589629045e-07, |
| "loss": 0.0154, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.9234356781966576, |
| "grad_norm": 0.09480048716068268, |
| "learning_rate": 6.061509836431367e-07, |
| "loss": 0.0027, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.9239538800362741, |
| "grad_norm": 0.22857904434204102, |
| "learning_rate": 5.97973286758733e-07, |
| "loss": 0.01, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.9244720818758907, |
| "grad_norm": 0.1899561583995819, |
| "learning_rate": 5.898502913656368e-07, |
| "loss": 0.0068, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.9249902837155072, |
| "grad_norm": 0.1858498901128769, |
| "learning_rate": 5.817820203655822e-07, |
| "loss": 0.0084, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.9255084855551238, |
| "grad_norm": 0.32740896940231323, |
| "learning_rate": 5.737684965060109e-07, |
| "loss": 0.0177, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.9260266873947403, |
| "grad_norm": 0.12484689801931381, |
| "learning_rate": 5.658097423800124e-07, |
| "loss": 0.007, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.9265448892343567, |
| "grad_norm": 0.30967602133750916, |
| "learning_rate": 5.579057804262533e-07, |
| "loss": 0.0223, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.9270630910739733, |
| "grad_norm": 0.2253575623035431, |
| "learning_rate": 5.500566329289325e-07, |
| "loss": 0.0191, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.9275812929135898, |
| "grad_norm": 0.18831248581409454, |
| "learning_rate": 5.42262322017697e-07, |
| "loss": 0.0066, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.9280994947532064, |
| "grad_norm": 0.08481088280677795, |
| "learning_rate": 5.345228696675975e-07, |
| "loss": 0.0041, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.9286176965928229, |
| "grad_norm": 0.4872352182865143, |
| "learning_rate": 5.268382976990083e-07, |
| "loss": 0.0174, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.9291358984324395, |
| "grad_norm": 0.1179795041680336, |
| "learning_rate": 5.192086277775855e-07, |
| "loss": 0.0073, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.929654100272056, |
| "grad_norm": 0.14406712353229523, |
| "learning_rate": 5.116338814141842e-07, |
| "loss": 0.0062, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.9301723021116725, |
| "grad_norm": 0.33626288175582886, |
| "learning_rate": 5.041140799648237e-07, |
| "loss": 0.0171, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.930690503951289, |
| "grad_norm": 0.32739537954330444, |
| "learning_rate": 4.966492446306026e-07, |
| "loss": 0.0424, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.9312087057909055, |
| "grad_norm": 0.3170034885406494, |
| "learning_rate": 4.892393964576547e-07, |
| "loss": 0.0103, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.9317269076305221, |
| "grad_norm": 0.12314608693122864, |
| "learning_rate": 4.818845563370822e-07, |
| "loss": 0.0071, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.9322451094701386, |
| "grad_norm": 0.3412172794342041, |
| "learning_rate": 4.745847450049046e-07, |
| "loss": 0.0151, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.9327633113097551, |
| "grad_norm": 0.13909827172756195, |
| "learning_rate": 4.6733998304198334e-07, |
| "loss": 0.0133, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.9332815131493717, |
| "grad_norm": 0.19364029169082642, |
| "learning_rate": 4.6015029087398855e-07, |
| "loss": 0.013, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.9337997149889882, |
| "grad_norm": 0.42233768105506897, |
| "learning_rate": 4.5301568877132106e-07, |
| "loss": 0.0328, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.9343179168286048, |
| "grad_norm": 0.3611566126346588, |
| "learning_rate": 4.4593619684906387e-07, |
| "loss": 0.0189, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.9348361186682212, |
| "grad_norm": 0.18141718208789825, |
| "learning_rate": 4.389118350669241e-07, |
| "loss": 0.0082, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.9353543205078378, |
| "grad_norm": 0.1644567996263504, |
| "learning_rate": 4.3194262322917347e-07, |
| "loss": 0.0058, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.9358725223474543, |
| "grad_norm": 0.2701861262321472, |
| "learning_rate": 4.250285809846011e-07, |
| "loss": 0.0148, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.9363907241870708, |
| "grad_norm": 0.16967220604419708, |
| "learning_rate": 4.181697278264496e-07, |
| "loss": 0.0069, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.9369089260266874, |
| "grad_norm": 0.3081384003162384, |
| "learning_rate": 4.113660830923638e-07, |
| "loss": 0.014, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.9374271278663039, |
| "grad_norm": 0.23030611872673035, |
| "learning_rate": 4.046176659643353e-07, |
| "loss": 0.0144, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.9379453297059205, |
| "grad_norm": 0.3798378109931946, |
| "learning_rate": 3.9792449546864677e-07, |
| "loss": 0.0106, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.938463531545537, |
| "grad_norm": 0.15186521410942078, |
| "learning_rate": 3.912865904758256e-07, |
| "loss": 0.0064, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.9389817333851536, |
| "grad_norm": 0.2820347845554352, |
| "learning_rate": 3.8470396970057946e-07, |
| "loss": 0.0201, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.93949993522477, |
| "grad_norm": 0.1129571720957756, |
| "learning_rate": 3.781766517017493e-07, |
| "loss": 0.0036, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.9400181370643865, |
| "grad_norm": 0.16945257782936096, |
| "learning_rate": 3.717046548822634e-07, |
| "loss": 0.0122, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.9405363389040031, |
| "grad_norm": 0.2008204162120819, |
| "learning_rate": 3.652879974890766e-07, |
| "loss": 0.0114, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.9410545407436196, |
| "grad_norm": 0.11003681272268295, |
| "learning_rate": 3.589266976131134e-07, |
| "loss": 0.0025, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.9415727425832362, |
| "grad_norm": 0.19532270729541779, |
| "learning_rate": 3.5262077318923615e-07, |
| "loss": 0.0051, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.9420909444228527, |
| "grad_norm": 0.12835298478603363, |
| "learning_rate": 3.4637024199617896e-07, |
| "loss": 0.0047, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.9426091462624693, |
| "grad_norm": 0.19439566135406494, |
| "learning_rate": 3.401751216565008e-07, |
| "loss": 0.0064, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.9431273481020858, |
| "grad_norm": 0.2604013979434967, |
| "learning_rate": 3.3403542963653666e-07, |
| "loss": 0.0284, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.9436455499417022, |
| "grad_norm": 0.3678673207759857, |
| "learning_rate": 3.279511832463511e-07, |
| "loss": 0.0178, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.9441637517813188, |
| "grad_norm": 0.22065676748752594, |
| "learning_rate": 3.2192239963968477e-07, |
| "loss": 0.0092, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.9446819536209353, |
| "grad_norm": 0.16649556159973145, |
| "learning_rate": 3.159490958139122e-07, |
| "loss": 0.006, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.9452001554605519, |
| "grad_norm": 0.2621767222881317, |
| "learning_rate": 3.100312886099821e-07, |
| "loss": 0.0192, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.9457183573001684, |
| "grad_norm": 0.25440752506256104, |
| "learning_rate": 3.0416899471238606e-07, |
| "loss": 0.0088, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.946236559139785, |
| "grad_norm": 0.18357832729816437, |
| "learning_rate": 2.983622306490963e-07, |
| "loss": 0.015, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.9467547609794015, |
| "grad_norm": 0.2571721374988556, |
| "learning_rate": 2.926110127915327e-07, |
| "loss": 0.0101, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.947272962819018, |
| "grad_norm": 0.2212700992822647, |
| "learning_rate": 2.869153573545025e-07, |
| "loss": 0.0094, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.9477911646586346, |
| "grad_norm": 0.3824091851711273, |
| "learning_rate": 2.8127528039616935e-07, |
| "loss": 0.0184, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.948309366498251, |
| "grad_norm": 0.2520553171634674, |
| "learning_rate": 2.7569079781799124e-07, |
| "loss": 0.0191, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.9488275683378676, |
| "grad_norm": 0.07536312937736511, |
| "learning_rate": 2.701619253646959e-07, |
| "loss": 0.0022, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.9493457701774841, |
| "grad_norm": 0.17381742596626282, |
| "learning_rate": 2.6468867862421865e-07, |
| "loss": 0.0109, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.9498639720171007, |
| "grad_norm": 0.37904366850852966, |
| "learning_rate": 2.5927107302766483e-07, |
| "loss": 0.0234, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.9503821738567172, |
| "grad_norm": 0.23849669098854065, |
| "learning_rate": 2.539091238492719e-07, |
| "loss": 0.0166, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.9509003756963337, |
| "grad_norm": 0.256930410861969, |
| "learning_rate": 2.486028462063561e-07, |
| "loss": 0.0104, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.9514185775359503, |
| "grad_norm": 0.09233854711055756, |
| "learning_rate": 2.433522550592793e-07, |
| "loss": 0.0029, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.9519367793755668, |
| "grad_norm": 0.18194934725761414, |
| "learning_rate": 2.3815736521139775e-07, |
| "loss": 0.0089, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.9524549812151833, |
| "grad_norm": 0.28712162375450134, |
| "learning_rate": 2.3301819130903124e-07, |
| "loss": 0.0099, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.9529731830547998, |
| "grad_norm": 0.21452538669109344, |
| "learning_rate": 2.2793474784140957e-07, |
| "loss": 0.007, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.9534913848944164, |
| "grad_norm": 0.23924578726291656, |
| "learning_rate": 2.2290704914064155e-07, |
| "loss": 0.0217, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.9540095867340329, |
| "grad_norm": 0.3106108009815216, |
| "learning_rate": 2.179351093816706e-07, |
| "loss": 0.0186, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.9545277885736494, |
| "grad_norm": 0.5527723431587219, |
| "learning_rate": 2.1301894258223045e-07, |
| "loss": 0.0249, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.955045990413266, |
| "grad_norm": 0.1543256938457489, |
| "learning_rate": 2.081585626028204e-07, |
| "loss": 0.0097, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.9555641922528825, |
| "grad_norm": 0.27393466234207153, |
| "learning_rate": 2.0335398314664578e-07, |
| "loss": 0.0126, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.9560823940924991, |
| "grad_norm": 0.24850605428218842, |
| "learning_rate": 1.986052177595954e-07, |
| "loss": 0.0118, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.9566005959321155, |
| "grad_norm": 0.12293598055839539, |
| "learning_rate": 1.939122798301929e-07, |
| "loss": 0.0075, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.9571187977717321, |
| "grad_norm": 0.2503874599933624, |
| "learning_rate": 1.8927518258957e-07, |
| "loss": 0.0139, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.9576369996113486, |
| "grad_norm": 0.24323077499866486, |
| "learning_rate": 1.8469393911141553e-07, |
| "loss": 0.013, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.9581552014509651, |
| "grad_norm": 0.32238924503326416, |
| "learning_rate": 1.8016856231195533e-07, |
| "loss": 0.0116, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.9586734032905817, |
| "grad_norm": 0.3171192407608032, |
| "learning_rate": 1.7569906494989463e-07, |
| "loss": 0.011, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.9591916051301982, |
| "grad_norm": 0.2532232105731964, |
| "learning_rate": 1.7128545962640242e-07, |
| "loss": 0.0083, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.9597098069698148, |
| "grad_norm": 0.12934939563274384, |
| "learning_rate": 1.6692775878506705e-07, |
| "loss": 0.0061, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.9602280088094313, |
| "grad_norm": 0.5671677589416504, |
| "learning_rate": 1.6262597471185636e-07, |
| "loss": 0.0225, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.9607462106490477, |
| "grad_norm": 0.19938619434833527, |
| "learning_rate": 1.5838011953509313e-07, |
| "loss": 0.02, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.9612644124886643, |
| "grad_norm": 0.15334980189800262, |
| "learning_rate": 1.5419020522541295e-07, |
| "loss": 0.0237, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.9617826143282808, |
| "grad_norm": 0.15145401656627655, |
| "learning_rate": 1.5005624359573533e-07, |
| "loss": 0.0069, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.9623008161678974, |
| "grad_norm": 0.17450299859046936, |
| "learning_rate": 1.4597824630122604e-07, |
| "loss": 0.0127, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.9628190180075139, |
| "grad_norm": 0.2813020348548889, |
| "learning_rate": 1.4195622483926809e-07, |
| "loss": 0.0218, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.9633372198471305, |
| "grad_norm": 0.21633239090442657, |
| "learning_rate": 1.3799019054943073e-07, |
| "loss": 0.0058, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.963855421686747, |
| "grad_norm": 0.06448470056056976, |
| "learning_rate": 1.340801546134296e-07, |
| "loss": 0.003, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.9643736235263635, |
| "grad_norm": 0.1556619107723236, |
| "learning_rate": 1.3022612805510204e-07, |
| "loss": 0.0082, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.9648918253659801, |
| "grad_norm": 0.21246463060379028, |
| "learning_rate": 1.2642812174037621e-07, |
| "loss": 0.0071, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.9654100272055965, |
| "grad_norm": 0.24705225229263306, |
| "learning_rate": 1.2268614637723554e-07, |
| "loss": 0.0165, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.9659282290452131, |
| "grad_norm": 0.2733205258846283, |
| "learning_rate": 1.1900021251569415e-07, |
| "loss": 0.0194, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.9664464308848296, |
| "grad_norm": 0.07219294458627701, |
| "learning_rate": 1.153703305477638e-07, |
| "loss": 0.0054, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.9669646327244462, |
| "grad_norm": 0.08205874264240265, |
| "learning_rate": 1.117965107074248e-07, |
| "loss": 0.0068, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.9674828345640627, |
| "grad_norm": 0.2987622320652008, |
| "learning_rate": 1.0827876307059503e-07, |
| "loss": 0.0209, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.9680010364036792, |
| "grad_norm": 0.1716400384902954, |
| "learning_rate": 1.0481709755510994e-07, |
| "loss": 0.0062, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.9685192382432958, |
| "grad_norm": 0.2547082304954529, |
| "learning_rate": 1.0141152392068254e-07, |
| "loss": 0.0094, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.9690374400829123, |
| "grad_norm": 0.17818105220794678, |
| "learning_rate": 9.80620517688835e-08, |
| "loss": 0.0046, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.9695556419225289, |
| "grad_norm": 0.1513381004333496, |
| "learning_rate": 9.476869054311888e-08, |
| "loss": 0.0098, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.9700738437621453, |
| "grad_norm": 0.2405107617378235, |
| "learning_rate": 9.153144952859017e-08, |
| "loss": 0.0121, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.9705920456017619, |
| "grad_norm": 0.05375241860747337, |
| "learning_rate": 8.835033785227432e-08, |
| "loss": 0.0013, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.9711102474413784, |
| "grad_norm": 0.15731529891490936, |
| "learning_rate": 8.522536448290597e-08, |
| "loss": 0.0076, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.9716284492809949, |
| "grad_norm": 0.2520892322063446, |
| "learning_rate": 8.215653823093973e-08, |
| "loss": 0.0193, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.9721466511206115, |
| "grad_norm": 0.2123764306306839, |
| "learning_rate": 7.914386774853234e-08, |
| "loss": 0.0224, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.972664852960228, |
| "grad_norm": 0.5205470323562622, |
| "learning_rate": 7.618736152951611e-08, |
| "loss": 0.0177, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.9731830547998446, |
| "grad_norm": 0.45584774017333984, |
| "learning_rate": 7.32870279093767e-08, |
| "loss": 0.0305, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.973701256639461, |
| "grad_norm": 0.3030462861061096, |
| "learning_rate": 7.044287506522862e-08, |
| "loss": 0.0084, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.9742194584790776, |
| "grad_norm": 0.49225398898124695, |
| "learning_rate": 6.76549110157887e-08, |
| "loss": 0.012, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.9747376603186941, |
| "grad_norm": 0.40178224444389343, |
| "learning_rate": 6.49231436213582e-08, |
| "loss": 0.0227, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.9752558621583106, |
| "grad_norm": 0.30079546570777893, |
| "learning_rate": 6.224758058380298e-08, |
| "loss": 0.0146, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.9757740639979272, |
| "grad_norm": 0.15537235140800476, |
| "learning_rate": 5.962822944652224e-08, |
| "loss": 0.0087, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.9762922658375437, |
| "grad_norm": 0.27355560660362244, |
| "learning_rate": 5.706509759443979e-08, |
| "loss": 0.0107, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.9768104676771603, |
| "grad_norm": 0.1678086519241333, |
| "learning_rate": 5.455819225396841e-08, |
| "loss": 0.01, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.9773286695167768, |
| "grad_norm": 0.3235739767551422, |
| "learning_rate": 5.210752049300327e-08, |
| "loss": 0.0218, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.9778468713563934, |
| "grad_norm": 0.24950754642486572, |
| "learning_rate": 4.971308922089746e-08, |
| "loss": 0.0174, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.9783650731960098, |
| "grad_norm": 0.35669779777526855, |
| "learning_rate": 4.737490518843313e-08, |
| "loss": 0.0229, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.9788832750356263, |
| "grad_norm": 0.2362123280763626, |
| "learning_rate": 4.509297498781928e-08, |
| "loss": 0.014, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.9794014768752429, |
| "grad_norm": 0.11838959157466888, |
| "learning_rate": 4.28673050526629e-08, |
| "loss": 0.0057, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.9799196787148594, |
| "grad_norm": 0.21067118644714355, |
| "learning_rate": 4.069790165794896e-08, |
| "loss": 0.0083, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.980437880554476, |
| "grad_norm": 0.11189526319503784, |
| "learning_rate": 3.85847709200271e-08, |
| "loss": 0.0029, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.9809560823940925, |
| "grad_norm": 0.12419242411851883, |
| "learning_rate": 3.652791879659612e-08, |
| "loss": 0.0053, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.9814742842337091, |
| "grad_norm": 0.24143096804618835, |
| "learning_rate": 3.452735108668393e-08, |
| "loss": 0.011, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.9819924860733256, |
| "grad_norm": 0.24221214652061462, |
| "learning_rate": 3.258307343062983e-08, |
| "loss": 0.0096, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.982510687912942, |
| "grad_norm": 0.2091411054134369, |
| "learning_rate": 3.069509131007342e-08, |
| "loss": 0.0147, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.9830288897525586, |
| "grad_norm": 0.2547149062156677, |
| "learning_rate": 2.886341004793014e-08, |
| "loss": 0.0145, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.9835470915921751, |
| "grad_norm": 0.2162514328956604, |
| "learning_rate": 2.7088034808393503e-08, |
| "loss": 0.0198, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.9840652934317917, |
| "grad_norm": 0.4096549451351166, |
| "learning_rate": 2.536897059689958e-08, |
| "loss": 0.0157, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.9845834952714082, |
| "grad_norm": 0.14970317482948303, |
| "learning_rate": 2.3706222260126977e-08, |
| "loss": 0.0153, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.9851016971110248, |
| "grad_norm": 0.331853449344635, |
| "learning_rate": 2.2099794485976878e-08, |
| "loss": 0.0106, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.9856198989506413, |
| "grad_norm": 0.2141299843788147, |
| "learning_rate": 2.0549691803566362e-08, |
| "loss": 0.0243, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.9861381007902578, |
| "grad_norm": 0.1847657710313797, |
| "learning_rate": 1.905591858320621e-08, |
| "loss": 0.0043, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.9866563026298744, |
| "grad_norm": 0.2540552616119385, |
| "learning_rate": 1.7618479036396463e-08, |
| "loss": 0.0228, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.9871745044694908, |
| "grad_norm": 0.09786626696586609, |
| "learning_rate": 1.623737721581087e-08, |
| "loss": 0.0032, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.9876927063091074, |
| "grad_norm": 0.3893474042415619, |
| "learning_rate": 1.491261701528579e-08, |
| "loss": 0.0287, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.9882109081487239, |
| "grad_norm": 0.1743607223033905, |
| "learning_rate": 1.3644202169813547e-08, |
| "loss": 0.013, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.9887291099883404, |
| "grad_norm": 0.14568978548049927, |
| "learning_rate": 1.24321362555202e-08, |
| "loss": 0.0035, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.989247311827957, |
| "grad_norm": 0.24614860117435455, |
| "learning_rate": 1.1276422689672217e-08, |
| "loss": 0.0082, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.9897655136675735, |
| "grad_norm": 0.07558930665254593, |
| "learning_rate": 1.0177064730649832e-08, |
| "loss": 0.0029, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.9902837155071901, |
| "grad_norm": 0.30173760652542114, |
| "learning_rate": 9.134065477953701e-09, |
| "loss": 0.0146, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.9908019173468066, |
| "grad_norm": 0.13991938531398773, |
| "learning_rate": 8.147427872180481e-09, |
| "loss": 0.0077, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.9913201191864232, |
| "grad_norm": 0.15137632191181183, |
| "learning_rate": 7.217154695029482e-09, |
| "loss": 0.0076, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.9918383210260396, |
| "grad_norm": 0.3737245798110962, |
| "learning_rate": 6.343248569284921e-09, |
| "loss": 0.0188, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.9923565228656561, |
| "grad_norm": 0.1780516505241394, |
| "learning_rate": 5.525711958811464e-09, |
| "loss": 0.0216, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.9928747247052727, |
| "grad_norm": 0.1532387137413025, |
| "learning_rate": 4.7645471685453525e-09, |
| "loss": 0.0045, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.9933929265448892, |
| "grad_norm": 0.2650953233242035, |
| "learning_rate": 4.059756344492183e-09, |
| "loss": 0.0093, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.9939111283845058, |
| "grad_norm": 0.1893928050994873, |
| "learning_rate": 3.411341473720242e-09, |
| "loss": 0.0083, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.9944293302241223, |
| "grad_norm": 0.24139444530010223, |
| "learning_rate": 2.8193043843471878e-09, |
| "loss": 0.0169, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.9949475320637389, |
| "grad_norm": 0.36209264397621155, |
| "learning_rate": 2.2836467455489286e-09, |
| "loss": 0.0158, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.9954657339033554, |
| "grad_norm": 0.25236305594444275, |
| "learning_rate": 1.8043700675418607e-09, |
| "loss": 0.0163, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.9959839357429718, |
| "grad_norm": 0.1075766533613205, |
| "learning_rate": 1.381475701580648e-09, |
| "loss": 0.0045, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.9965021375825884, |
| "grad_norm": 0.062261324375867844, |
| "learning_rate": 1.0149648399648827e-09, |
| "loss": 0.0019, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.9970203394222049, |
| "grad_norm": 0.11437200754880905, |
| "learning_rate": 7.048385160213223e-10, |
| "loss": 0.0079, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.9975385412618215, |
| "grad_norm": 0.14774172008037567, |
| "learning_rate": 4.5109760411055084e-10, |
| "loss": 0.0091, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.998056743101438, |
| "grad_norm": 0.1296924203634262, |
| "learning_rate": 2.5374281962697866e-10, |
| "loss": 0.005, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.9985749449410546, |
| "grad_norm": 0.13076238334178925, |
| "learning_rate": 1.1277471898107905e-10, |
| "loss": 0.0075, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.9990931467806711, |
| "grad_norm": 0.2739886939525604, |
| "learning_rate": 2.8193699617151682e-11, |
| "loss": 0.012, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.9996113486202876, |
| "grad_norm": 0.28379353880882263, |
| "learning_rate": 0.0, |
| "loss": 0.0162, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.9996113486202876, |
| "step": 1929, |
| "total_flos": 2.4104116310493763e+21, |
| "train_loss": 0.02103021735445673, |
| "train_runtime": 5871.0379, |
| "train_samples_per_second": 5.259, |
| "train_steps_per_second": 0.329 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1929, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.4104116310493763e+21, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
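
The JSON above follows the Hugging Face Trainer `trainer_state.json` schema: `log_history` holds one record per logging step (loss, gradient norm, learning rate), the final record carries the run summary (1929 steps, reported average train loss ~0.021, ~5871 s runtime), and the top-level fields echo the trainer configuration. Below is a minimal, stdlib-only sketch for inspecting such a file; the path `trainer_state.json` is an assumption, so adjust it to wherever your checkpoint was saved.

```python
import json
from statistics import mean

# Minimal sketch: inspect a Hugging Face Trainer state file.
# The path is an assumption; point it at the checkpoint directory's copy.
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history holds one record per logging step; in this run the
# final record is the aggregate summary (train_loss, runtime, ...).
*step_logs, summary = state["log_history"]

# Eval-only records (if any) carry no "loss" key, so guard for it.
losses = [rec["loss"] for rec in step_logs if "loss" in rec]
print(f"logged steps:        {len(losses)}")
print(f"mean step loss:      {mean(losses):.4f}")
print(f"mean of last 100:    {mean(losses[-100:]):.4f}")
print(f"reported train_loss: {summary['train_loss']:.4f}")
print(f"train_runtime (s):   {summary['train_runtime']:.1f}")
```

If `transformers` is installed, the same file can also be read back with `TrainerState.load_from_json("trainer_state.json")`, which is the mechanism the Trainer itself uses to restore state when resuming from a checkpoint.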